mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-24 04:12:47 +03:00
beautify
This commit is contained in:
parent
a4a7c14593
commit
cc8c6b7b10
@ -94,8 +94,7 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
|
||||
mert::VocabularyFactory::GetVocabulary()->clear();
|
||||
|
||||
//load reference data
|
||||
for (size_t i = 0; i < referenceFiles.size(); ++i)
|
||||
{
|
||||
for (size_t i = 0; i < referenceFiles.size(); ++i) {
|
||||
TRACE_ERR("Loading reference from " << referenceFiles[i] << endl);
|
||||
|
||||
ifstream ifs(referenceFiles[i].c_str());
|
||||
@ -133,28 +132,27 @@ bool BleuScorer::OpenReferenceStream(istream* is, size_t file_id)
|
||||
|
||||
void BleuScorer::ProcessReferenceLine(const std::string& line, Reference* ref) const
|
||||
{
|
||||
NgramCounts counts;
|
||||
size_t length = CountNgrams(line, counts, kBleuNgramOrder);
|
||||
NgramCounts counts;
|
||||
size_t length = CountNgrams(line, counts, kBleuNgramOrder);
|
||||
|
||||
//for any counts larger than those already there, merge them in
|
||||
for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
|
||||
const NgramCounts::Key& ngram = ci->first;
|
||||
const NgramCounts::Value newcount = ci->second;
|
||||
//for any counts larger than those already there, merge them in
|
||||
for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
|
||||
const NgramCounts::Key& ngram = ci->first;
|
||||
const NgramCounts::Value newcount = ci->second;
|
||||
|
||||
NgramCounts::Value oldcount = 0;
|
||||
ref->get_counts()->Lookup(ngram, &oldcount);
|
||||
if (newcount > oldcount) {
|
||||
ref->get_counts()->operator[](ngram) = newcount;
|
||||
}
|
||||
NgramCounts::Value oldcount = 0;
|
||||
ref->get_counts()->Lookup(ngram, &oldcount);
|
||||
if (newcount > oldcount) {
|
||||
ref->get_counts()->operator[](ngram) = newcount;
|
||||
}
|
||||
//add in the length
|
||||
ref->push_back(length);
|
||||
}
|
||||
//add in the length
|
||||
ref->push_back(length);
|
||||
}
|
||||
|
||||
bool BleuScorer::GetNextReferenceFromStreams(std::vector<boost::shared_ptr<std::ifstream> >& referenceStreams, Reference& ref) const
|
||||
{
|
||||
for (vector<boost::shared_ptr<ifstream> >::iterator ifs=referenceStreams.begin(); ifs!=referenceStreams.end(); ++ifs)
|
||||
{
|
||||
for (vector<boost::shared_ptr<ifstream> >::iterator ifs=referenceStreams.begin(); ifs!=referenceStreams.end(); ++ifs) {
|
||||
if (!(*ifs)) return false;
|
||||
string line;
|
||||
if (!getline(**ifs, line)) return false;
|
||||
@ -309,22 +307,20 @@ vector<float> BleuScorer::ScoreNbestList(const string& scoreFile, const string&
|
||||
|
||||
vector<FeatureDataIterator> featureDataIters;
|
||||
vector<ScoreDataIterator> scoreDataIters;
|
||||
for (size_t i = 0; i < featureFiles.size(); ++i)
|
||||
{
|
||||
for (size_t i = 0; i < featureFiles.size(); ++i) {
|
||||
featureDataIters.push_back(FeatureDataIterator(featureFiles[i]));
|
||||
scoreDataIters.push_back(ScoreDataIterator(scoreFiles[i]));
|
||||
}
|
||||
|
||||
vector<pair<size_t,size_t> > hypotheses;
|
||||
UTIL_THROW_IF2(featureDataIters[0] == FeatureDataIterator::end(),
|
||||
UTIL_THROW_IF2(featureDataIters[0] == FeatureDataIterator::end(),
|
||||
"At the end of feature data iterator");
|
||||
for (size_t i = 0; i < featureFiles.size(); ++i)
|
||||
{
|
||||
UTIL_THROW_IF2(featureDataIters[i] == FeatureDataIterator::end(),
|
||||
for (size_t i = 0; i < featureFiles.size(); ++i) {
|
||||
UTIL_THROW_IF2(featureDataIters[i] == FeatureDataIterator::end(),
|
||||
"Feature file " << i << " ended prematurely");
|
||||
UTIL_THROW_IF2(scoreDataIters[i] == ScoreDataIterator::end(),
|
||||
UTIL_THROW_IF2(scoreDataIters[i] == ScoreDataIterator::end(),
|
||||
"Score file " << i << " ended prematurely");
|
||||
UTIL_THROW_IF2(featureDataIters[i]->size() != scoreDataIters[i]->size(),
|
||||
UTIL_THROW_IF2(featureDataIters[i]->size() != scoreDataIters[i]->size(),
|
||||
"Features and scores have different size");
|
||||
for (size_t j = 0; j < featureDataIters[i]->size(); ++j) {
|
||||
hypotheses.push_back(pair<size_t,size_t>(i,j));
|
||||
|
@ -13,7 +13,8 @@
|
||||
using namespace std;
|
||||
using namespace MosesTuning;
|
||||
|
||||
BOOST_AUTO_TEST_CASE(viterbi_simple_lattice) {
|
||||
BOOST_AUTO_TEST_CASE(viterbi_simple_lattice)
|
||||
{
|
||||
Vocab vocab;
|
||||
WordVec words;
|
||||
string wordStrings[] =
|
||||
@ -244,7 +245,8 @@ BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
|
||||
BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[8]);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(viterbi_full_hypergraph) {
|
||||
BOOST_AUTO_TEST_CASE(viterbi_full_hypergraph)
|
||||
{
|
||||
Vocab vocab;
|
||||
//References
|
||||
ReferenceSet references;
|
||||
|
@ -11,7 +11,8 @@ how many of the features are really "dense". This is because in hg mira
|
||||
all features (sparse and dense) are to get rolled in to SparseVector
|
||||
*/
|
||||
|
||||
BOOST_AUTO_TEST_CASE(from_sparse) {
|
||||
BOOST_AUTO_TEST_CASE(from_sparse)
|
||||
{
|
||||
SparseVector sp;
|
||||
sp.set("dense0", 0.2);
|
||||
sp.set("dense1", 0.3);
|
||||
|
@ -474,7 +474,7 @@ int main(int argc, char **argv)
|
||||
// A task for each start point
|
||||
for (size_t j = 0; j < startingPoints.size(); ++j) {
|
||||
boost::shared_ptr<OptimizationTask>
|
||||
task(new OptimizationTask(optimizer, startingPoints[j]));
|
||||
task(new OptimizationTask(optimizer, startingPoints[j]));
|
||||
tasks.push_back(task);
|
||||
#ifdef WITH_THREADS
|
||||
pool.Submit(task);
|
||||
|
@ -32,8 +32,7 @@ int main(int argc, char **argv)
|
||||
|
||||
// initialize reference streams
|
||||
std::vector<boost::shared_ptr<std::ifstream> > refStreams;
|
||||
for (std::vector<std::string>::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile)
|
||||
{
|
||||
for (std::vector<std::string>::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile) {
|
||||
TRACE_ERR("Loading reference from " << *refFile << std::endl);
|
||||
boost::shared_ptr<std::ifstream> ifs(new std::ifstream(refFile->c_str()));
|
||||
UTIL_THROW_IF2(!ifs, "Cannot open " << *refFile);
|
||||
@ -44,8 +43,7 @@ int main(int argc, char **argv)
|
||||
std::string nbestLine;
|
||||
int sid = -1;
|
||||
Reference ref;
|
||||
while ( getline(std::cin, nbestLine) )
|
||||
{
|
||||
while ( getline(std::cin, nbestLine) ) {
|
||||
std::vector<std::string> items;
|
||||
Moses::TokenizeMultiCharSeparator(items, nbestLine, " ||| ");
|
||||
int sidCurrent = Moses::Scan<int>(items[0]);
|
||||
|
@ -34,8 +34,7 @@ int main(int argc, char **argv)
|
||||
|
||||
// initialize reference streams
|
||||
vector<boost::shared_ptr<ifstream> > refStreams;
|
||||
for (vector<string>::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile)
|
||||
{
|
||||
for (vector<string>::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile) {
|
||||
TRACE_ERR("Loading reference from " << *refFile << endl);
|
||||
boost::shared_ptr<ifstream> ifs(new ifstream(refFile->c_str()));
|
||||
UTIL_THROW_IF2(!ifs, "Cannot open " << *refFile);
|
||||
@ -45,8 +44,7 @@ int main(int argc, char **argv)
|
||||
// load sentences, preparing statistics, score
|
||||
string hypothesisLine;
|
||||
size_t sid = 0;
|
||||
while (getline(std::cin, hypothesisLine))
|
||||
{
|
||||
while (getline(std::cin, hypothesisLine)) {
|
||||
Reference ref;
|
||||
if (!scorer.GetNextReferenceFromStreams(refStreams, ref)) {
|
||||
UTIL_THROW2("Missing references");
|
||||
|
@ -177,39 +177,34 @@ int main(int argc, char* argv[])
|
||||
const vector<float>& scale_grid = grid.getGrid(lmbr_scale);
|
||||
|
||||
boost::shared_ptr<InputType> source;
|
||||
while((source = ioWrapper->ReadInput()) != NULL)
|
||||
{
|
||||
// set up task of translating one sentence
|
||||
boost::shared_ptr<TranslationTask> ttask;
|
||||
ttask = TranslationTask::create(source, ioWrapper);
|
||||
Manager manager(ttask);
|
||||
manager.Decode();
|
||||
TrellisPathList nBestList;
|
||||
manager.CalcNBest(nBestSize, nBestList,true);
|
||||
//grid search
|
||||
BOOST_FOREACH(float const& p, pgrid)
|
||||
{
|
||||
SD.SetLatticeMBRPrecision(p);
|
||||
BOOST_FOREACH(float const& r, rgrid)
|
||||
{
|
||||
SD.SetLatticeMBRPRatio(r);
|
||||
BOOST_FOREACH(size_t const prune_i, prune_grid)
|
||||
{
|
||||
SD.SetLatticeMBRPruningFactor(size_t(prune_i));
|
||||
BOOST_FOREACH(float const& scale_i, scale_grid)
|
||||
{
|
||||
SD.SetMBRScale(scale_i);
|
||||
size_t lineCount = source->GetTranslationId();
|
||||
cout << lineCount << " ||| " << p << " "
|
||||
<< r << " " << size_t(prune_i) << " " << scale_i
|
||||
<< " ||| ";
|
||||
vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
|
||||
manager.OutputBestHypo(mbrBestHypo, lineCount,
|
||||
SD.GetReportSegmentation(),
|
||||
SD.GetReportAllFactors(),cout);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
while((source = ioWrapper->ReadInput()) != NULL) {
|
||||
// set up task of translating one sentence
|
||||
boost::shared_ptr<TranslationTask> ttask;
|
||||
ttask = TranslationTask::create(source, ioWrapper);
|
||||
Manager manager(ttask);
|
||||
manager.Decode();
|
||||
TrellisPathList nBestList;
|
||||
manager.CalcNBest(nBestSize, nBestList,true);
|
||||
//grid search
|
||||
BOOST_FOREACH(float const& p, pgrid) {
|
||||
SD.SetLatticeMBRPrecision(p);
|
||||
BOOST_FOREACH(float const& r, rgrid) {
|
||||
SD.SetLatticeMBRPRatio(r);
|
||||
BOOST_FOREACH(size_t const prune_i, prune_grid) {
|
||||
SD.SetLatticeMBRPruningFactor(size_t(prune_i));
|
||||
BOOST_FOREACH(float const& scale_i, scale_grid) {
|
||||
SD.SetMBRScale(scale_i);
|
||||
size_t lineCount = source->GetTranslationId();
|
||||
cout << lineCount << " ||| " << p << " "
|
||||
<< r << " " << size_t(prune_i) << " " << scale_i
|
||||
<< " ||| ";
|
||||
vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
|
||||
manager.OutputBestHypo(mbrBestHypo, lineCount,
|
||||
SD.GetReportSegmentation(),
|
||||
SD.GetReportAllFactors(),cout);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -144,27 +144,28 @@ int main(int argc, char** argv)
|
||||
#endif
|
||||
|
||||
// main loop over set of input sentences
|
||||
|
||||
|
||||
boost::shared_ptr<InputType> source;
|
||||
while ((source = ioWrapper->ReadInput()) != NULL)
|
||||
{
|
||||
IFVERBOSE(1) { ResetUserTime(); }
|
||||
|
||||
InputType* foo = source.get();
|
||||
FeatureFunction::CallChangeSource(foo);
|
||||
|
||||
// set up task of training one sentence
|
||||
boost::shared_ptr<TrainingTask> task;
|
||||
task = TrainingTask::create(source, ioWrapper);
|
||||
|
||||
// execute task
|
||||
#ifdef WITH_THREADS
|
||||
pool.Submit(task);
|
||||
#else
|
||||
task->Run();
|
||||
#endif
|
||||
while ((source = ioWrapper->ReadInput()) != NULL) {
|
||||
IFVERBOSE(1) {
|
||||
ResetUserTime();
|
||||
}
|
||||
|
||||
InputType* foo = source.get();
|
||||
FeatureFunction::CallChangeSource(foo);
|
||||
|
||||
// set up task of training one sentence
|
||||
boost::shared_ptr<TrainingTask> task;
|
||||
task = TrainingTask::create(source, ioWrapper);
|
||||
|
||||
// execute task
|
||||
#ifdef WITH_THREADS
|
||||
pool.Submit(task);
|
||||
#else
|
||||
task->Run();
|
||||
#endif
|
||||
}
|
||||
|
||||
// we are done, finishing up
|
||||
#ifdef WITH_THREADS
|
||||
pool.Stop(true); //flush remaining jobs
|
||||
|
@ -17,7 +17,9 @@ BaseManager::BaseManager(ttasksptr const& ttask)
|
||||
|
||||
const InputType&
|
||||
BaseManager::GetSource() const
|
||||
{ return m_source; }
|
||||
{
|
||||
return m_source;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -36,8 +36,8 @@ class ChartCellCollectionBase
|
||||
{
|
||||
public:
|
||||
template <class Factory> ChartCellCollectionBase(const InputType &input,
|
||||
const Factory &factory,
|
||||
const ChartParser &parser)
|
||||
const Factory &factory,
|
||||
const ChartParser &parser)
|
||||
:m_cells(input.GetSize()) {
|
||||
|
||||
size_t size = input.GetSize();
|
||||
|
@ -299,7 +299,7 @@ CreateTranslationOptionCollection(ttasksptr const& ttask) const
|
||||
= StaticData::Instance().GetTranslationOptionThreshold();
|
||||
TranslationOptionCollection *rv
|
||||
= new TranslationOptionCollectionConfusionNet
|
||||
(ttask, *this, maxNoTransOptPerCoverage, translationOptionThreshold);
|
||||
(ttask, *this, maxNoTransOptPerCoverage, translationOptionThreshold);
|
||||
assert(rv);
|
||||
return rv;
|
||||
}
|
||||
|
@ -18,80 +18,75 @@
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class ContextScope
|
||||
{
|
||||
protected:
|
||||
typedef std::map<void const*, boost::shared_ptr<void> > scratchpad_t;
|
||||
typedef scratchpad_t::iterator iter_t;
|
||||
typedef scratchpad_t::value_type entry_t;
|
||||
typedef scratchpad_t::const_iterator const_iter_t;
|
||||
scratchpad_t m_scratchpad;
|
||||
mutable boost::shared_mutex m_lock;
|
||||
public:
|
||||
// class write_access
|
||||
// {
|
||||
// boost::unique_lock<boost::shared_mutex> m_lock;
|
||||
// public:
|
||||
class ContextScope
|
||||
{
|
||||
protected:
|
||||
typedef std::map<void const*, boost::shared_ptr<void> > scratchpad_t;
|
||||
typedef scratchpad_t::iterator iter_t;
|
||||
typedef scratchpad_t::value_type entry_t;
|
||||
typedef scratchpad_t::const_iterator const_iter_t;
|
||||
scratchpad_t m_scratchpad;
|
||||
mutable boost::shared_mutex m_lock;
|
||||
public:
|
||||
// class write_access
|
||||
// {
|
||||
// boost::unique_lock<boost::shared_mutex> m_lock;
|
||||
// public:
|
||||
|
||||
// write_access(boost::shared_mutex& lock)
|
||||
// : m_lock(lock)
|
||||
// { }
|
||||
// write_access(boost::shared_mutex& lock)
|
||||
// : m_lock(lock)
|
||||
// { }
|
||||
|
||||
// write_access(write_access& other)
|
||||
// {
|
||||
// swap(m_lock, other.m_lock);
|
||||
// }
|
||||
// };
|
||||
// write_access(write_access& other)
|
||||
// {
|
||||
// swap(m_lock, other.m_lock);
|
||||
// }
|
||||
// };
|
||||
|
||||
// write_access lock() const
|
||||
// {
|
||||
// return write_access(m_lock);
|
||||
// }
|
||||
// write_access lock() const
|
||||
// {
|
||||
// return write_access(m_lock);
|
||||
// }
|
||||
|
||||
template<typename T>
|
||||
boost::shared_ptr<void> const&
|
||||
set(void const* const key, boost::shared_ptr<T> const& val)
|
||||
{
|
||||
boost::unique_lock<boost::shared_mutex> lock(m_lock);
|
||||
return (m_scratchpad[key] = val);
|
||||
}
|
||||
template<typename T>
|
||||
boost::shared_ptr<void> const&
|
||||
set(void const* const key, boost::shared_ptr<T> const& val) {
|
||||
boost::unique_lock<boost::shared_mutex> lock(m_lock);
|
||||
return (m_scratchpad[key] = val);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
boost::shared_ptr<T> const
|
||||
get(void const* key, bool CreateNewIfNecessary=false)
|
||||
{
|
||||
using boost::shared_mutex;
|
||||
using boost::upgrade_lock;
|
||||
// T const* key = reinterpret_cast<T const*>(xkey);
|
||||
upgrade_lock<shared_mutex> lock(m_lock);
|
||||
iter_t m = m_scratchpad.find(key);
|
||||
boost::shared_ptr< T > ret;
|
||||
if (m != m_scratchpad.end())
|
||||
{
|
||||
if (m->second == NULL && CreateNewIfNecessary)
|
||||
{
|
||||
boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
|
||||
m->second.reset(new T);
|
||||
}
|
||||
ret = boost::static_pointer_cast< T >(m->second);
|
||||
return ret;
|
||||
}
|
||||
if (!CreateNewIfNecessary) return ret;
|
||||
boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
|
||||
ret.reset(new T);
|
||||
m_scratchpad[key] = ret;
|
||||
template<typename T>
|
||||
boost::shared_ptr<T> const
|
||||
get(void const* key, bool CreateNewIfNecessary=false) {
|
||||
using boost::shared_mutex;
|
||||
using boost::upgrade_lock;
|
||||
// T const* key = reinterpret_cast<T const*>(xkey);
|
||||
upgrade_lock<shared_mutex> lock(m_lock);
|
||||
iter_t m = m_scratchpad.find(key);
|
||||
boost::shared_ptr< T > ret;
|
||||
if (m != m_scratchpad.end()) {
|
||||
if (m->second == NULL && CreateNewIfNecessary) {
|
||||
boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
|
||||
m->second.reset(new T);
|
||||
}
|
||||
ret = boost::static_pointer_cast< T >(m->second);
|
||||
return ret;
|
||||
}
|
||||
if (!CreateNewIfNecessary) return ret;
|
||||
boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
|
||||
ret.reset(new T);
|
||||
m_scratchpad[key] = ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
ContextScope() { }
|
||||
ContextScope() { }
|
||||
|
||||
ContextScope(ContextScope const& other)
|
||||
{
|
||||
boost::unique_lock<boost::shared_mutex> lock1(this->m_lock);
|
||||
boost::unique_lock<boost::shared_mutex> lock2(other.m_lock);
|
||||
m_scratchpad = other.m_scratchpad;
|
||||
}
|
||||
|
||||
};
|
||||
ContextScope(ContextScope const& other) {
|
||||
boost::unique_lock<boost::shared_mutex> lock1(this->m_lock);
|
||||
boost::unique_lock<boost::shared_mutex> lock2(other.m_lock);
|
||||
m_scratchpad = other.m_scratchpad;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
|
@ -218,17 +218,16 @@ const InputPath &DecodeStepTranslation::GetInputPathLEGACY(
|
||||
void
|
||||
DecodeStepTranslation::
|
||||
ProcessLEGACY(TranslationOption const& in,
|
||||
DecodeStep const& decodeStep,
|
||||
PartialTranslOptColl &out,
|
||||
TranslationOptionCollection *toc,
|
||||
bool adhereTableLimit) const
|
||||
DecodeStep const& decodeStep,
|
||||
PartialTranslOptColl &out,
|
||||
TranslationOptionCollection *toc,
|
||||
bool adhereTableLimit) const
|
||||
{
|
||||
if (in.GetTargetPhrase().GetSize() == 0)
|
||||
{
|
||||
// word deletion
|
||||
out.Add(new TranslationOption(in));
|
||||
return;
|
||||
}
|
||||
if (in.GetTargetPhrase().GetSize() == 0) {
|
||||
// word deletion
|
||||
out.Add(new TranslationOption(in));
|
||||
return;
|
||||
}
|
||||
|
||||
// normal trans step
|
||||
WordsRange const& srcRange = in.GetSourceWordsRange();
|
||||
@ -241,34 +240,32 @@ ProcessLEGACY(TranslationOption const& in,
|
||||
TargetPhraseCollectionWithSourcePhrase const* phraseColl;
|
||||
phraseColl = pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange);
|
||||
|
||||
if (phraseColl != NULL)
|
||||
{
|
||||
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
|
||||
iterEnd = ((adhereTableLimit && tableLimit && phraseColl->GetSize() >= tableLimit)
|
||||
? phraseColl->begin() + tableLimit : phraseColl->end());
|
||||
if (phraseColl != NULL) {
|
||||
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
|
||||
iterEnd = ((adhereTableLimit && tableLimit && phraseColl->GetSize() >= tableLimit)
|
||||
? phraseColl->begin() + tableLimit : phraseColl->end());
|
||||
|
||||
for (iterTargetPhrase = phraseColl->begin();
|
||||
iterTargetPhrase != iterEnd;
|
||||
++iterTargetPhrase)
|
||||
{
|
||||
TargetPhrase const& targetPhrase = **iterTargetPhrase;
|
||||
if (targetPhrase.GetSize() != currSize ||
|
||||
(IsFilteringStep() && !in.IsCompatible(targetPhrase, m_conflictFactors)))
|
||||
continue;
|
||||
for (iterTargetPhrase = phraseColl->begin();
|
||||
iterTargetPhrase != iterEnd;
|
||||
++iterTargetPhrase) {
|
||||
TargetPhrase const& targetPhrase = **iterTargetPhrase;
|
||||
if (targetPhrase.GetSize() != currSize ||
|
||||
(IsFilteringStep() && !in.IsCompatible(targetPhrase, m_conflictFactors)))
|
||||
continue;
|
||||
|
||||
TargetPhrase outPhrase(inPhrase);
|
||||
outPhrase.Merge(targetPhrase, m_newOutputFactors);
|
||||
outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
|
||||
TargetPhrase outPhrase(inPhrase);
|
||||
outPhrase.Merge(targetPhrase, m_newOutputFactors);
|
||||
outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
|
||||
|
||||
TranslationOption *newTransOpt = new TranslationOption(srcRange, outPhrase);
|
||||
assert(newTransOpt != NULL);
|
||||
TranslationOption *newTransOpt = new TranslationOption(srcRange, outPhrase);
|
||||
assert(newTransOpt != NULL);
|
||||
|
||||
newTransOpt->SetInputPath(inputPath);
|
||||
newTransOpt->SetInputPath(inputPath);
|
||||
|
||||
out.Add(newTransOpt);
|
||||
out.Add(newTransOpt);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -83,16 +83,16 @@ void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
|
||||
|
||||
SimpleTranslationInterface::SimpleTranslationInterface(const string &mosesIni): m_staticData(StaticData::Instance())
|
||||
{
|
||||
if (!m_params.LoadParam(mosesIni)) {
|
||||
cerr << "Error; Cannot load parameters at " << mosesIni<<endl;
|
||||
exit(1);
|
||||
}
|
||||
if (!StaticData::LoadDataStatic(&m_params, mosesIni.c_str())) {
|
||||
cerr << "Error; Cannot load static data in file " << mosesIni<<endl;
|
||||
exit(1);
|
||||
}
|
||||
if (!m_params.LoadParam(mosesIni)) {
|
||||
cerr << "Error; Cannot load parameters at " << mosesIni<<endl;
|
||||
exit(1);
|
||||
}
|
||||
if (!StaticData::LoadDataStatic(&m_params, mosesIni.c_str())) {
|
||||
cerr << "Error; Cannot load static data in file " << mosesIni<<endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
util::rand_init();
|
||||
util::rand_init();
|
||||
|
||||
}
|
||||
|
||||
@ -114,13 +114,15 @@ string SimpleTranslationInterface::translate(const string &inputString)
|
||||
|
||||
boost::shared_ptr<InputType> source = ioWrapper->ReadInput();
|
||||
if (!source) return "Error: Source==null!!!";
|
||||
IFVERBOSE(1) { ResetUserTime(); }
|
||||
IFVERBOSE(1) {
|
||||
ResetUserTime();
|
||||
}
|
||||
|
||||
FeatureFunction::CallChangeSource(&*source);
|
||||
|
||||
// set up task of translating one sentence
|
||||
boost::shared_ptr<TranslationTask> task
|
||||
= TranslationTask::create(source, ioWrapper);
|
||||
= TranslationTask::create(source, ioWrapper);
|
||||
task->Run();
|
||||
|
||||
string output = outputStream.str();
|
||||
@ -147,10 +149,14 @@ int
|
||||
run_as_server()
|
||||
{
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
int port; params.SetParameter(port, "server-port", 8080);
|
||||
bool isSerial; params.SetParameter(isSerial, "serial", false);
|
||||
string logfile; params.SetParameter(logfile, "server-log", string(""));
|
||||
size_t num_threads; params.SetParameter(num_threads, "threads", size_t(10));
|
||||
int port;
|
||||
params.SetParameter(port, "server-port", 8080);
|
||||
bool isSerial;
|
||||
params.SetParameter(isSerial, "serial", false);
|
||||
string logfile;
|
||||
params.SetParameter(logfile, "server-log", string(""));
|
||||
size_t num_threads;
|
||||
params.SetParameter(num_threads, "threads", size_t(10));
|
||||
if (isSerial) VERBOSE(1,"Running server in serial mode." << endl);
|
||||
|
||||
xmlrpc_c::registry myRegistry;
|
||||
@ -166,8 +172,9 @@ run_as_server()
|
||||
xmlrpc_c::serverAbyss myAbyssServer(myRegistry, port, logfile);
|
||||
|
||||
XVERBOSE(1,"Listening on port " << port << endl);
|
||||
if (isSerial) { while(1) myAbyssServer.runOnce(); }
|
||||
else myAbyssServer.run();
|
||||
if (isSerial) {
|
||||
while(1) myAbyssServer.runOnce();
|
||||
} else myAbyssServer.run();
|
||||
|
||||
std::cerr << "xmlrpc_c::serverAbyss.run() returned but should not." << std::endl;
|
||||
// #pragma message("BUILDING MOSES WITH SERVER SUPPORT")
|
||||
@ -193,16 +200,15 @@ batch_run()
|
||||
// set up read/writing class:
|
||||
boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper);
|
||||
UTIL_THROW_IF2(ioWrapper == NULL, "Error; Failed to create IO object"
|
||||
<< " [" << HERE << "]");
|
||||
<< " [" << HERE << "]");
|
||||
|
||||
// check on weights
|
||||
const ScoreComponentCollection& weights = staticData.GetAllWeights();
|
||||
IFVERBOSE(2)
|
||||
{
|
||||
TRACE_ERR("The global weight vector looks like this: ");
|
||||
TRACE_ERR(weights);
|
||||
TRACE_ERR("\n");
|
||||
}
|
||||
IFVERBOSE(2) {
|
||||
TRACE_ERR("The global weight vector looks like this: ");
|
||||
TRACE_ERR(weights);
|
||||
TRACE_ERR("\n");
|
||||
}
|
||||
|
||||
#ifdef WITH_THREADS
|
||||
ThreadPool pool(staticData.ThreadCount());
|
||||
@ -214,57 +220,53 @@ batch_run()
|
||||
// main loop over set of input sentences
|
||||
|
||||
boost::shared_ptr<InputType> source;
|
||||
while ((source = ioWrapper->ReadInput()) != NULL)
|
||||
{
|
||||
IFVERBOSE(1) ResetUserTime();
|
||||
while ((source = ioWrapper->ReadInput()) != NULL) {
|
||||
IFVERBOSE(1) ResetUserTime();
|
||||
|
||||
FeatureFunction::CallChangeSource(source.get());
|
||||
FeatureFunction::CallChangeSource(source.get());
|
||||
|
||||
// set up task of translating one sentence
|
||||
boost::shared_ptr<TranslationTask>
|
||||
task = TranslationTask::create(source, ioWrapper);
|
||||
task->SetContextString(context_string);
|
||||
// set up task of translating one sentence
|
||||
boost::shared_ptr<TranslationTask>
|
||||
task = TranslationTask::create(source, ioWrapper);
|
||||
task->SetContextString(context_string);
|
||||
|
||||
// Allow for (sentence-)context-specific processing prior to
|
||||
// decoding. This can be used, for example, for context-sensitive
|
||||
// phrase lookup.
|
||||
FeatureFunction::SetupAll(*task);
|
||||
// Allow for (sentence-)context-specific processing prior to
|
||||
// decoding. This can be used, for example, for context-sensitive
|
||||
// phrase lookup.
|
||||
FeatureFunction::SetupAll(*task);
|
||||
|
||||
// execute task
|
||||
// execute task
|
||||
#ifdef WITH_THREADS
|
||||
#ifdef PT_UG
|
||||
// simulated post-editing requires threads (within the dynamic phrase tables)
|
||||
// but runs all sentences serially, to allow updating of the bitext.
|
||||
bool spe = params.isParamSpecified("spe-src");
|
||||
if (spe)
|
||||
{
|
||||
// simulated post-editing: always run single-threaded!
|
||||
task->Run();
|
||||
string src,trg,aln;
|
||||
UTIL_THROW_IF2(!getline(*ioWrapper->spe_src,src), "[" << HERE << "] "
|
||||
<< "missing update data for simulated post-editing.");
|
||||
UTIL_THROW_IF2(!getline(*ioWrapper->spe_trg,trg), "[" << HERE << "] "
|
||||
<< "missing update data for simulated post-editing.");
|
||||
UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] "
|
||||
<< "missing update data for simulated post-editing.");
|
||||
BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl())
|
||||
{
|
||||
Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
|
||||
if (sapt) sapt->add(src,trg,aln);
|
||||
VERBOSE(1,"[" << HERE << " added src] " << src << endl);
|
||||
VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
|
||||
VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
|
||||
}
|
||||
}
|
||||
else pool.Submit(task);
|
||||
// simulated post-editing requires threads (within the dynamic phrase tables)
|
||||
// but runs all sentences serially, to allow updating of the bitext.
|
||||
bool spe = params.isParamSpecified("spe-src");
|
||||
if (spe) {
|
||||
// simulated post-editing: always run single-threaded!
|
||||
task->Run();
|
||||
string src,trg,aln;
|
||||
UTIL_THROW_IF2(!getline(*ioWrapper->spe_src,src), "[" << HERE << "] "
|
||||
<< "missing update data for simulated post-editing.");
|
||||
UTIL_THROW_IF2(!getline(*ioWrapper->spe_trg,trg), "[" << HERE << "] "
|
||||
<< "missing update data for simulated post-editing.");
|
||||
UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] "
|
||||
<< "missing update data for simulated post-editing.");
|
||||
BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl()) {
|
||||
Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
|
||||
if (sapt) sapt->add(src,trg,aln);
|
||||
VERBOSE(1,"[" << HERE << " added src] " << src << endl);
|
||||
VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
|
||||
VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
|
||||
}
|
||||
} else pool.Submit(task);
|
||||
#else
|
||||
pool.Submit(task);
|
||||
pool.Submit(task);
|
||||
|
||||
#endif
|
||||
#else
|
||||
task->Run();
|
||||
task->Run();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// we are done, finishing up
|
||||
#ifdef WITH_THREADS
|
||||
@ -289,52 +291,49 @@ int decoder_main(int argc, char** argv)
|
||||
#ifdef NDEBUG
|
||||
try
|
||||
#endif
|
||||
{
|
||||
{
|
||||
#ifdef HAVE_PROTOBUF
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
#endif
|
||||
|
||||
// echo command line, if verbose
|
||||
IFVERBOSE(1)
|
||||
{
|
||||
TRACE_ERR("command: ");
|
||||
for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
|
||||
TRACE_ERR(endl);
|
||||
}
|
||||
|
||||
// set number of significant decimals in output
|
||||
FixPrecision(cout);
|
||||
FixPrecision(cerr);
|
||||
|
||||
// load all the settings into the Parameter class
|
||||
// (stores them as strings, or array of strings)
|
||||
if (!params.LoadParam(argc,argv))
|
||||
exit(1);
|
||||
|
||||
// initialize all "global" variables, which are stored in StaticData
|
||||
// note: this also loads models such as the language model, etc.
|
||||
if (!StaticData::LoadDataStatic(&params, argv[0]))
|
||||
exit(1);
|
||||
|
||||
// setting "-show-weights" -> just dump out weights and exit
|
||||
if (params.isParamSpecified("show-weights"))
|
||||
{
|
||||
ShowWeights();
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (params.GetParam("server"))
|
||||
return run_as_server();
|
||||
else
|
||||
return batch_run();
|
||||
|
||||
// echo command line, if verbose
|
||||
IFVERBOSE(1) {
|
||||
TRACE_ERR("command: ");
|
||||
for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
|
||||
TRACE_ERR(endl);
|
||||
}
|
||||
|
||||
// set number of significant decimals in output
|
||||
FixPrecision(cout);
|
||||
FixPrecision(cerr);
|
||||
|
||||
// load all the settings into the Parameter class
|
||||
// (stores them as strings, or array of strings)
|
||||
if (!params.LoadParam(argc,argv))
|
||||
exit(1);
|
||||
|
||||
// initialize all "global" variables, which are stored in StaticData
|
||||
// note: this also loads models such as the language model, etc.
|
||||
if (!StaticData::LoadDataStatic(&params, argv[0]))
|
||||
exit(1);
|
||||
|
||||
// setting "-show-weights" -> just dump out weights and exit
|
||||
if (params.isParamSpecified("show-weights")) {
|
||||
ShowWeights();
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (params.GetParam("server"))
|
||||
return run_as_server();
|
||||
else
|
||||
return batch_run();
|
||||
|
||||
}
|
||||
#ifdef NDEBUG
|
||||
catch (const std::exception &e)
|
||||
{
|
||||
std::cerr << "Exception: " << e.what() << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
catch (const std::exception &e) {
|
||||
std::cerr << "Exception: " << e.what() << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -45,7 +45,9 @@ public:
|
||||
~SimpleTranslationInterface();
|
||||
std::string translate(const std::string &input);
|
||||
Moses::StaticData& getStaticData();
|
||||
Moses::Parameter& getParameters(){ return m_params; }
|
||||
Moses::Parameter& getParameters() {
|
||||
return m_params;
|
||||
}
|
||||
private:
|
||||
SimpleTranslationInterface();
|
||||
Moses::Parameter m_params;
|
||||
|
@ -157,32 +157,26 @@ FeatureFactory
|
||||
std::vector<float> weights = static_data.GetParameter()->GetWeights(featureName);
|
||||
|
||||
|
||||
if (feature->GetNumScoreComponents())
|
||||
{
|
||||
if (weights.size() == 0)
|
||||
{
|
||||
weights = feature->DefaultWeights();
|
||||
if (weights.size() == 0)
|
||||
{
|
||||
TRACE_ERR("WARNING: No weights specified in config file for FF "
|
||||
<< featureName << ". This FF does not supply default values.\n"
|
||||
<< "WARNING: Auto-initializing all weights for this FF to 1.0");
|
||||
weights.assign(feature->GetNumScoreComponents(),1.0);
|
||||
}
|
||||
else
|
||||
{
|
||||
TRACE_ERR("WARNING: No weights specified in config file for FF "
|
||||
<< featureName << ". Using default values supplied by FF.");
|
||||
}
|
||||
}
|
||||
UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(),
|
||||
"FATAL ERROR: Mismatch in number of features and number "
|
||||
<< "of weights for Feature Function " << featureName
|
||||
<< " (features: " << feature->GetNumScoreComponents()
|
||||
<< " vs. weights: " << weights.size() << ")");
|
||||
static_data.SetWeights(feature, weights);
|
||||
if (feature->GetNumScoreComponents()) {
|
||||
if (weights.size() == 0) {
|
||||
weights = feature->DefaultWeights();
|
||||
if (weights.size() == 0) {
|
||||
TRACE_ERR("WARNING: No weights specified in config file for FF "
|
||||
<< featureName << ". This FF does not supply default values.\n"
|
||||
<< "WARNING: Auto-initializing all weights for this FF to 1.0");
|
||||
weights.assign(feature->GetNumScoreComponents(),1.0);
|
||||
} else {
|
||||
TRACE_ERR("WARNING: No weights specified in config file for FF "
|
||||
<< featureName << ". Using default values supplied by FF.");
|
||||
}
|
||||
}
|
||||
else if (feature->IsTuneable())
|
||||
UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(),
|
||||
"FATAL ERROR: Mismatch in number of features and number "
|
||||
<< "of weights for Feature Function " << featureName
|
||||
<< " (features: " << feature->GetNumScoreComponents()
|
||||
<< " vs. weights: " << weights.size() << ")");
|
||||
static_data.SetWeights(feature, weights);
|
||||
} else if (feature->IsTuneable())
|
||||
static_data.SetWeights(feature, weights);
|
||||
}
|
||||
|
||||
|
@ -55,7 +55,7 @@ void FeatureFunction::CallChangeSource(InputType * const&input)
|
||||
void FeatureFunction::SetupAll(TranslationTask const& ttask)
|
||||
{
|
||||
BOOST_FOREACH(FeatureFunction* ff, s_staticColl)
|
||||
ff->Setup(ttask);
|
||||
ff->Setup(ttask);
|
||||
}
|
||||
|
||||
FeatureFunction::
|
||||
@ -193,17 +193,23 @@ void FeatureFunction::SetTuneableComponents(const std::string& value)
|
||||
void
|
||||
FeatureFunction
|
||||
::InitializeForInput(ttasksptr const& ttask)
|
||||
{ InitializeForInput(*(ttask->GetSource().get())); }
|
||||
{
|
||||
InitializeForInput(*(ttask->GetSource().get()));
|
||||
}
|
||||
|
||||
void
|
||||
FeatureFunction
|
||||
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask)
|
||||
{ CleanUpAfterSentenceProcessing(*(ttask->GetSource().get())); }
|
||||
{
|
||||
CleanUpAfterSentenceProcessing(*(ttask->GetSource().get()));
|
||||
}
|
||||
|
||||
size_t
|
||||
FeatureFunction
|
||||
::GetIndex() const
|
||||
{ return m_index; }
|
||||
{
|
||||
return m_index;
|
||||
}
|
||||
|
||||
|
||||
/// set index
|
||||
|
@ -136,7 +136,9 @@ public:
|
||||
CleanUpAfterSentenceProcessing(ttasksptr const& ttask);
|
||||
|
||||
const std::string &
|
||||
GetArgLine() const { return m_argLine; }
|
||||
GetArgLine() const {
|
||||
return m_argLine;
|
||||
}
|
||||
|
||||
// given a target phrase containing only factors specified in mask
|
||||
// return true if the feature function can be evaluated
|
||||
@ -153,8 +155,8 @@ public:
|
||||
// source from the input sentence
|
||||
virtual void
|
||||
EvaluateInIsolation(const Phrase &source, const TargetPhrase &targetPhrase,
|
||||
ScoreComponentCollection& scoreBreakdown,
|
||||
ScoreComponentCollection& estimatedFutureScore) const = 0;
|
||||
ScoreComponentCollection& scoreBreakdown,
|
||||
ScoreComponentCollection& estimatedFutureScore) const = 0;
|
||||
|
||||
// override this method if you want to change the input before decoding
|
||||
virtual void ChangeSource(InputType * const&input) const { }
|
||||
|
@ -147,8 +147,7 @@ void InternalTree::GetUnbinarizedChildren(std::vector<TreePointer> &ret) const
|
||||
const std::string &label = (*itx)->GetLabel();
|
||||
if (!label.empty() && label[0] == '^') {
|
||||
(*itx)->GetUnbinarizedChildren(ret);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
ret.push_back(*itx);
|
||||
}
|
||||
}
|
||||
|
@ -96,8 +96,7 @@ public:
|
||||
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
|
||||
|
||||
// Python-like generator that yields next nonterminal leaf on every call
|
||||
$generator(leafNT)
|
||||
{
|
||||
$generator(leafNT) {
|
||||
std::vector<TreePointer>::iterator it;
|
||||
InternalTree* tree;
|
||||
leafNT(InternalTree* root = 0): tree(root) {}
|
||||
@ -116,8 +115,7 @@ public:
|
||||
|
||||
|
||||
// Python-like generator that yields the parent of the next nonterminal leaf on every call
|
||||
$generator(leafNTParent)
|
||||
{
|
||||
$generator(leafNTParent) {
|
||||
std::vector<TreePointer>::iterator it;
|
||||
InternalTree* tree;
|
||||
leafNTParent(InternalTree* root = 0): tree(root) {}
|
||||
@ -135,8 +133,7 @@ public:
|
||||
};
|
||||
|
||||
// Python-like generator that yields the next nonterminal leaf on every call, and also stores the path from the root of the tree to the nonterminal
|
||||
$generator(leafNTPath)
|
||||
{
|
||||
$generator(leafNTPath) {
|
||||
std::vector<TreePointer>::iterator it;
|
||||
InternalTree* tree;
|
||||
std::vector<InternalTree*> * path;
|
||||
|
@ -66,9 +66,9 @@ LexicalReordering(const std::string &line)
|
||||
|
||||
// sanity check: number of default scores
|
||||
size_t numScores
|
||||
= m_numScoreComponents
|
||||
= m_numScoreComponents
|
||||
= m_numTuneableComponents
|
||||
= m_configuration->GetNumScoreComponents();
|
||||
= m_configuration->GetNumScoreComponents();
|
||||
UTIL_THROW_IF2(m_haveDefaultScores && m_defaultScores.size() != numScores,
|
||||
"wrong number of default scores (" << m_defaultScores.size()
|
||||
<< ") for lexicalized reordering model (expected "
|
||||
@ -89,7 +89,7 @@ Load()
|
||||
typedef LexicalReorderingTable LRTable;
|
||||
if (m_filePath.size())
|
||||
m_table.reset(LRTable::LoadAvailable(m_filePath, m_factorsF,
|
||||
m_factorsE, std::vector<FactorType>()));
|
||||
m_factorsE, std::vector<FactorType>()));
|
||||
}
|
||||
|
||||
Scores
|
||||
@ -158,7 +158,7 @@ LexicalReordering::
|
||||
SetCache(TranslationOptionList& tol) const
|
||||
{
|
||||
BOOST_FOREACH(TranslationOption* to, tol)
|
||||
this->SetCache(*to);
|
||||
this->SetCache(*to);
|
||||
}
|
||||
|
||||
|
||||
|
@ -101,7 +101,7 @@ GetOrientation(int const reoDistance) const
|
||||
// this one is for HierarchicalReorderingBackwardState
|
||||
return ((m_modelType == LeftRight)
|
||||
? (reoDistance >= 1) ? R : L
|
||||
: (reoDistance == 1) ? M
|
||||
: (reoDistance == 1) ? M
|
||||
: (m_modelType == Monotonic) ? NM
|
||||
: (reoDistance == -1) ? S
|
||||
: (m_modelType == MSD) ? D
|
||||
@ -115,7 +115,7 @@ GetOrientation(WordsRange const& prev, WordsRange const& cur,
|
||||
{
|
||||
return ((m_modelType == LeftRight)
|
||||
? cur.GetStartPos() > prev.GetEndPos() ? R : L
|
||||
: IsMonotonicStep(prev,cur,cov) ? M
|
||||
: IsMonotonicStep(prev,cur,cov) ? M
|
||||
: (m_modelType == Monotonic) ? NM
|
||||
: IsSwap(prev,cur,cov) ? S
|
||||
: (m_modelType == MSD) ? D
|
||||
@ -263,7 +263,7 @@ CopyScores(ScoreComponentCollection* accum,
|
||||
|
||||
const SparseReordering* sparse = m_configuration.GetSparseReordering();
|
||||
if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
|
||||
m_direction, accum);
|
||||
m_direction, accum);
|
||||
}
|
||||
|
||||
|
||||
@ -342,7 +342,7 @@ Expand(const TranslationOption& topt, const InputType& input,
|
||||
LRModel const& lrmodel = m_configuration;
|
||||
WordsRange const cur = topt.GetSourceWordsRange();
|
||||
LRModel::ReorderingType reoType = (m_first ? lrmodel.GetOrientation(cur)
|
||||
: lrmodel.GetOrientation(m_prevRange,cur));
|
||||
: lrmodel.GetOrientation(m_prevRange,cur));
|
||||
CopyScores(scores, topt, input, reoType);
|
||||
}
|
||||
return new PhraseBasedReorderingState(this, topt);
|
||||
|
@ -44,19 +44,18 @@ public:
|
||||
static const ReorderingType L = 1; // left
|
||||
static const ReorderingType MAX = 3; // largest possible
|
||||
#else
|
||||
enum ReorderingType
|
||||
{
|
||||
M = 0, // monotonic
|
||||
NM = 1, // non-monotonic
|
||||
S = 1, // swap
|
||||
D = 2, // discontinuous
|
||||
DL = 2, // discontinuous, left
|
||||
DR = 3, // discontinuous, right
|
||||
R = 0, // right
|
||||
L = 1, // left
|
||||
MAX = 3, // largest possible
|
||||
NONE = 4 // largest possible
|
||||
};
|
||||
enum ReorderingType {
|
||||
M = 0, // monotonic
|
||||
NM = 1, // non-monotonic
|
||||
S = 1, // swap
|
||||
D = 2, // discontinuous
|
||||
DL = 2, // discontinuous, left
|
||||
DR = 3, // discontinuous, right
|
||||
R = 0, // right
|
||||
L = 1, // left
|
||||
MAX = 3, // largest possible
|
||||
NONE = 4 // largest possible
|
||||
};
|
||||
#endif
|
||||
// determine orientation, depending on model:
|
||||
|
||||
|
@ -114,10 +114,10 @@ void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id,
|
||||
position <= SparseReorderingFeatureKey::Last; ++position) {
|
||||
for (int reoType = 0; reoType <= LRModel::MAX; ++reoType) {
|
||||
SparseReorderingFeatureKey
|
||||
key(index, static_cast<SparseReorderingFeatureKey::Type>(type),
|
||||
factor, isCluster,
|
||||
static_cast<SparseReorderingFeatureKey::Position>(position),
|
||||
side, static_cast<LRModel::ReorderingType>(reoType));
|
||||
key(index, static_cast<SparseReorderingFeatureKey::Type>(type),
|
||||
factor, isCluster,
|
||||
static_cast<SparseReorderingFeatureKey::Position>(position),
|
||||
side, static_cast<LRModel::ReorderingType>(reoType));
|
||||
m_featureMap.insert(pair<SparseReorderingFeatureKey, FName>(key,m_producer->GetFeatureName(key.Name(id))));
|
||||
}
|
||||
}
|
||||
|
@ -71,21 +71,18 @@ void Model1Vocabulary::Load(const std::string& fileName)
|
||||
std::string line;
|
||||
|
||||
unsigned i = 0;
|
||||
if ( getline(inFile, line) ) // first line of MGIZA vocabulary files seems to be special : "1 UNK 0" -- skip if it's this
|
||||
{
|
||||
if ( getline(inFile, line) ) { // first line of MGIZA vocabulary files seems to be special : "1 UNK 0" -- skip if it's this
|
||||
++i;
|
||||
std::vector<std::string> tokens = Tokenize(line);
|
||||
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
|
||||
unsigned id = Scan<unsigned>(tokens[0]);
|
||||
if (! ( (id == 1) && (tokens[1] == "UNK") ))
|
||||
{
|
||||
if (! ( (id == 1) && (tokens[1] == "UNK") )) {
|
||||
const Factor* factor = factorCollection.AddFactor(tokens[1],false); // TODO: can we assume that the vocabulary is know and filter the model on loading?
|
||||
bool stored = Store(factor, id);
|
||||
UTIL_THROW_IF2(!stored, "Line " << i << " in " << fileName << " overwrites existing vocabulary entry.");
|
||||
}
|
||||
}
|
||||
while ( getline(inFile, line) )
|
||||
{
|
||||
while ( getline(inFile, line) ) {
|
||||
++i;
|
||||
std::vector<std::string> tokens = Tokenize(line);
|
||||
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
|
||||
@ -104,8 +101,7 @@ void Model1LexicalTable::Load(const std::string &fileName, const Model1Vocabular
|
||||
std::string line;
|
||||
|
||||
unsigned i = 0;
|
||||
while ( getline(inFile, line) )
|
||||
{
|
||||
while ( getline(inFile, line) ) {
|
||||
++i;
|
||||
std::vector<std::string> tokens = Tokenize(line);
|
||||
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
|
||||
@ -183,35 +179,31 @@ void Model1Feature::Load()
|
||||
}
|
||||
|
||||
void Model1Feature::EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore) const
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore) const
|
||||
{
|
||||
const Sentence& sentence = static_cast<const Sentence&>(input);
|
||||
float score = 0.0;
|
||||
float norm = TransformScore(1+sentence.GetSize());
|
||||
|
||||
for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT)
|
||||
{
|
||||
for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) {
|
||||
const Word &wordT = targetPhrase.GetWord(posT);
|
||||
if ( !wordT.IsNonTerminal() )
|
||||
{
|
||||
if ( !wordT.IsNonTerminal() ) {
|
||||
float thisWordProb = m_model1.GetProbability(m_emptyWord,wordT[0]); // probability conditioned on empty word
|
||||
|
||||
// cache lookup
|
||||
bool foundInCache = false;
|
||||
{
|
||||
#ifdef WITH_THREADS
|
||||
#ifdef WITH_THREADS
|
||||
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
|
||||
#endif
|
||||
#endif
|
||||
boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> >::const_iterator sentenceCache = m_cache.find(&input);
|
||||
if (sentenceCache != m_cache.end())
|
||||
{
|
||||
if (sentenceCache != m_cache.end()) {
|
||||
boost::unordered_map<const Factor*, float>::const_iterator cacheHit = sentenceCache->second.find(wordT[0]);
|
||||
if (cacheHit != sentenceCache->second.end())
|
||||
{
|
||||
if (cacheHit != sentenceCache->second.end()) {
|
||||
foundInCache = true;
|
||||
score += cacheHit->second;
|
||||
FEATUREVERBOSE(3, "Cached score( " << wordT << " ) = " << cacheHit->second << std::endl);
|
||||
@ -219,10 +211,8 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundInCache)
|
||||
{
|
||||
for (size_t posS=1; posS<sentence.GetSize()-1; ++posS) // ignore <s> and </s>
|
||||
{
|
||||
if (!foundInCache) {
|
||||
for (size_t posS=1; posS<sentence.GetSize()-1; ++posS) { // ignore <s> and </s>
|
||||
const Word &wordS = sentence.GetWord(posS);
|
||||
float modelProb = m_model1.GetProbability(wordS[0],wordT[0]);
|
||||
FEATUREVERBOSE(4, "p( " << wordT << " | " << wordS << " ) = " << modelProb << std::endl);
|
||||
@ -231,10 +221,10 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input
|
||||
float thisWordScore = TransformScore(thisWordProb) - norm;
|
||||
FEATUREVERBOSE(3, "score( " << wordT << " ) = " << thisWordScore << std::endl);
|
||||
{
|
||||
#ifdef WITH_THREADS
|
||||
#ifdef WITH_THREADS
|
||||
// need to update cache; write lock
|
||||
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
|
||||
#endif
|
||||
#endif
|
||||
m_cache[&input][wordT[0]] = thisWordScore;
|
||||
}
|
||||
score += thisWordScore;
|
||||
@ -247,14 +237,13 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input
|
||||
|
||||
void Model1Feature::CleanUpAfterSentenceProcessing(const InputType& source)
|
||||
{
|
||||
#ifdef WITH_THREADS
|
||||
#ifdef WITH_THREADS
|
||||
// need to update cache; write lock
|
||||
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
|
||||
#endif
|
||||
#endif
|
||||
// clear cache
|
||||
boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> >::iterator sentenceCache = m_cache.find(&source);
|
||||
if (sentenceCache != m_cache.end())
|
||||
{
|
||||
if (sentenceCache != m_cache.end()) {
|
||||
sentenceCache->second.clear();
|
||||
m_cache.erase(sentenceCache);
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ class Model1Vocabulary
|
||||
{
|
||||
public:
|
||||
|
||||
#define INVALID_ID std::numeric_limits<unsigned>::max() // UINT_MAX
|
||||
#define INVALID_ID std::numeric_limits<unsigned>::max() // UINT_MAX
|
||||
static const std::string GIZANULL;
|
||||
|
||||
Model1Vocabulary();
|
||||
@ -103,10 +103,10 @@ private:
|
||||
|
||||
// cache
|
||||
mutable boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> > m_cache;
|
||||
#ifdef WITH_THREADS
|
||||
#ifdef WITH_THREADS
|
||||
// reader-writer lock
|
||||
mutable boost::shared_mutex m_accessLock;
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
@ -288,7 +288,7 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
||||
if (currTarPhr.GetAlignNonTerm().GetSize() != 0) {
|
||||
const boost::shared_ptr<void> data = currTarPhr.GetData("Orientation");
|
||||
UTIL_THROW_IF2(!data, GetScoreProducerDescription()
|
||||
<< ": Orientation data not set in target phrase. ");
|
||||
<< ": Orientation data not set in target phrase. ");
|
||||
reoClassData = static_cast<const PhraseOrientationFeature::ReoClassData*>( data.get() );
|
||||
}
|
||||
|
||||
|
@ -301,15 +301,15 @@ class PhraseOrientationFeature : public StatefulFeatureFunction
|
||||
public:
|
||||
|
||||
struct ReoClassData {
|
||||
public:
|
||||
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
|
||||
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
|
||||
bool firstNonTerminalIsBoundary;
|
||||
bool firstNonTerminalPreviousSourceSpanIsAligned;
|
||||
bool firstNonTerminalFollowingSourceSpanIsAligned;
|
||||
bool lastNonTerminalIsBoundary;
|
||||
bool lastNonTerminalPreviousSourceSpanIsAligned;
|
||||
bool lastNonTerminalFollowingSourceSpanIsAligned;
|
||||
public:
|
||||
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
|
||||
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
|
||||
bool firstNonTerminalIsBoundary;
|
||||
bool firstNonTerminalPreviousSourceSpanIsAligned;
|
||||
bool firstNonTerminalFollowingSourceSpanIsAligned;
|
||||
bool lastNonTerminalIsBoundary;
|
||||
bool lastNonTerminalPreviousSourceSpanIsAligned;
|
||||
bool lastNonTerminalFollowingSourceSpanIsAligned;
|
||||
};
|
||||
|
||||
PhraseOrientationFeature(const std::string &line);
|
||||
|
@ -39,9 +39,9 @@ void RulePairUnlexicalizedSource::SetParameter(const std::string& key, const std
|
||||
|
||||
|
||||
void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{
|
||||
const Factor* targetPhraseLHS = targetPhrase.GetTargetLHS()[0];
|
||||
if ( !m_glueRules && (targetPhraseLHS == m_glueTargetLHS) ) {
|
||||
@ -51,8 +51,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
|
||||
return;
|
||||
}
|
||||
|
||||
for (size_t posS=0; posS<source.GetSize(); ++posS)
|
||||
{
|
||||
for (size_t posS=0; posS<source.GetSize(); ++posS) {
|
||||
const Word &wordS = source.GetWord(posS);
|
||||
if ( !wordS.IsNonTerminal() ) {
|
||||
return;
|
||||
@ -61,8 +60,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
|
||||
|
||||
ostringstream namestr;
|
||||
|
||||
for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT)
|
||||
{
|
||||
for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) {
|
||||
const Word &wordT = targetPhrase.GetWord(posT);
|
||||
const Factor* factorT = wordT[0];
|
||||
if ( wordT.IsNonTerminal() ) {
|
||||
@ -78,8 +76,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
|
||||
namestr << targetPhraseLHS->GetString() << "|";
|
||||
|
||||
for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin();
|
||||
it!=targetPhrase.GetAlignNonTerm().end(); ++it)
|
||||
{
|
||||
it!=targetPhrase.GetAlignNonTerm().end(); ++it) {
|
||||
namestr << "|" << it->first << "-" << it->second;
|
||||
}
|
||||
|
||||
|
@ -26,16 +26,16 @@ void RuleScope::EvaluateInIsolation(const Phrase &source
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{
|
||||
if (IsGlueRule(source)) {
|
||||
return;
|
||||
return;
|
||||
}
|
||||
|
||||
float score = 0;
|
||||
|
||||
if (source.GetSize() > 0 && source.Front().IsNonTerminal()) {
|
||||
++score;
|
||||
++score;
|
||||
}
|
||||
if (source.GetSize() > 1 && source.Back().IsNonTerminal()) {
|
||||
++score;
|
||||
++score;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -61,23 +61,20 @@ void RuleScope::EvaluateInIsolation(const Phrase &source
|
||||
*/
|
||||
|
||||
if (m_perScope) {
|
||||
UTIL_THROW_IF2(m_numScoreComponents <= score,
|
||||
"Insufficient number of score components. Scope=" << score << ". NUmber of score components=" << score);
|
||||
vector<float> scores(m_numScoreComponents, 0);
|
||||
scores[score] = 1;
|
||||
UTIL_THROW_IF2(m_numScoreComponents <= score,
|
||||
"Insufficient number of score components. Scope=" << score << ". NUmber of score components=" << score);
|
||||
vector<float> scores(m_numScoreComponents, 0);
|
||||
scores[score] = 1;
|
||||
|
||||
if (m_futureCostOnly) {
|
||||
estimatedFutureScore.PlusEquals(this, scores);
|
||||
}
|
||||
else {
|
||||
scoreBreakdown.PlusEquals(this, scores);
|
||||
}
|
||||
}
|
||||
else if (m_futureCostOnly) {
|
||||
estimatedFutureScore.PlusEquals(this, score);
|
||||
}
|
||||
else {
|
||||
scoreBreakdown.PlusEquals(this, score);
|
||||
if (m_futureCostOnly) {
|
||||
estimatedFutureScore.PlusEquals(this, scores);
|
||||
} else {
|
||||
scoreBreakdown.PlusEquals(this, scores);
|
||||
}
|
||||
} else if (m_futureCostOnly) {
|
||||
estimatedFutureScore.PlusEquals(this, score);
|
||||
} else {
|
||||
scoreBreakdown.PlusEquals(this, score);
|
||||
}
|
||||
}
|
||||
|
||||
@ -85,14 +82,11 @@ void RuleScope::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "source-syntax") {
|
||||
m_sourceSyntax = Scan<bool>(value);
|
||||
}
|
||||
else if (key == "per-scope") {
|
||||
m_perScope = Scan<bool>(value);
|
||||
}
|
||||
else if ("future-cost-only") {
|
||||
m_futureCostOnly = Scan<bool>(value);
|
||||
}
|
||||
else {
|
||||
} else if (key == "per-scope") {
|
||||
m_perScope = Scan<bool>(value);
|
||||
} else if ("future-cost-only") {
|
||||
m_futureCostOnly = Scan<bool>(value);
|
||||
} else {
|
||||
StatelessFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
@ -72,7 +72,7 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy
|
||||
|
||||
bool full_sentence = (mytree->GetChildren().back()->GetLabel() == "</s>" || (mytree->GetChildren().back()->GetLabel() == "SEND" && mytree->GetChildren().back()->GetChildren().back()->GetLabel() == "</s>"));
|
||||
if (m_binarized && full_sentence) {
|
||||
mytree->Unbinarize();
|
||||
mytree->Unbinarize();
|
||||
}
|
||||
|
||||
return new TreeState(mytree);
|
||||
|
@ -183,8 +183,8 @@ public:
|
||||
|
||||
// optionally update translation options using leave-one-out
|
||||
std::vector<bool> keep = (m_leaveOneOut.size() > 0)
|
||||
? LeaveOneOut(translationOptionList, correct)
|
||||
: std::vector<bool>(translationOptionList.size(), true);
|
||||
? LeaveOneOut(translationOptionList, correct)
|
||||
: std::vector<bool>(translationOptionList.size(), true);
|
||||
|
||||
// check whether we (still) have some correct translation
|
||||
int firstCorrect = -1;
|
||||
@ -312,11 +312,11 @@ public:
|
||||
return;
|
||||
|
||||
UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
|
||||
"This feature function requires the TabbedSentence input type");
|
||||
"This feature function requires the TabbedSentence input type");
|
||||
|
||||
const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
|
||||
UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2,
|
||||
"TabbedSentence must contain target<tab>alignment");
|
||||
"TabbedSentence must contain target<tab>alignment");
|
||||
|
||||
// target sentence represented as a phrase
|
||||
Phrase *target = new Phrase();
|
||||
|
1171
moses/Hypothesis.cpp
1171
moses/Hypothesis.cpp
File diff suppressed because it is too large
Load Diff
@ -313,7 +313,7 @@ struct CompareHypothesisTotalScore {
|
||||
ObjectPool<Hypothesis> &pool = Hypothesis::GetObjectPool(); \
|
||||
pool.freeObject(hypo); \
|
||||
} \
|
||||
|
||||
|
||||
#else
|
||||
#define FREEHYPO(hypo) delete hypo
|
||||
#endif
|
||||
|
@ -161,7 +161,7 @@ public:
|
||||
return m_detailTreeFragmentsOutputCollector.get();
|
||||
}
|
||||
|
||||
void SetInputStreamFromString(std::istringstream &input){
|
||||
void SetInputStreamFromString(std::istringstream &input) {
|
||||
m_inputStream = &input;
|
||||
}
|
||||
|
||||
|
@ -13,12 +13,14 @@ namespace Moses
|
||||
|
||||
typedef Eigen::Map<Eigen::Matrix<int,Eigen::Dynamic,1> > EigenMap;
|
||||
|
||||
RDLM::~RDLM() {
|
||||
RDLM::~RDLM()
|
||||
{
|
||||
delete lm_head_base_instance_;
|
||||
delete lm_label_base_instance_;
|
||||
}
|
||||
|
||||
void RDLM::Load() {
|
||||
void RDLM::Load()
|
||||
{
|
||||
|
||||
lm_head_base_instance_ = new nplm::neuralTM();
|
||||
lm_head_base_instance_->read(m_path_head_lm);
|
||||
@ -87,8 +89,8 @@ void RDLM::Load() {
|
||||
|
||||
// just score provided file, then exit.
|
||||
if (!m_debugPath.empty()) {
|
||||
ScoreFile(m_debugPath);
|
||||
exit(1);
|
||||
ScoreFile(m_debugPath);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// {
|
||||
@ -202,8 +204,7 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
|
||||
// ignore glue rules
|
||||
if (root->GetLabel() == m_glueSymbol) {
|
||||
// recursion
|
||||
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it)
|
||||
{
|
||||
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
|
||||
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels);
|
||||
}
|
||||
return;
|
||||
@ -213,11 +214,11 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
|
||||
if (m_binarized && root->GetLabel()[0] == '^' && !ancestor_heads.empty()) {
|
||||
// recursion
|
||||
if (root->IsLeafNT() && m_context_up > 1 && ancestor_heads.size()) {
|
||||
root = back_pointers.find(root)->second.get();
|
||||
rescoring_levels = m_context_up-1;
|
||||
root = back_pointers.find(root)->second.get();
|
||||
rescoring_levels = m_context_up-1;
|
||||
}
|
||||
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
|
||||
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels);
|
||||
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -239,35 +240,34 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
|
||||
if (root->GetLength() == 1 && root->GetChildren()[0]->IsTerminal()) {
|
||||
// root of tree: score without context
|
||||
if (ancestor_heads.empty() || (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head)) {
|
||||
std::vector<int> ngram_head_null (static_head_null);
|
||||
ngram_head_null.back() = lm_head->lookup_output_word(root->GetChildren()[0]->GetLabel());
|
||||
if (m_isPretermBackoff && ngram_head_null.back() == 0) {
|
||||
ngram_head_null.back() = lm_head->lookup_output_word(root->GetLabel());
|
||||
}
|
||||
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head) {
|
||||
std::vector<int>::iterator it = ngram_head_null.begin();
|
||||
std::fill_n(it, m_context_left, static_start_head);
|
||||
it += m_context_left;
|
||||
std::fill_n(it, m_context_left, static_start_label);
|
||||
it += m_context_left;
|
||||
std::fill_n(it, m_context_right, static_stop_head);
|
||||
it += m_context_right;
|
||||
std::fill_n(it, m_context_right, static_stop_label);
|
||||
it += m_context_right;
|
||||
size_t context_up_nonempty = std::min(m_context_up, ancestor_heads.size());
|
||||
it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
|
||||
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
|
||||
}
|
||||
if (ancestor_labels.size() >= m_context_up && !num_virtual) {
|
||||
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
|
||||
}
|
||||
else {
|
||||
boost::hash_combine(boundary_hash, ngram_head_null.back());
|
||||
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
|
||||
}
|
||||
std::vector<int> ngram_head_null (static_head_null);
|
||||
ngram_head_null.back() = lm_head->lookup_output_word(root->GetChildren()[0]->GetLabel());
|
||||
if (m_isPretermBackoff && ngram_head_null.back() == 0) {
|
||||
ngram_head_null.back() = lm_head->lookup_output_word(root->GetLabel());
|
||||
}
|
||||
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head) {
|
||||
std::vector<int>::iterator it = ngram_head_null.begin();
|
||||
std::fill_n(it, m_context_left, static_start_head);
|
||||
it += m_context_left;
|
||||
std::fill_n(it, m_context_left, static_start_label);
|
||||
it += m_context_left;
|
||||
std::fill_n(it, m_context_right, static_stop_head);
|
||||
it += m_context_right;
|
||||
std::fill_n(it, m_context_right, static_stop_label);
|
||||
it += m_context_right;
|
||||
size_t context_up_nonempty = std::min(m_context_up, ancestor_heads.size());
|
||||
it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
|
||||
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
|
||||
}
|
||||
if (ancestor_labels.size() >= m_context_up && !num_virtual) {
|
||||
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
|
||||
} else {
|
||||
boost::hash_combine(boundary_hash, ngram_head_null.back());
|
||||
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
|
||||
}
|
||||
}
|
||||
return;
|
||||
// we only need to re-visit previous hypotheses if we have more context available.
|
||||
// we only need to re-visit previous hypotheses if we have more context available.
|
||||
} else if (root->IsLeafNT()) {
|
||||
if (m_context_up > 1 && ancestor_heads.size()) {
|
||||
root = back_pointers.find(root)->second.get();
|
||||
@ -276,8 +276,7 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
|
||||
return;
|
||||
}
|
||||
rescoring_levels = m_context_up-1;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -302,19 +301,17 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
|
||||
int reached_end = 0;
|
||||
int label_idx, label_idx_out;
|
||||
if (m_binarized && head_label[0] == '^') {
|
||||
virtual_head = true;
|
||||
if (m_binarized == 1 || (m_binarized == 3 && head_label[2] == 'l')) {
|
||||
reached_end = 1; //indicate that we've seen the first symbol of the RHS
|
||||
}
|
||||
else if (m_binarized == 2 || (m_binarized == 3 && head_label[2] == 'r')) {
|
||||
reached_end = 2; // indicate that we've seen the last symbol of the RHS
|
||||
}
|
||||
// with 'full' binarization, direction is encoded in 2nd char
|
||||
std::string clipped_label = (m_binarized == 3) ? head_label.substr(2,head_label.size()-2) : head_label.substr(1,head_label.size()-1);
|
||||
label_idx = lm_label->lookup_input_word(clipped_label);
|
||||
label_idx_out = lm_label->lookup_output_word(clipped_label);
|
||||
}
|
||||
else {
|
||||
virtual_head = true;
|
||||
if (m_binarized == 1 || (m_binarized == 3 && head_label[2] == 'l')) {
|
||||
reached_end = 1; //indicate that we've seen the first symbol of the RHS
|
||||
} else if (m_binarized == 2 || (m_binarized == 3 && head_label[2] == 'r')) {
|
||||
reached_end = 2; // indicate that we've seen the last symbol of the RHS
|
||||
}
|
||||
// with 'full' binarization, direction is encoded in 2nd char
|
||||
std::string clipped_label = (m_binarized == 3) ? head_label.substr(2,head_label.size()-2) : head_label.substr(1,head_label.size()-1);
|
||||
label_idx = lm_label->lookup_input_word(clipped_label);
|
||||
label_idx_out = lm_label->lookup_output_word(clipped_label);
|
||||
} else {
|
||||
reached_end = 3; // indicate that we've seen first and last symbol of the RHS
|
||||
label_idx = lm_label->lookup_input_word(head_label);
|
||||
label_idx_out = lm_label->lookup_output_word(head_label);
|
||||
@ -324,49 +321,47 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
|
||||
|
||||
// root of tree: score without context
|
||||
if (ancestor_heads.empty() || (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head)) {
|
||||
if (head_idx != static_dummy_head && head_idx != static_head_head) {
|
||||
std::vector<int> ngram_head_null (static_head_null);
|
||||
*(ngram_head_null.end()-2) = label_idx;
|
||||
ngram_head_null.back() = head_ids.second;
|
||||
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
|
||||
std::vector<int>::iterator it = ngram_head_null.begin();
|
||||
std::fill_n(it, m_context_left, static_start_head);
|
||||
it += m_context_left;
|
||||
std::fill_n(it, m_context_left, static_start_label);
|
||||
it += m_context_left;
|
||||
std::fill_n(it, m_context_right, static_stop_head);
|
||||
it += m_context_right;
|
||||
std::fill_n(it, m_context_right, static_stop_label);
|
||||
it += m_context_right;
|
||||
it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
|
||||
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
|
||||
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
|
||||
}
|
||||
else {
|
||||
boost::hash_combine(boundary_hash, ngram_head_null.back());
|
||||
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
|
||||
}
|
||||
}
|
||||
std::vector<int> ngram_label_null (static_label_null);
|
||||
ngram_label_null.back() = label_idx_out;
|
||||
if (head_idx != static_dummy_head && head_idx != static_head_head) {
|
||||
std::vector<int> ngram_head_null (static_head_null);
|
||||
*(ngram_head_null.end()-2) = label_idx;
|
||||
ngram_head_null.back() = head_ids.second;
|
||||
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
|
||||
std::vector<int>::iterator it = ngram_label_null.begin();
|
||||
std::fill_n(it, m_context_left, static_start_head);
|
||||
it += m_context_left;
|
||||
std::fill_n(it, m_context_left, static_start_label);
|
||||
it += m_context_left;
|
||||
std::fill_n(it, m_context_right, static_stop_head);
|
||||
it += m_context_right;
|
||||
std::fill_n(it, m_context_right, static_stop_label);
|
||||
it += m_context_right;
|
||||
it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
|
||||
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
|
||||
score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
|
||||
}
|
||||
else {
|
||||
boost::hash_combine(boundary_hash, ngram_label_null.back());
|
||||
score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
|
||||
std::vector<int>::iterator it = ngram_head_null.begin();
|
||||
std::fill_n(it, m_context_left, static_start_head);
|
||||
it += m_context_left;
|
||||
std::fill_n(it, m_context_left, static_start_label);
|
||||
it += m_context_left;
|
||||
std::fill_n(it, m_context_right, static_stop_head);
|
||||
it += m_context_right;
|
||||
std::fill_n(it, m_context_right, static_stop_label);
|
||||
it += m_context_right;
|
||||
it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
|
||||
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
|
||||
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
|
||||
} else {
|
||||
boost::hash_combine(boundary_hash, ngram_head_null.back());
|
||||
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
|
||||
}
|
||||
}
|
||||
std::vector<int> ngram_label_null (static_label_null);
|
||||
ngram_label_null.back() = label_idx_out;
|
||||
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
|
||||
std::vector<int>::iterator it = ngram_label_null.begin();
|
||||
std::fill_n(it, m_context_left, static_start_head);
|
||||
it += m_context_left;
|
||||
std::fill_n(it, m_context_left, static_start_label);
|
||||
it += m_context_left;
|
||||
std::fill_n(it, m_context_right, static_stop_head);
|
||||
it += m_context_right;
|
||||
std::fill_n(it, m_context_right, static_stop_label);
|
||||
it += m_context_right;
|
||||
it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
|
||||
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
|
||||
score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
|
||||
} else {
|
||||
boost::hash_combine(boundary_hash, ngram_label_null.back());
|
||||
score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
|
||||
}
|
||||
}
|
||||
|
||||
ancestor_heads.push_back(head_idx);
|
||||
@ -374,15 +369,14 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
|
||||
|
||||
if (virtual_head) {
|
||||
num_virtual = m_context_up;
|
||||
}
|
||||
else if (num_virtual) {
|
||||
--num_virtual;
|
||||
} else if (num_virtual) {
|
||||
--num_virtual;
|
||||
}
|
||||
|
||||
|
||||
// fill ancestor context (same for all children)
|
||||
if (context_up_nonempty < m_context_up) {
|
||||
++context_up_nonempty;
|
||||
++context_up_nonempty;
|
||||
}
|
||||
size_t up_padding = m_context_up - context_up_nonempty;
|
||||
|
||||
@ -439,13 +433,13 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
|
||||
std::vector<int>::iterator it = ngram.begin();
|
||||
|
||||
if (left_padding > 0) {
|
||||
it += left_padding;
|
||||
it += left_padding;
|
||||
}
|
||||
|
||||
it = std::copy(heads.begin()+left_offset, heads.begin()+i, it);
|
||||
|
||||
if (left_padding > 0) {
|
||||
it += left_padding;
|
||||
it += left_padding;
|
||||
}
|
||||
|
||||
it = std::copy(labels.begin()+left_offset, labels.begin()+i, it);
|
||||
@ -453,33 +447,30 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
|
||||
it = std::copy(heads.begin()+i+1, heads.begin()+right_offset, it);
|
||||
|
||||
if (right_padding > 0) {
|
||||
if (reached_end == 2 || reached_end == 3) {
|
||||
std::fill_n(it, right_padding, static_stop_head);
|
||||
it += right_padding;
|
||||
}
|
||||
else {
|
||||
std::copy(static_label_null.begin()+offset_up_head-m_context_right-right_padding, static_label_null.begin()-m_context_right+offset_up_head, it);
|
||||
}
|
||||
if (reached_end == 2 || reached_end == 3) {
|
||||
std::fill_n(it, right_padding, static_stop_head);
|
||||
it += right_padding;
|
||||
} else {
|
||||
std::copy(static_label_null.begin()+offset_up_head-m_context_right-right_padding, static_label_null.begin()-m_context_right+offset_up_head, it);
|
||||
}
|
||||
}
|
||||
|
||||
it = std::copy(labels.begin()+i+1, labels.begin()+right_offset, it);
|
||||
|
||||
if (right_padding > 0) {
|
||||
if (reached_end == 2 || reached_end == 3) {
|
||||
std::fill_n(it, right_padding, static_stop_label);
|
||||
it += right_padding;
|
||||
}
|
||||
else {
|
||||
std::copy(static_label_null.begin()+offset_up_head-right_padding, static_label_null.begin()+offset_up_head, it);
|
||||
}
|
||||
if (reached_end == 2 || reached_end == 3) {
|
||||
std::fill_n(it, right_padding, static_stop_label);
|
||||
it += right_padding;
|
||||
} else {
|
||||
std::copy(static_label_null.begin()+offset_up_head-right_padding, static_label_null.begin()+offset_up_head, it);
|
||||
}
|
||||
}
|
||||
|
||||
ngram.back() = labels_output[i];
|
||||
|
||||
if (ancestor_labels.size() >= m_context_up && !num_virtual) {
|
||||
score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
boost::hash_combine(boundary_hash, ngram.back());
|
||||
score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
|
||||
}
|
||||
@ -492,8 +483,7 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
|
||||
|
||||
if (ancestor_labels.size() >= m_context_up && !num_virtual) {
|
||||
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
boost::hash_combine(boundary_hash, ngram.back());
|
||||
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
|
||||
}
|
||||
@ -502,25 +492,24 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
|
||||
|
||||
// next time, we need to add less start symbol padding
|
||||
if (left_padding)
|
||||
left_padding--;
|
||||
left_padding--;
|
||||
else
|
||||
left_offset++;
|
||||
left_offset++;
|
||||
|
||||
if (right_offset < heads.size())
|
||||
right_offset++;
|
||||
right_offset++;
|
||||
else
|
||||
right_padding++;
|
||||
right_padding++;
|
||||
}
|
||||
|
||||
|
||||
if (rescoring_levels == 1) {
|
||||
ancestor_heads.pop_back();
|
||||
ancestor_labels.pop_back();
|
||||
return;
|
||||
ancestor_heads.pop_back();
|
||||
ancestor_labels.pop_back();
|
||||
return;
|
||||
}
|
||||
// recursion
|
||||
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it)
|
||||
{
|
||||
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
|
||||
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels - 1);
|
||||
}
|
||||
ancestor_heads.pop_back();
|
||||
@ -531,19 +520,17 @@ InternalTree* RDLM::GetHead(InternalTree* root, const TreePointerMap & back_poin
|
||||
{
|
||||
InternalTree *tree;
|
||||
|
||||
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it)
|
||||
{
|
||||
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
|
||||
if ((*it)->IsLeafNT()) {
|
||||
tree = back_pointers.find(it->get())->second.get();
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
tree = it->get();
|
||||
}
|
||||
|
||||
if (m_binarized && tree->GetLabel()[0] == '^') {
|
||||
head_ptr = GetHead(tree, back_pointers, IDs, head_ptr);
|
||||
if (head_ptr != NULL && !m_isPTKVZ) {
|
||||
return head_ptr;
|
||||
head_ptr = GetHead(tree, back_pointers, IDs, head_ptr);
|
||||
if (head_ptr != NULL && !m_isPTKVZ) {
|
||||
return head_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
@ -563,8 +550,7 @@ InternalTree* RDLM::GetHead(InternalTree* root, const TreePointerMap & back_poin
|
||||
for (std::vector<TreePointer>::const_iterator it2 = tree->GetChildren().begin(); it2 != tree->GetChildren().end(); ++it2) {
|
||||
if ((*it2)->IsLeafNT()) {
|
||||
tree2 = back_pointers.find(it2->get())->second.get();
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
tree2 = it2->get();
|
||||
}
|
||||
if (tree2->GetLabel() == "PTKVZ" && tree2->GetLength() == 1 && tree2->GetChildren()[0]->IsTerminal()) {
|
||||
@ -602,18 +588,18 @@ void RDLM::GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & bac
|
||||
// extract head words / labels
|
||||
for (std::vector<TreePointer>::const_iterator itx = real_children.begin(); itx != real_children.end(); itx = ++real_children) {
|
||||
if ((*itx)->IsTerminal()) {
|
||||
std::cerr << "non-terminal node " << root->GetLabel() << " has a mix of terminal and non-terminal children. This shouldn't happen..." << std::endl;
|
||||
std::cerr << "children: ";
|
||||
for (std::vector<TreePointer>::const_iterator itx2 = root->GetChildren().begin(); itx2 != root->GetChildren().end(); ++itx2) {
|
||||
std::cerr << (*itx2)->GetLabel() << " ";
|
||||
}
|
||||
std::cerr << std::endl;
|
||||
// resize vectors (should we throw exception instead?)
|
||||
heads.pop_back();
|
||||
labels.pop_back();
|
||||
heads_output.pop_back();
|
||||
labels_output.pop_back();
|
||||
continue;
|
||||
std::cerr << "non-terminal node " << root->GetLabel() << " has a mix of terminal and non-terminal children. This shouldn't happen..." << std::endl;
|
||||
std::cerr << "children: ";
|
||||
for (std::vector<TreePointer>::const_iterator itx2 = root->GetChildren().begin(); itx2 != root->GetChildren().end(); ++itx2) {
|
||||
std::cerr << (*itx2)->GetLabel() << " ";
|
||||
}
|
||||
std::cerr << std::endl;
|
||||
// resize vectors (should we throw exception instead?)
|
||||
heads.pop_back();
|
||||
labels.pop_back();
|
||||
heads_output.pop_back();
|
||||
labels_output.pop_back();
|
||||
continue;
|
||||
}
|
||||
InternalTree* child = itx->get();
|
||||
// also go through trees or previous hypotheses to rescore nodes for which more context has become available
|
||||
@ -659,8 +645,7 @@ void RDLM::GetIDs(const std::string & head, const std::string & preterminal, std
|
||||
}
|
||||
if (m_sharedVocab) {
|
||||
IDs.second = IDs.first;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
IDs.second = lm_head_base_instance_->lookup_output_word(head);
|
||||
if (m_isPretermBackoff && IDs.second == 0) {
|
||||
IDs.second = lm_head_base_instance_->lookup_output_word(preterminal);
|
||||
@ -672,12 +657,12 @@ void RDLM::GetIDs(const std::string & head, const std::string & preterminal, std
|
||||
void RDLM::PrintInfo(std::vector<int> &ngram, nplm::neuralTM* lm) const
|
||||
{
|
||||
for (size_t i = 0; i < ngram.size()-1; i++) {
|
||||
std::cerr << lm->get_input_vocabulary().words()[ngram[i]] << " ";
|
||||
std::cerr << lm->get_input_vocabulary().words()[ngram[i]] << " ";
|
||||
}
|
||||
std::cerr << lm->get_output_vocabulary().words()[ngram.back()] << " ";
|
||||
|
||||
for (size_t i = 0; i < ngram.size(); i++) {
|
||||
std::cerr << ngram[i] << " ";
|
||||
std::cerr << ngram[i] << " ";
|
||||
}
|
||||
std::cerr << "score: " << lm->lookup_ngram(ngram) << std::endl;
|
||||
}
|
||||
@ -691,32 +676,31 @@ RDLM::TreePointerMap RDLM::AssociateLeafNTs(InternalTree* root, const std::vecto
|
||||
bool found = false;
|
||||
InternalTree::leafNT next_leafNT(root);
|
||||
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
|
||||
found = next_leafNT(it);
|
||||
if (found) {
|
||||
ret[it->get()] = *it_prev;
|
||||
}
|
||||
else {
|
||||
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
|
||||
}
|
||||
found = next_leafNT(it);
|
||||
if (found) {
|
||||
ret[it->get()] = *it_prev;
|
||||
} else {
|
||||
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void RDLM::ScoreFile(std::string &path)
|
||||
{
|
||||
InputFileStream inStream(path);
|
||||
std::string line, null;
|
||||
std::vector<int> ancestor_heads(m_context_up, static_root_head);
|
||||
std::vector<int> ancestor_labels(m_context_up, static_root_label);
|
||||
while(getline(inStream, line)) {
|
||||
TreePointerMap back_pointers;
|
||||
boost::array<float, 4> score;
|
||||
score.fill(0);
|
||||
InternalTree* mytree (new InternalTree(line));
|
||||
size_t boundary_hash = 0;
|
||||
Score(mytree, back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
|
||||
std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << std::endl;
|
||||
}
|
||||
InputFileStream inStream(path);
|
||||
std::string line, null;
|
||||
std::vector<int> ancestor_heads(m_context_up, static_root_head);
|
||||
std::vector<int> ancestor_labels(m_context_up, static_root_label);
|
||||
while(getline(inStream, line)) {
|
||||
TreePointerMap back_pointers;
|
||||
boost::array<float, 4> score;
|
||||
score.fill(0);
|
||||
InternalTree* mytree (new InternalTree(line));
|
||||
size_t boundary_hash = 0;
|
||||
Score(mytree, back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
|
||||
std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -727,42 +711,42 @@ void RDLM::SetParameter(const std::string& key, const std::string& value)
|
||||
m_tuneable = Scan<bool>(value);
|
||||
} else if (key == "filterable") { //ignore
|
||||
} else if (key == "path_head_lm") {
|
||||
m_path_head_lm = value;
|
||||
m_path_head_lm = value;
|
||||
} else if (key == "path_label_lm") {
|
||||
m_path_label_lm = value;
|
||||
m_path_label_lm = value;
|
||||
} else if (key == "ptkvz") {
|
||||
m_isPTKVZ = Scan<bool>(value);
|
||||
m_isPTKVZ = Scan<bool>(value);
|
||||
} else if (key == "backoff") {
|
||||
m_isPretermBackoff = Scan<bool>(value);
|
||||
m_isPretermBackoff = Scan<bool>(value);
|
||||
} else if (key == "context_up") {
|
||||
m_context_up = Scan<size_t>(value);
|
||||
m_context_up = Scan<size_t>(value);
|
||||
} else if (key == "context_left") {
|
||||
m_context_left = Scan<size_t>(value);
|
||||
m_context_left = Scan<size_t>(value);
|
||||
} else if (key == "context_right") {
|
||||
m_context_right = Scan<size_t>(value);
|
||||
m_context_right = Scan<size_t>(value);
|
||||
} else if (key == "debug_path") {
|
||||
m_debugPath = value;
|
||||
m_debugPath = value;
|
||||
} else if (key == "premultiply") {
|
||||
m_premultiply = Scan<bool>(value);
|
||||
m_premultiply = Scan<bool>(value);
|
||||
} else if (key == "rerank") {
|
||||
m_rerank = Scan<bool>(value);
|
||||
m_rerank = Scan<bool>(value);
|
||||
} else if (key == "normalize_head_lm") {
|
||||
m_normalizeHeadLM = Scan<bool>(value);
|
||||
m_normalizeHeadLM = Scan<bool>(value);
|
||||
} else if (key == "normalize_label_lm") {
|
||||
m_normalizeLabelLM = Scan<bool>(value);
|
||||
m_normalizeLabelLM = Scan<bool>(value);
|
||||
} else if (key == "binarized") {
|
||||
if (value == "left")
|
||||
m_binarized = 1;
|
||||
else if (value == "right")
|
||||
m_binarized = 2;
|
||||
else if (value == "full")
|
||||
m_binarized = 3;
|
||||
else
|
||||
UTIL_THROW(util::Exception, "Unknown value for argument " << key << "=" << value);
|
||||
if (value == "left")
|
||||
m_binarized = 1;
|
||||
else if (value == "right")
|
||||
m_binarized = 2;
|
||||
else if (value == "full")
|
||||
m_binarized = 3;
|
||||
else
|
||||
UTIL_THROW(util::Exception, "Unknown value for argument " << key << "=" << value);
|
||||
} else if (key == "glue_symbol") {
|
||||
m_glueSymbol = value;
|
||||
m_glueSymbol = value;
|
||||
} else if (key == "cache_size") {
|
||||
m_cacheSize = Scan<int>(value);
|
||||
m_cacheSize = Scan<int>(value);
|
||||
} else {
|
||||
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
|
||||
}
|
||||
@ -808,8 +792,8 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
|
||||
size_t boundary_hash = 0;
|
||||
if (!m_rerank) {
|
||||
Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
|
||||
accumulator->PlusEquals(ff_idx, score[0] + score[1]);
|
||||
accumulator->PlusEquals(ff_idx+1, score[2] + score[3]);
|
||||
accumulator->PlusEquals(ff_idx, score[0] + score[1]);
|
||||
accumulator->PlusEquals(ff_idx+1, score[2] + score[3]);
|
||||
}
|
||||
mytree->Combine(previous_trees);
|
||||
if (m_rerank && full_sentence) {
|
||||
@ -818,12 +802,11 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
|
||||
accumulator->PlusEquals(ff_idx+1, score[2] + score[3]);
|
||||
}
|
||||
if (m_binarized && full_sentence) {
|
||||
mytree->Unbinarize();
|
||||
mytree->Unbinarize();
|
||||
}
|
||||
|
||||
return new RDLMState(mytree, score[1], score[3], boundary_hash);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
UTIL_THROW2("Error: RDLM active, but no internal tree structure found");
|
||||
}
|
||||
|
||||
|
167
moses/LM/RDLM.h
167
moses/LM/RDLM.h
@ -11,8 +11,9 @@
|
||||
// Sennrich, Rico (2015). Modelling and Optimizing on Syntactic N-Grams for Statistical Machine Translation. Transactions of the Association for Computational Linguistics.
|
||||
// see 'scripts/training/rdlm' for training scripts
|
||||
|
||||
namespace nplm {
|
||||
class neuralTM;
|
||||
namespace nplm
|
||||
{
|
||||
class neuralTM;
|
||||
}
|
||||
|
||||
namespace Moses
|
||||
@ -32,21 +33,21 @@ public:
|
||||
{}
|
||||
|
||||
float GetApproximateScoreHead() const {
|
||||
return m_approx_head;
|
||||
return m_approx_head;
|
||||
}
|
||||
|
||||
float GetApproximateScoreLabel() const {
|
||||
return m_approx_label;
|
||||
return m_approx_label;
|
||||
}
|
||||
|
||||
size_t GetHash() const {
|
||||
return m_hash;
|
||||
return m_hash;
|
||||
}
|
||||
|
||||
int Compare(const FFState& other) const {
|
||||
if (m_hash == static_cast<const RDLMState*>(&other)->GetHash()) return 0;
|
||||
else if (m_hash > static_cast<const RDLMState*>(&other)->GetHash()) return 1;
|
||||
else return -1;
|
||||
if (m_hash == static_cast<const RDLMState*>(&other)->GetHash()) return 0;
|
||||
else if (m_hash > static_cast<const RDLMState*>(&other)->GetHash()) return 1;
|
||||
else return -1;
|
||||
}
|
||||
};
|
||||
|
||||
@ -121,10 +122,9 @@ public:
|
||||
, m_normalizeLabelLM(false)
|
||||
, m_sharedVocab(false)
|
||||
, m_binarized(0)
|
||||
, m_cacheSize(1000000)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
, m_cacheSize(1000000) {
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
~RDLM();
|
||||
|
||||
@ -147,21 +147,23 @@ public:
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {};
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {};
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {};
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {};
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const {};
|
||||
FFState* EvaluateWhenApplied(
|
||||
const Hypothesis& cur_hypo,
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const {UTIL_THROW(util::Exception, "Not implemented");};
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
UTIL_THROW(util::Exception, "Not implemented");
|
||||
};
|
||||
FFState* EvaluateWhenApplied(
|
||||
const ChartHypothesis& /* cur_hypo */,
|
||||
int /* featureID - used to index the state in the previous hypotheses */,
|
||||
@ -173,71 +175,72 @@ public:
|
||||
class UnbinarizedChildren
|
||||
{
|
||||
private:
|
||||
std::vector<TreePointer>::const_iterator iter;
|
||||
std::vector<TreePointer>::const_iterator _begin;
|
||||
std::vector<TreePointer>::const_iterator _end;
|
||||
InternalTree* current;
|
||||
const TreePointerMap & back_pointers;
|
||||
bool binarized;
|
||||
std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > stack;
|
||||
std::vector<TreePointer>::const_iterator iter;
|
||||
std::vector<TreePointer>::const_iterator _begin;
|
||||
std::vector<TreePointer>::const_iterator _end;
|
||||
InternalTree* current;
|
||||
const TreePointerMap & back_pointers;
|
||||
bool binarized;
|
||||
std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > stack;
|
||||
|
||||
public:
|
||||
UnbinarizedChildren(InternalTree* root, const TreePointerMap & pointers, bool binary):
|
||||
current(root),
|
||||
back_pointers(pointers),
|
||||
binarized(binary)
|
||||
{
|
||||
stack.reserve(10);
|
||||
_end = current->GetChildren().end();
|
||||
iter = current->GetChildren().begin();
|
||||
// expand virtual node
|
||||
while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
|
||||
stack.push_back(std::make_pair(current, iter));
|
||||
// also go through trees or previous hypotheses to rescore nodes for which more context has become available
|
||||
if ((*iter)->IsLeafNT()) {
|
||||
current = back_pointers.find(iter->get())->second.get();
|
||||
}
|
||||
else {
|
||||
current = iter->get();
|
||||
}
|
||||
iter = current->GetChildren().begin();
|
||||
}
|
||||
_begin = iter;
|
||||
UnbinarizedChildren(InternalTree* root, const TreePointerMap & pointers, bool binary):
|
||||
current(root),
|
||||
back_pointers(pointers),
|
||||
binarized(binary) {
|
||||
stack.reserve(10);
|
||||
_end = current->GetChildren().end();
|
||||
iter = current->GetChildren().begin();
|
||||
// expand virtual node
|
||||
while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
|
||||
stack.push_back(std::make_pair(current, iter));
|
||||
// also go through trees or previous hypotheses to rescore nodes for which more context has become available
|
||||
if ((*iter)->IsLeafNT()) {
|
||||
current = back_pointers.find(iter->get())->second.get();
|
||||
} else {
|
||||
current = iter->get();
|
||||
}
|
||||
|
||||
std::vector<TreePointer>::const_iterator begin() const { return _begin; }
|
||||
std::vector<TreePointer>::const_iterator end() const { return _end; }
|
||||
|
||||
std::vector<TreePointer>::const_iterator operator++() {
|
||||
iter++;
|
||||
if (iter == current->GetChildren().end()) {
|
||||
while (!stack.empty()) {
|
||||
std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> & active = stack.back();
|
||||
current = active.first;
|
||||
iter = ++active.second;
|
||||
stack.pop_back();
|
||||
if (iter != current->GetChildren().end()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (iter == _end) {
|
||||
return iter;
|
||||
}
|
||||
}
|
||||
// expand virtual node
|
||||
while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
|
||||
stack.push_back(std::make_pair(current, iter));
|
||||
// also go through trees or previous hypotheses to rescore nodes for which more context has become available
|
||||
if ((*iter)->IsLeafNT()) {
|
||||
current = back_pointers.find(iter->get())->second.get();
|
||||
}
|
||||
else {
|
||||
current = iter->get();
|
||||
}
|
||||
iter = current->GetChildren().begin();
|
||||
}
|
||||
return iter;
|
||||
iter = current->GetChildren().begin();
|
||||
}
|
||||
_begin = iter;
|
||||
}
|
||||
|
||||
std::vector<TreePointer>::const_iterator begin() const {
|
||||
return _begin;
|
||||
}
|
||||
std::vector<TreePointer>::const_iterator end() const {
|
||||
return _end;
|
||||
}
|
||||
|
||||
std::vector<TreePointer>::const_iterator operator++() {
|
||||
iter++;
|
||||
if (iter == current->GetChildren().end()) {
|
||||
while (!stack.empty()) {
|
||||
std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> & active = stack.back();
|
||||
current = active.first;
|
||||
iter = ++active.second;
|
||||
stack.pop_back();
|
||||
if (iter != current->GetChildren().end()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (iter == _end) {
|
||||
return iter;
|
||||
}
|
||||
}
|
||||
// expand virtual node
|
||||
while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
|
||||
stack.push_back(std::make_pair(current, iter));
|
||||
// also go through trees or previous hypotheses to rescore nodes for which more context has become available
|
||||
if ((*iter)->IsLeafNT()) {
|
||||
current = back_pointers.find(iter->get())->second.get();
|
||||
} else {
|
||||
current = iter->get();
|
||||
}
|
||||
iter = current->GetChildren().begin();
|
||||
}
|
||||
return iter;
|
||||
}
|
||||
};
|
||||
|
||||
};
|
||||
|
@ -73,7 +73,7 @@ Manager::Manager(ttasksptr const& ttask)
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
SearchAlgorithm searchAlgorithm = staticData.GetSearchAlgorithm();
|
||||
m_search = Search::CreateSearch(*this, *source, searchAlgorithm,
|
||||
*m_transOptColl);
|
||||
*m_transOptColl);
|
||||
|
||||
StaticData::Instance().InitializeForInput(ttask);
|
||||
}
|
||||
@ -87,7 +87,9 @@ Manager::~Manager()
|
||||
|
||||
const InputType&
|
||||
Manager::GetSource() const
|
||||
{ return m_source ; }
|
||||
{
|
||||
return m_source ;
|
||||
}
|
||||
|
||||
/**
|
||||
* Main decoder loop that translates a sentence by expanding
|
||||
@ -130,7 +132,7 @@ void Manager::Decode()
|
||||
searchTime.start();
|
||||
m_search->Decode();
|
||||
VERBOSE(1, "Line " << m_source.GetTranslationId()
|
||||
<< ": Search took " << searchTime << " seconds" << endl);
|
||||
<< ": Search took " << searchTime << " seconds" << endl);
|
||||
IFVERBOSE(2) {
|
||||
GetSentenceStats().StopTimeTotal();
|
||||
TRACE_ERR(GetSentenceStats());
|
||||
|
@ -110,7 +110,7 @@ private:
|
||||
#endif
|
||||
|
||||
public:
|
||||
void SetOutputStream(std::ostream* outStream){
|
||||
void SetOutputStream(std::ostream* outStream) {
|
||||
m_outStream = outStream;
|
||||
}
|
||||
|
||||
|
@ -203,7 +203,7 @@ Parameter::Parameter()
|
||||
AddParam(nbest_opts,"lattice-samples", "generate samples from lattice, in same format as nbest list. Uses the file and size arguments, as in n-best-list");
|
||||
AddParam(nbest_opts,"include-segmentation-in-n-best", "include phrasal segmentation in the n-best list. default is false");
|
||||
AddParam(nbest_opts,"print-alignment-info-in-n-best",
|
||||
"Include word-to-word alignment in the n-best list. Word-to-word alignments are taken from the phrase table if any. Default is false");
|
||||
"Include word-to-word alignment in the n-best list. Word-to-word alignments are taken from the phrase table if any. Default is false");
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// server options
|
||||
@ -215,7 +215,7 @@ Parameter::Parameter()
|
||||
|
||||
po::options_description irstlm_opts("IRSTLM Options");
|
||||
AddParam(irstlm_opts,"clean-lm-cache",
|
||||
"clean language model caches after N translations (default N=1)");
|
||||
"clean language model caches after N translations (default N=1)");
|
||||
|
||||
po::options_description chart_opts("Chart Decoding Options");
|
||||
AddParam(chart_opts,"max-chart-span", "maximum num. of source word chart rules can consume (default 10)");
|
||||
@ -346,8 +346,8 @@ const PARAM_VEC *Parameter::GetParam(const std::string ¶mName) const
|
||||
void
|
||||
Parameter::
|
||||
AddParam(po::options_description& optgroup,
|
||||
string const& paramName,
|
||||
string const& description)
|
||||
string const& paramName,
|
||||
string const& description)
|
||||
{
|
||||
m_valid[paramName] = true;
|
||||
m_description[paramName] = description;
|
||||
@ -358,9 +358,9 @@ AddParam(po::options_description& optgroup,
|
||||
void
|
||||
Parameter::
|
||||
AddParam(po::options_description& optgroup,
|
||||
string const& paramName,
|
||||
string const& abbrevName,
|
||||
string const& description)
|
||||
string const& paramName,
|
||||
string const& abbrevName,
|
||||
string const& description)
|
||||
{
|
||||
m_valid[paramName] = true;
|
||||
m_valid[abbrevName] = true;
|
||||
@ -368,11 +368,10 @@ AddParam(po::options_description& optgroup,
|
||||
m_fullname[abbrevName] = paramName;
|
||||
m_description[paramName] = description;
|
||||
string optname = paramName;
|
||||
if (abbrevName.size() == 1)
|
||||
{
|
||||
optname += string(",")+abbrevName;
|
||||
// m_confusable[abbrevName[0]].insert(paramName);
|
||||
}
|
||||
if (abbrevName.size() == 1) {
|
||||
optname += string(",")+abbrevName;
|
||||
// m_confusable[abbrevName[0]].insert(paramName);
|
||||
}
|
||||
optgroup.add_options()(optname.c_str(),description.c_str());
|
||||
}
|
||||
|
||||
@ -429,12 +428,11 @@ LoadParam(int argc, char* xargv[])
|
||||
// legacy parameter handling: all parameters are expected
|
||||
// to start with a single dash
|
||||
char* argv[argc+1];
|
||||
for (int i = 0; i < argc; ++i)
|
||||
{
|
||||
argv[i] = xargv[i];
|
||||
if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == '-')
|
||||
++argv[i];
|
||||
}
|
||||
for (int i = 0; i < argc; ++i) {
|
||||
argv[i] = xargv[i];
|
||||
if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == '-')
|
||||
++argv[i];
|
||||
}
|
||||
|
||||
// config file (-f) arg mandatory
|
||||
string configPath;
|
||||
@ -1260,7 +1258,7 @@ Validate()
|
||||
bool
|
||||
Parameter::
|
||||
FilesExist(const string &paramName, int fieldNo,
|
||||
std::vector<std::string> const& extensions)
|
||||
std::vector<std::string> const& extensions)
|
||||
{
|
||||
typedef std::vector<std::string> StringVec;
|
||||
StringVec::const_iterator iter;
|
||||
@ -1589,7 +1587,7 @@ template<>
|
||||
void
|
||||
Parameter::
|
||||
SetParameter<bool>(bool &parameter, std::string const& parameterName,
|
||||
bool const& defaultValue) const
|
||||
bool const& defaultValue) const
|
||||
{
|
||||
const PARAM_VEC *params = GetParam(parameterName);
|
||||
|
||||
|
@ -66,27 +66,27 @@ protected:
|
||||
|
||||
void
|
||||
AddParam(options_description& optgroup,
|
||||
value_semantic const* optvalue,
|
||||
std::string const& paramName,
|
||||
std::string const& description);
|
||||
value_semantic const* optvalue,
|
||||
std::string const& paramName,
|
||||
std::string const& description);
|
||||
|
||||
void
|
||||
AddParam(options_description& optgroup,
|
||||
std::string const &paramName,
|
||||
std::string const &description);
|
||||
std::string const &paramName,
|
||||
std::string const &description);
|
||||
|
||||
void
|
||||
AddParam(options_description& optgroup,
|
||||
value_semantic const* optvalue,
|
||||
std::string const& paramName,
|
||||
std::string const& abbrevName,
|
||||
std::string const& description);
|
||||
value_semantic const* optvalue,
|
||||
std::string const& paramName,
|
||||
std::string const& abbrevName,
|
||||
std::string const& description);
|
||||
|
||||
void
|
||||
AddParam(options_description& optgroup,
|
||||
std::string const& paramName,
|
||||
std::string const& abbrevName,
|
||||
std::string const& description);
|
||||
std::string const& paramName,
|
||||
std::string const& abbrevName,
|
||||
std::string const& description);
|
||||
|
||||
void PrintCredit();
|
||||
void PrintFF() const;
|
||||
|
@ -67,7 +67,7 @@ RegisterScoreProducer(FeatureFunction* scoreProducer)
|
||||
VERBOSE(1, "FeatureFunction: "
|
||||
<< scoreProducer->GetScoreProducerDescription()
|
||||
<< " start: " << start
|
||||
<< " end: " << (s_denseVectorSize-1) << endl);
|
||||
<< " end: " << (s_denseVectorSize-1) << endl);
|
||||
}
|
||||
|
||||
|
||||
@ -194,21 +194,19 @@ void ScoreComponentCollection::Save(ostream& out, bool multiline) const
|
||||
}
|
||||
|
||||
std::vector<FeatureFunction*> const& all_ff
|
||||
= FeatureFunction::GetFeatureFunctions();
|
||||
BOOST_FOREACH(FeatureFunction const* ff, all_ff)
|
||||
{
|
||||
string name = ff->GetScoreProducerDescription();
|
||||
size_t i = ff->GetIndex();
|
||||
if (ff->GetNumScoreComponents() == 1)
|
||||
out << name << sep << m_scores[i] << linesep;
|
||||
else
|
||||
{
|
||||
size_t stop = i + ff->GetNumScoreComponents();
|
||||
boost::format fmt("%s_%d");
|
||||
for (size_t k = 1; i < stop; ++i, ++k)
|
||||
out << fmt % name % k << sep << m_scores[i] << linesep;
|
||||
}
|
||||
= FeatureFunction::GetFeatureFunctions();
|
||||
BOOST_FOREACH(FeatureFunction const* ff, all_ff) {
|
||||
string name = ff->GetScoreProducerDescription();
|
||||
size_t i = ff->GetIndex();
|
||||
if (ff->GetNumScoreComponents() == 1)
|
||||
out << name << sep << m_scores[i] << linesep;
|
||||
else {
|
||||
size_t stop = i + ff->GetNumScoreComponents();
|
||||
boost::format fmt("%s_%d");
|
||||
for (size_t k = 1; i < stop; ++i, ++k)
|
||||
out << fmt % name % k << sep << m_scores[i] << linesep;
|
||||
}
|
||||
}
|
||||
// write sparse features
|
||||
m_scores.write(out,sep,linesep);
|
||||
}
|
||||
|
@ -231,10 +231,10 @@ public:
|
||||
//! produced by sp
|
||||
void
|
||||
PlusEquals(const FeatureFunction* sp,
|
||||
const ScoreComponentCollection& scores) {
|
||||
const ScoreComponentCollection& scores) {
|
||||
size_t i = sp->GetIndex();
|
||||
size_t stop = i + sp->GetNumScoreComponents();
|
||||
for (;i < stop; ++i) m_scores[i] += scores.m_scores[i];
|
||||
for (; i < stop; ++i) m_scores[i] += scores.m_scores[i];
|
||||
}
|
||||
|
||||
//! Add scores from a single FeatureFunction only
|
||||
|
@ -60,28 +60,23 @@ aux_init_partial_translation(string& line)
|
||||
string sourceCompletedStr;
|
||||
int loc1 = line.find( "|||", 0 );
|
||||
int loc2 = line.find( "|||", loc1 + 3 );
|
||||
if (loc1 > -1 && loc2 > -1)
|
||||
{
|
||||
m_initialTargetPhrase = Trim(line.substr(0, loc1));
|
||||
string scov = Trim(line.substr(loc1 + 3, loc2 - loc1 - 3));
|
||||
line = line.substr(loc2 + 3);
|
||||
if (loc1 > -1 && loc2 > -1) {
|
||||
m_initialTargetPhrase = Trim(line.substr(0, loc1));
|
||||
string scov = Trim(line.substr(loc1 + 3, loc2 - loc1 - 3));
|
||||
line = line.substr(loc2 + 3);
|
||||
|
||||
m_sourceCompleted.resize(scov.size());
|
||||
int contiguous = 1;
|
||||
for (size_t i = 0; i < scov.size(); ++i)
|
||||
{
|
||||
if (sourceCompletedStr.at(i) == '1')
|
||||
{
|
||||
m_sourceCompleted[i] = true;
|
||||
if (contiguous) m_frontSpanCoveredLength++;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_sourceCompleted[i] = false;
|
||||
contiguous = 0;
|
||||
}
|
||||
}
|
||||
m_sourceCompleted.resize(scov.size());
|
||||
int contiguous = 1;
|
||||
for (size_t i = 0; i < scov.size(); ++i) {
|
||||
if (sourceCompletedStr.at(i) == '1') {
|
||||
m_sourceCompleted[i] = true;
|
||||
if (contiguous) m_frontSpanCoveredLength++;
|
||||
} else {
|
||||
m_sourceCompleted[i] = false;
|
||||
contiguous = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@ -94,38 +89,31 @@ aux_interpret_sgml_markup(string& line)
|
||||
metamap::const_iterator i;
|
||||
if ((i = meta.find("id")) != meta.end())
|
||||
this->SetTranslationId(atol(i->second.c_str()));
|
||||
if ((i = meta.find("docid")) != meta.end())
|
||||
{
|
||||
this->SetDocumentId(atol(i->second.c_str()));
|
||||
this->SetUseTopicId(false);
|
||||
if ((i = meta.find("docid")) != meta.end()) {
|
||||
this->SetDocumentId(atol(i->second.c_str()));
|
||||
this->SetUseTopicId(false);
|
||||
this->SetUseTopicIdAndProb(false);
|
||||
}
|
||||
if ((i = meta.find("topic")) != meta.end()) {
|
||||
vector<string> topic_params;
|
||||
boost::split(topic_params, i->second, boost::is_any_of("\t "));
|
||||
if (topic_params.size() == 1) {
|
||||
this->SetTopicId(atol(topic_params[0].c_str()));
|
||||
this->SetUseTopicId(true);
|
||||
this->SetUseTopicIdAndProb(false);
|
||||
} else {
|
||||
this->SetTopicIdAndProb(topic_params);
|
||||
this->SetUseTopicId(false);
|
||||
this->SetUseTopicIdAndProb(true);
|
||||
}
|
||||
if ((i = meta.find("topic")) != meta.end())
|
||||
{
|
||||
vector<string> topic_params;
|
||||
boost::split(topic_params, i->second, boost::is_any_of("\t "));
|
||||
if (topic_params.size() == 1)
|
||||
{
|
||||
this->SetTopicId(atol(topic_params[0].c_str()));
|
||||
this->SetUseTopicId(true);
|
||||
this->SetUseTopicIdAndProb(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->SetTopicIdAndProb(topic_params);
|
||||
this->SetUseTopicId(false);
|
||||
this->SetUseTopicIdAndProb(true);
|
||||
}
|
||||
}
|
||||
if ((i = meta.find("weight-setting")) != meta.end())
|
||||
{
|
||||
this->SetWeightSetting(i->second);
|
||||
this->SetSpecifiesWeightSetting(true);
|
||||
StaticData::Instance().SetWeightSetting(i->second);
|
||||
// oh this is so horrible! Why does this have to be propagated globally?
|
||||
// --- UG
|
||||
}
|
||||
else this->SetSpecifiesWeightSetting(false);
|
||||
}
|
||||
if ((i = meta.find("weight-setting")) != meta.end()) {
|
||||
this->SetWeightSetting(i->second);
|
||||
this->SetSpecifiesWeightSetting(true);
|
||||
StaticData::Instance().SetWeightSetting(i->second);
|
||||
// oh this is so horrible! Why does this have to be propagated globally?
|
||||
// --- UG
|
||||
} else this->SetSpecifiesWeightSetting(false);
|
||||
}
|
||||
|
||||
void
|
||||
@ -135,48 +123,44 @@ aux_interpret_dlt(string& line) // whatever DLT means ... --- UG
|
||||
using namespace std;
|
||||
typedef map<string, string> str2str_map;
|
||||
vector<str2str_map> meta = ProcessAndStripDLT(line);
|
||||
BOOST_FOREACH(str2str_map const& M, meta)
|
||||
{
|
||||
str2str_map::const_iterator i,j;
|
||||
if ((i = M.find("type")) != M.end())
|
||||
{
|
||||
j = M.find("id");
|
||||
string id = j == M.end() ? "default" : j->second;
|
||||
if (i->second == "cbtm")
|
||||
{
|
||||
PhraseDictionaryDynamicCacheBased* cbtm;
|
||||
cbtm = PhraseDictionaryDynamicCacheBased::InstanceNonConst(id);
|
||||
if (cbtm) cbtm->ExecuteDlt(M);
|
||||
}
|
||||
if (i->second == "cblm")
|
||||
{
|
||||
DynamicCacheBasedLanguageModel* cblm;
|
||||
cblm = DynamicCacheBasedLanguageModel::InstanceNonConst(id);
|
||||
if (cblm) cblm->ExecuteDlt(M);
|
||||
}
|
||||
}
|
||||
BOOST_FOREACH(str2str_map const& M, meta) {
|
||||
str2str_map::const_iterator i,j;
|
||||
if ((i = M.find("type")) != M.end()) {
|
||||
j = M.find("id");
|
||||
string id = j == M.end() ? "default" : j->second;
|
||||
if (i->second == "cbtm") {
|
||||
PhraseDictionaryDynamicCacheBased* cbtm;
|
||||
cbtm = PhraseDictionaryDynamicCacheBased::InstanceNonConst(id);
|
||||
if (cbtm) cbtm->ExecuteDlt(M);
|
||||
}
|
||||
if (i->second == "cblm") {
|
||||
DynamicCacheBasedLanguageModel* cblm;
|
||||
cblm = DynamicCacheBasedLanguageModel::InstanceNonConst(id);
|
||||
if (cblm) cblm->ExecuteDlt(M);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Sentence::
|
||||
aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
|
||||
std::vector<std::pair<size_t, std::string> >& placeholders)
|
||||
{ // parse XML markup in translation line
|
||||
std::vector<std::pair<size_t, std::string> >& placeholders)
|
||||
{
|
||||
// parse XML markup in translation line
|
||||
|
||||
const StaticData &SD = StaticData::Instance();
|
||||
|
||||
using namespace std;
|
||||
if (SD.GetXmlInputType() != XmlPassThrough)
|
||||
{
|
||||
int offset = SD.IsSyntax() ? 1 : 0;
|
||||
bool OK = ProcessAndStripXMLTags(line, m_xmlOptions,
|
||||
m_reorderingConstraint,
|
||||
xmlWalls, placeholders, offset,
|
||||
SD.GetXmlBrackets().first,
|
||||
SD.GetXmlBrackets().second);
|
||||
UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line);
|
||||
}
|
||||
if (SD.GetXmlInputType() != XmlPassThrough) {
|
||||
int offset = SD.IsSyntax() ? 1 : 0;
|
||||
bool OK = ProcessAndStripXMLTags(line, m_xmlOptions,
|
||||
m_reorderingConstraint,
|
||||
xmlWalls, placeholders, offset,
|
||||
SD.GetXmlBrackets().first,
|
||||
SD.GetXmlBrackets().second);
|
||||
UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@ -197,11 +181,10 @@ init(string line, std::vector<FactorType> const& factorOrder)
|
||||
aux_interpret_dlt(line); // some poorly documented cache-based stuff
|
||||
|
||||
// if sentences is specified as "<passthrough tag1=""/>"
|
||||
if (SD.IsPassthroughEnabled() || SD.IsPassthroughInNBestEnabled())
|
||||
{
|
||||
string pthru = PassthroughSGML(line,"passthrough");
|
||||
this->SetPassthroughInformation(pthru);
|
||||
}
|
||||
if (SD.IsPassthroughEnabled() || SD.IsPassthroughInNBestEnabled()) {
|
||||
string pthru = PassthroughSGML(line,"passthrough");
|
||||
this->SetPassthroughInformation(pthru);
|
||||
}
|
||||
|
||||
vector<size_t> xmlWalls;
|
||||
vector<pair<size_t, string> >placeholders;
|
||||
@ -218,26 +201,23 @@ init(string line, std::vector<FactorType> const& factorOrder)
|
||||
// our XmlOptions and create TranslationOptions
|
||||
|
||||
// only fill the vector if we are parsing XML
|
||||
if (SD.GetXmlInputType() != XmlPassThrough)
|
||||
{
|
||||
m_xmlCoverageMap.assign(GetSize(), false);
|
||||
BOOST_FOREACH(XmlOption* o, m_xmlOptions)
|
||||
{
|
||||
WordsRange const& r = o->range;
|
||||
for(size_t j = r.GetStartPos(); j <= r.GetEndPos(); ++j)
|
||||
m_xmlCoverageMap[j]=true;
|
||||
}
|
||||
if (SD.GetXmlInputType() != XmlPassThrough) {
|
||||
m_xmlCoverageMap.assign(GetSize(), false);
|
||||
BOOST_FOREACH(XmlOption* o, m_xmlOptions) {
|
||||
WordsRange const& r = o->range;
|
||||
for(size_t j = r.GetStartPos(); j <= r.GetEndPos(); ++j)
|
||||
m_xmlCoverageMap[j]=true;
|
||||
}
|
||||
}
|
||||
|
||||
// reordering walls and zones
|
||||
m_reorderingConstraint.InitializeWalls(GetSize());
|
||||
|
||||
// set reordering walls, if "-monotone-at-punction" is set
|
||||
if (SD.UseReorderingConstraint() && GetSize())
|
||||
{
|
||||
WordsRange r(0, GetSize()-1);
|
||||
m_reorderingConstraint.SetMonotoneAtPunctuation(GetSubString(r));
|
||||
}
|
||||
if (SD.UseReorderingConstraint() && GetSize()) {
|
||||
WordsRange r(0, GetSize()-1);
|
||||
m_reorderingConstraint.SetMonotoneAtPunctuation(GetSubString(r));
|
||||
}
|
||||
|
||||
// set walls obtained from xml
|
||||
for(size_t i=0; i<xmlWalls.size(); i++)
|
||||
@ -283,8 +263,8 @@ CreateTranslationOptionCollection(ttasksptr const& ttask) const
|
||||
size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage();
|
||||
float transOptThreshold = StaticData::Instance().GetTranslationOptionThreshold();
|
||||
TranslationOptionCollection *rv
|
||||
= new TranslationOptionCollectionText(ttask, *this, maxNoTransOptPerCoverage,
|
||||
transOptThreshold);
|
||||
= new TranslationOptionCollectionText(ttask, *this, maxNoTransOptPerCoverage,
|
||||
transOptThreshold);
|
||||
assert(rv);
|
||||
return rv;
|
||||
}
|
||||
@ -386,7 +366,7 @@ CreateFromString(vector<FactorType> const& FOrder, string const& phraseString)
|
||||
|
||||
Sentence::
|
||||
Sentence(size_t const transId, string const& stext,
|
||||
vector<FactorType> const* IFO)
|
||||
vector<FactorType> const* IFO)
|
||||
: InputType(transId)
|
||||
{
|
||||
if (IFO) init(stext, *IFO);
|
||||
|
155
moses/Sentence.h
155
moses/Sentence.h
@ -32,109 +32,110 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class WordsRange;
|
||||
class PhraseDictionary;
|
||||
class TranslationOption;
|
||||
class TranslationOptionCollection;
|
||||
class ChartTranslationOptions;
|
||||
class TranslationTask;
|
||||
struct XmlOption;
|
||||
class WordsRange;
|
||||
class PhraseDictionary;
|
||||
class TranslationOption;
|
||||
class TranslationOptionCollection;
|
||||
class ChartTranslationOptions;
|
||||
class TranslationTask;
|
||||
struct XmlOption;
|
||||
|
||||
|
||||
/**
|
||||
* A Phrase class with an ID. Used specifically as source input so contains functionality to read
|
||||
* from IODevice and create trans opt
|
||||
*/
|
||||
class Sentence : public Phrase, public InputType
|
||||
{
|
||||
protected:
|
||||
|
||||
/**
|
||||
* A Phrase class with an ID. Used specifically as source input so contains functionality to read
|
||||
* from IODevice and create trans opt
|
||||
* Utility method that takes in a string representing an XML tag and the name of the attribute,
|
||||
* and returns the value of that tag if present, empty string otherwise
|
||||
*/
|
||||
class Sentence : public Phrase, public InputType
|
||||
{
|
||||
protected:
|
||||
std::vector<XmlOption*> m_xmlOptions;
|
||||
std::vector <bool> m_xmlCoverageMap;
|
||||
|
||||
/**
|
||||
* Utility method that takes in a string representing an XML tag and the name of the attribute,
|
||||
* and returns the value of that tag if present, empty string otherwise
|
||||
*/
|
||||
std::vector<XmlOption*> m_xmlOptions;
|
||||
std::vector <bool> m_xmlCoverageMap;
|
||||
NonTerminalSet m_defaultLabelSet;
|
||||
|
||||
NonTerminalSet m_defaultLabelSet;
|
||||
|
||||
void ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders);
|
||||
void ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders);
|
||||
|
||||
|
||||
public:
|
||||
Sentence();
|
||||
Sentence(size_t const transId, std::string const& stext,
|
||||
std::vector<FactorType> const* IFO = NULL);
|
||||
// Sentence(size_t const transId, std::string const& stext);
|
||||
~Sentence();
|
||||
public:
|
||||
Sentence();
|
||||
Sentence(size_t const transId, std::string const& stext,
|
||||
std::vector<FactorType> const* IFO = NULL);
|
||||
// Sentence(size_t const transId, std::string const& stext);
|
||||
~Sentence();
|
||||
|
||||
InputTypeEnum GetType() const {
|
||||
return SentenceInput;
|
||||
}
|
||||
InputTypeEnum GetType() const {
|
||||
return SentenceInput;
|
||||
}
|
||||
|
||||
//! Calls Phrase::GetSubString(). Implements abstract InputType::GetSubString()
|
||||
Phrase GetSubString(const WordsRange& r) const {
|
||||
return Phrase::GetSubString(r);
|
||||
}
|
||||
//! Calls Phrase::GetSubString(). Implements abstract InputType::GetSubString()
|
||||
Phrase GetSubString(const WordsRange& r) const {
|
||||
return Phrase::GetSubString(r);
|
||||
}
|
||||
|
||||
//! Calls Phrase::GetWord(). Implements abstract InputType::GetWord()
|
||||
const Word& GetWord(size_t pos) const {
|
||||
return Phrase::GetWord(pos);
|
||||
}
|
||||
//! Calls Phrase::GetWord(). Implements abstract InputType::GetWord()
|
||||
const Word& GetWord(size_t pos) const {
|
||||
return Phrase::GetWord(pos);
|
||||
}
|
||||
|
||||
//! Calls Phrase::GetSize(). Implements abstract InputType::GetSize()
|
||||
size_t GetSize() const {
|
||||
return Phrase::GetSize();
|
||||
}
|
||||
//! Calls Phrase::GetSize(). Implements abstract InputType::GetSize()
|
||||
size_t GetSize() const {
|
||||
return Phrase::GetSize();
|
||||
}
|
||||
|
||||
//! Returns true if there were any XML tags parsed that at least partially covered the range passed
|
||||
bool XmlOverlap(size_t startPos, size_t endPos) const;
|
||||
//! Returns true if there were any XML tags parsed that at least partially covered the range passed
|
||||
bool XmlOverlap(size_t startPos, size_t endPos) const;
|
||||
|
||||
//! populates vector argument with XML force translation options for the specific range passed
|
||||
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list) const;
|
||||
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list, size_t startPos, size_t endPos) const;
|
||||
std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions() const;
|
||||
//! populates vector argument with XML force translation options for the specific range passed
|
||||
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list) const;
|
||||
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list, size_t startPos, size_t endPos) const;
|
||||
std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions() const;
|
||||
|
||||
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
|
||||
void Print(std::ostream& out) const;
|
||||
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
|
||||
void Print(std::ostream& out) const;
|
||||
|
||||
TranslationOptionCollection*
|
||||
CreateTranslationOptionCollection(ttasksptr const& ttask) const;
|
||||
TranslationOptionCollection*
|
||||
CreateTranslationOptionCollection(ttasksptr const& ttask) const;
|
||||
|
||||
virtual void
|
||||
CreateFromString(std::vector<FactorType> const &factorOrder,
|
||||
std::string const& phraseString);
|
||||
virtual void
|
||||
CreateFromString(std::vector<FactorType> const &factorOrder,
|
||||
std::string const& phraseString);
|
||||
|
||||
const NonTerminalSet&
|
||||
GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const
|
||||
{ return m_defaultLabelSet; }
|
||||
const NonTerminalSet&
|
||||
GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const {
|
||||
return m_defaultLabelSet;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
init(std::string line, std::vector<FactorType> const& factorOrder);
|
||||
void
|
||||
init(std::string line, std::vector<FactorType> const& factorOrder);
|
||||
|
||||
private:
|
||||
// auxliliary functions for Sentence initialization
|
||||
// void aux_interpret_sgml_markup(std::string& line);
|
||||
// void aux_interpret_dlt(std::string& line);
|
||||
// void aux_interpret_xml (std::string& line, std::vector<size_t> & xmlWalls,
|
||||
// std::vector<std::pair<size_t, std::string> >& placeholders);
|
||||
private:
|
||||
// auxliliary functions for Sentence initialization
|
||||
// void aux_interpret_sgml_markup(std::string& line);
|
||||
// void aux_interpret_dlt(std::string& line);
|
||||
// void aux_interpret_xml (std::string& line, std::vector<size_t> & xmlWalls,
|
||||
// std::vector<std::pair<size_t, std::string> >& placeholders);
|
||||
|
||||
void
|
||||
aux_interpret_sgml_markup(std::string& line);
|
||||
void
|
||||
aux_interpret_sgml_markup(std::string& line);
|
||||
|
||||
void
|
||||
aux_interpret_dlt(std::string& line);
|
||||
void
|
||||
aux_interpret_dlt(std::string& line);
|
||||
|
||||
void
|
||||
aux_interpret_xml
|
||||
(std::string& line, std::vector<size_t> & xmlWalls,
|
||||
std::vector<std::pair<size_t, std::string> >& placeholders);
|
||||
void
|
||||
aux_interpret_xml
|
||||
(std::string& line, std::vector<size_t> & xmlWalls,
|
||||
std::vector<std::pair<size_t, std::string> >& placeholders);
|
||||
|
||||
void
|
||||
aux_init_partial_translation(std::string& line);
|
||||
void
|
||||
aux_init_partial_translation(std::string& line);
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
@ -118,7 +118,7 @@ StaticData
|
||||
|
||||
string &feature = toks[0];
|
||||
std::map<std::string, std::string>::const_iterator iter
|
||||
= featureNameOverride.find(feature);
|
||||
= featureNameOverride.find(feature);
|
||||
if (iter == featureNameOverride.end()) {
|
||||
// feature name not override
|
||||
m_registry.Construct(feature, line);
|
||||
@ -146,7 +146,7 @@ StaticData
|
||||
m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput);
|
||||
|
||||
m_parameter->SetParameter(m_continuePartialTranslation,
|
||||
"continue-partial-translation", false );
|
||||
"continue-partial-translation", false );
|
||||
|
||||
std::string s_it = "text input";
|
||||
if (m_inputType == 1) {
|
||||
@ -160,7 +160,7 @@ StaticData
|
||||
}
|
||||
VERBOSE(2,"input type is: "<<s_it<<"\n");
|
||||
|
||||
// use of xml in input
|
||||
// use of xml in input
|
||||
m_parameter->SetParameter<XmlInputType>(m_xmlInputType, "xml-input", XmlPassThrough);
|
||||
|
||||
// specify XML tags opening and closing brackets for XML option
|
||||
@ -178,7 +178,7 @@ StaticData
|
||||
}
|
||||
|
||||
m_parameter->SetParameter(m_defaultNonTermOnlyForEmptyRange,
|
||||
"default-non-term-for-empty-range-only", false );
|
||||
"default-non-term-for-empty-range-only", false );
|
||||
|
||||
}
|
||||
|
||||
@ -347,18 +347,18 @@ StaticData
|
||||
|
||||
|
||||
m_parameter->SetParameter(m_PrintAlignmentInfoNbest,
|
||||
"print-alignment-info-in-n-best", false );
|
||||
"print-alignment-info-in-n-best", false );
|
||||
|
||||
// include feature names in the n-best list
|
||||
m_parameter->SetParameter(m_labeledNBestList, "labeled-n-best-list", true );
|
||||
|
||||
// include word alignment in the n-best list
|
||||
m_parameter->SetParameter(m_nBestIncludesSegmentation,
|
||||
"include-segmentation-in-n-best", false );
|
||||
"include-segmentation-in-n-best", false );
|
||||
|
||||
// print all factors of output translations
|
||||
m_parameter->SetParameter(m_reportAllFactorsNBest,
|
||||
"report-all-factors-in-n-best", false );
|
||||
"report-all-factors-in-n-best", false );
|
||||
|
||||
m_parameter->SetParameter(m_printNBestTrees, "n-best-trees", false );
|
||||
return true;
|
||||
@ -412,7 +412,7 @@ StaticData
|
||||
#ifndef WITH_THREADS
|
||||
if (m_threadCount > 1) {
|
||||
std::cerr << "Error: Thread count of " << params->at(0)
|
||||
<< " but moses not built with thread support";
|
||||
<< " but moses not built with thread support";
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
@ -426,11 +426,11 @@ StaticData
|
||||
::ini_cube_pruning_options()
|
||||
{
|
||||
m_parameter->SetParameter(m_cubePruningPopLimit, "cube-pruning-pop-limit",
|
||||
DEFAULT_CUBE_PRUNING_POP_LIMIT);
|
||||
DEFAULT_CUBE_PRUNING_POP_LIMIT);
|
||||
m_parameter->SetParameter(m_cubePruningDiversity, "cube-pruning-diversity",
|
||||
DEFAULT_CUBE_PRUNING_DIVERSITY);
|
||||
DEFAULT_CUBE_PRUNING_DIVERSITY);
|
||||
m_parameter->SetParameter(m_cubePruningLazyScoring, "cube-pruning-lazy-scoring",
|
||||
false);
|
||||
false);
|
||||
}
|
||||
|
||||
void
|
||||
@ -468,7 +468,7 @@ void
|
||||
StaticData
|
||||
::ini_oov_options()
|
||||
{
|
||||
// unknown word processing
|
||||
// unknown word processing
|
||||
m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false );
|
||||
m_parameter->SetParameter(m_markUnknown, "mark-unknown", false );
|
||||
|
||||
@ -647,7 +647,7 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
|
||||
// S2T decoder
|
||||
m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm",
|
||||
RecursiveCYKPlus);
|
||||
RecursiveCYKPlus);
|
||||
|
||||
|
||||
ini_zombie_options(); // probably dead, or maybe not
|
||||
@ -1016,7 +1016,7 @@ StaticData
|
||||
::InitializeForInput(ttasksptr const& ttask) const
|
||||
{
|
||||
const std::vector<FeatureFunction*> &producers
|
||||
= FeatureFunction::GetFeatureFunctions();
|
||||
= FeatureFunction::GetFeatureFunctions();
|
||||
for(size_t i=0; i<producers.size(); ++i) {
|
||||
FeatureFunction &ff = *producers[i];
|
||||
if (! IsFeatureFunctionIgnored(ff)) {
|
||||
@ -1024,7 +1024,7 @@ StaticData
|
||||
iTime.start();
|
||||
ff.InitializeForInput(ttask);
|
||||
VERBOSE(3,"InitializeForInput( " << ff.GetScoreProducerDescription() << " )"
|
||||
<< "= " << iTime << endl);
|
||||
<< "= " << iTime << endl);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1034,7 +1034,7 @@ StaticData
|
||||
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const
|
||||
{
|
||||
const std::vector<FeatureFunction*> &producers
|
||||
= FeatureFunction::GetFeatureFunctions();
|
||||
= FeatureFunction::GetFeatureFunctions();
|
||||
for(size_t i=0; i<producers.size(); ++i) {
|
||||
FeatureFunction &ff = *producers[i];
|
||||
if (! IsFeatureFunctionIgnored(ff)) {
|
||||
@ -1111,7 +1111,7 @@ bool StaticData::CheckWeights() const
|
||||
|
||||
if (!weightNames.empty()) {
|
||||
cerr << "The following weights have no feature function. "
|
||||
<< "Maybe incorrectly spelt weights: ";
|
||||
<< "Maybe incorrectly spelt weights: ";
|
||||
set<string>::iterator iter;
|
||||
for (iter = weightNames.begin(); iter != weightNames.end(); ++iter) {
|
||||
cerr << *iter << ",";
|
||||
|
@ -476,18 +476,18 @@ public:
|
||||
// m_searchAlgorithm == SyntaxF2S;
|
||||
// }
|
||||
|
||||
bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const
|
||||
{
|
||||
bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const {
|
||||
if (algo == DefaultSearchAlgorithm)
|
||||
algo = m_searchAlgorithm;
|
||||
return (algo == CYKPlus || algo == ChartIncremental ||
|
||||
algo == SyntaxS2T || algo == SyntaxT2S ||
|
||||
algo == SyntaxF2S || algo == SyntaxT2S_SCFG);
|
||||
algo == SyntaxS2T || algo == SyntaxT2S ||
|
||||
algo == SyntaxF2S || algo == SyntaxT2S_SCFG);
|
||||
}
|
||||
|
||||
const ScoreComponentCollection&
|
||||
GetAllWeights() const
|
||||
{ return m_allWeights; }
|
||||
GetAllWeights() const {
|
||||
return m_allWeights;
|
||||
}
|
||||
|
||||
void SetAllWeights(const ScoreComponentCollection& weights) {
|
||||
m_allWeights = weights;
|
||||
|
@ -146,7 +146,7 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
|
||||
}
|
||||
|
||||
void HyperTreeLoader::ExtractSourceTerminalSetFromHyperPath(
|
||||
const HyperPath &hp, boost::unordered_set<std::size_t> &sourceTerminalSet)
|
||||
const HyperPath &hp, boost::unordered_set<std::size_t> &sourceTerminalSet)
|
||||
{
|
||||
for (std::vector<HyperPath::NodeSeq>::const_iterator p = hp.nodeSeqs.begin();
|
||||
p != hp.nodeSeqs.end(); ++p) {
|
||||
|
@ -31,7 +31,7 @@ public:
|
||||
|
||||
private:
|
||||
void ExtractSourceTerminalSetFromHyperPath(
|
||||
const HyperPath &, boost::unordered_set<std::size_t> &);
|
||||
const HyperPath &, boost::unordered_set<std::size_t> &);
|
||||
};
|
||||
|
||||
} // namespace F2S
|
||||
|
@ -39,7 +39,7 @@ Manager<RuleMatcher>::Manager(ttasksptr const& ttask)
|
||||
if (const ForestInput *p = dynamic_cast<const ForestInput*>(&m_source)) {
|
||||
m_forest = p->GetForest();
|
||||
m_rootVertex = p->GetRootVertex();
|
||||
m_sentenceLength = p->GetSize();
|
||||
m_sentenceLength = p->GetSize();
|
||||
} else if (const TreeInput *p = dynamic_cast<const TreeInput*>(&m_source)) {
|
||||
T2S::InputTreeBuilder builder;
|
||||
T2S::InputTree tmpTree;
|
||||
|
@ -39,7 +39,7 @@ public:
|
||||
|
||||
typedef std::vector<boost::shared_ptr<KBestExtractor::Derivation> > kBestList_t;
|
||||
void ExtractKBest(std::size_t k, kBestList_t& kBestList,
|
||||
bool onlyDistinct=false) const;
|
||||
bool onlyDistinct=false) const;
|
||||
|
||||
void OutputDetailedTranslationReport(OutputCollector *collector) const;
|
||||
|
||||
|
@ -11,34 +11,34 @@ namespace Syntax
|
||||
{
|
||||
|
||||
InputWeightFF::InputWeightFF(const std::string &line)
|
||||
: StatelessFeatureFunction(1, line)
|
||||
: StatelessFeatureFunction(1, line)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
void InputWeightFF::EvaluateWhenApplied(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
// TODO Throw exception.
|
||||
assert(false);
|
||||
}
|
||||
|
||||
void InputWeightFF::EvaluateWhenApplied(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
// TODO Throw exception.
|
||||
assert(false);
|
||||
}
|
||||
|
||||
void InputWeightFF::EvaluateWhenApplied(
|
||||
const Syntax::SHyperedge &hyperedge,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
const Syntax::SHyperedge &hyperedge,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
accumulator->PlusEquals(this, hyperedge.label.inputWeight);
|
||||
}
|
||||
|
||||
void InputWeightFF::SetParameter(const std::string& key,
|
||||
const std::string& value)
|
||||
const std::string& value)
|
||||
{
|
||||
StatelessFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ public:
|
||||
ScoreComponentCollection *) const {}
|
||||
|
||||
void EvaluateTranslationOptionListWithSourceContext(
|
||||
const InputType &, const TranslationOptionList &) const {}
|
||||
const InputType &, const TranslationOptionList &) const {}
|
||||
};
|
||||
|
||||
} // Syntax
|
||||
|
@ -225,21 +225,19 @@ void TargetPhrase::SetSparseScore(const FeatureFunction* translationScoreProduce
|
||||
|
||||
boost::shared_ptr<Scores>
|
||||
mergescores(boost::shared_ptr<Scores> const& a,
|
||||
boost::shared_ptr<Scores> const& b)
|
||||
boost::shared_ptr<Scores> const& b)
|
||||
{
|
||||
boost::shared_ptr<Scores> ret;
|
||||
if (!a) return b ? b : ret;
|
||||
if (!b) return a;
|
||||
if (a->size() != b->size()) return ret;
|
||||
ret.reset(new Scores(*a));
|
||||
for (size_t i = 0; i < a->size(); ++i)
|
||||
{
|
||||
if ((*a)[i] == 0) (*a)[i] = (*b)[i];
|
||||
else if ((*b)[i])
|
||||
{
|
||||
UTIL_THROW_IF2((*a)[i] != (*b)[i], "can't merge feature vectors");
|
||||
}
|
||||
for (size_t i = 0; i < a->size(); ++i) {
|
||||
if ((*a)[i] == 0) (*a)[i] = (*b)[i];
|
||||
else if ((*b)[i]) {
|
||||
UTIL_THROW_IF2((*a)[i] != (*b)[i], "can't merge feature vectors");
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -253,12 +251,11 @@ Merge(const TargetPhrase ©, const std::vector<FactorType>& factorVec)
|
||||
m_fullScore += copy.m_fullScore;
|
||||
typedef ScoreCache_t::iterator iter;
|
||||
typedef ScoreCache_t::value_type item;
|
||||
BOOST_FOREACH(item const& s, copy.m_cached_scores)
|
||||
{
|
||||
pair<iter,bool> foo = m_cached_scores.insert(s);
|
||||
if (foo.second == false)
|
||||
foo.first->second = mergescores(foo.first->second, s.second);
|
||||
}
|
||||
BOOST_FOREACH(item const& s, copy.m_cached_scores) {
|
||||
pair<iter,bool> foo = m_cached_scores.insert(s);
|
||||
if (foo.second == false)
|
||||
foo.first->second = mergescores(foo.first->second, s.second);
|
||||
}
|
||||
}
|
||||
|
||||
TargetPhrase::ScoreCache_t const&
|
||||
@ -279,8 +276,10 @@ GetExtraScores(FeatureFunction const* ff) const
|
||||
void
|
||||
TargetPhrase::
|
||||
SetExtraScores(FeatureFunction const* ff,
|
||||
boost::shared_ptr<Scores> const& s)
|
||||
{ m_cached_scores[ff] = s; }
|
||||
boost::shared_ptr<Scores> const& s)
|
||||
{
|
||||
m_cached_scores[ff] = s;
|
||||
}
|
||||
|
||||
|
||||
void TargetPhrase::SetProperties(const StringPiece &str)
|
||||
|
@ -51,15 +51,15 @@ class PhraseDictionary;
|
||||
*/
|
||||
class TargetPhrase: public Phrase
|
||||
{
|
||||
public:
|
||||
public:
|
||||
typedef std::map<FeatureFunction const*, boost::shared_ptr<Scores> >
|
||||
ScoreCache_t;
|
||||
ScoreCache_t;
|
||||
ScoreCache_t const& GetExtraScores() const;
|
||||
Scores const* GetExtraScores(FeatureFunction const* ff) const;
|
||||
void SetExtraScores(FeatureFunction const* ff,
|
||||
boost::shared_ptr<Scores> const& scores);
|
||||
boost::shared_ptr<Scores> const& scores);
|
||||
|
||||
private:
|
||||
private:
|
||||
ScoreCache_t m_cached_scores;
|
||||
|
||||
private:
|
||||
|
@ -18,7 +18,7 @@ class TrainingTask : public Moses::TranslationTask
|
||||
|
||||
protected:
|
||||
// Constructor: hands both arguments straight to the TranslationTask base
// class and adds no state of its own.
TrainingTask(boost::shared_ptr<Moses::InputType> const source,
             boost::shared_ptr<Moses::IOWrapper> const ioWrapper)
  : TranslationTask(source, ioWrapper)
{
}
|
||||
|
||||
@ -26,8 +26,7 @@ public:
|
||||
|
||||
// factory function
|
||||
static boost::shared_ptr<TrainingTask>
|
||||
create(boost::shared_ptr<InputType> const& source)
|
||||
{
|
||||
create(boost::shared_ptr<InputType> const& source) {
|
||||
boost::shared_ptr<IOWrapper> nix;
|
||||
boost::shared_ptr<TrainingTask> ret(new TrainingTask(source, nix));
|
||||
ret->m_self = ret;
|
||||
@ -37,8 +36,7 @@ public:
|
||||
// factory function
|
||||
static boost::shared_ptr<TrainingTask>
|
||||
create(boost::shared_ptr<InputType> const& source,
|
||||
boost::shared_ptr<IOWrapper> const& ioWrapper)
|
||||
{
|
||||
boost::shared_ptr<IOWrapper> const& ioWrapper) {
|
||||
boost::shared_ptr<TrainingTask> ret(new TrainingTask(source, ioWrapper));
|
||||
ret->m_self = ret;
|
||||
return ret;
|
||||
@ -53,7 +51,7 @@ public:
|
||||
std::cerr << *m_source << std::endl;
|
||||
|
||||
TranslationOptionCollection *transOptColl
|
||||
= m_source->CreateTranslationOptionCollection(this->self());
|
||||
= m_source->CreateTranslationOptionCollection(this->self());
|
||||
transOptColl->CreateTranslationOptions();
|
||||
delete transOptColl;
|
||||
|
||||
|
@ -163,7 +163,7 @@ public:
|
||||
#ifdef WITH_THREADS
|
||||
|
||||
boost::shared_ptr<HashTask<Keys> >
|
||||
ht(new HashTask<Keys>(current, *this, keys));
|
||||
ht(new HashTask<Keys>(current, *this, keys));
|
||||
m_threadPool.Submit(ht);
|
||||
#else
|
||||
CalcHash(current, keys);
|
||||
|
@ -133,7 +133,7 @@ public:
|
||||
size_t read = 0;
|
||||
read += ftruncate(m_file_desc, m_map_size);
|
||||
m_data_ptr = (char *)util::MapOrThrow(
|
||||
m_map_size, true, map_shared, false, m_file_desc, 0);
|
||||
m_map_size, true, map_shared, false, m_file_desc, 0);
|
||||
return (pointer)m_data_ptr;
|
||||
} else {
|
||||
size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
|
||||
@ -142,7 +142,7 @@ public:
|
||||
size_t map_size = m_map_size + relative_offset;
|
||||
|
||||
m_data_ptr = (char *)util::MapOrThrow(
|
||||
m_map_size, false, map_shared, false, m_file_desc, map_offset);
|
||||
m_map_size, false, map_shared, false, m_file_desc, map_offset);
|
||||
|
||||
return (pointer)(m_data_ptr + relative_offset);
|
||||
}
|
||||
|
@ -117,8 +117,7 @@ public:
|
||||
|
||||
virtual
|
||||
TargetPhraseCollection const *
|
||||
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src)
|
||||
{
|
||||
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) {
|
||||
return GetTargetPhraseCollectionLEGACY(src);
|
||||
}
|
||||
|
||||
@ -129,8 +128,7 @@ public:
|
||||
virtual
|
||||
void
|
||||
GetTargetPhraseCollectionBatch(ttasksptr const& ttask,
|
||||
const InputPathList &inputPathQueue) const
|
||||
{
|
||||
const InputPathList &inputPathQueue) const {
|
||||
GetTargetPhraseCollectionBatch(inputPathQueue);
|
||||
}
|
||||
|
||||
|
@ -109,7 +109,7 @@ ostream& operator<<(ostream& out, const TranslationOption& possibleTranslation)
|
||||
return out;
|
||||
}
|
||||
|
||||
/** returns cached scores */
|
||||
/** returns cached scores */
|
||||
const Scores*
|
||||
TranslationOption::
|
||||
GetLexReorderingScores(LexicalReordering const* scoreProducer) const
|
||||
|
@ -164,7 +164,7 @@ public:
|
||||
// }
|
||||
|
||||
void CacheLexReorderingScores(const LexicalReordering &scoreProducer,
|
||||
const Scores &score);
|
||||
const Scores &score);
|
||||
|
||||
TO_STRING();
|
||||
|
||||
|
@ -57,7 +57,7 @@ namespace Moses
|
||||
* called by inherited classes */
|
||||
TranslationOptionCollection::
|
||||
TranslationOptionCollection(ttasksptr const& ttask,
|
||||
InputType const& src,
|
||||
InputType const& src,
|
||||
size_t maxNoTransOptPerCoverage,
|
||||
float translationOptionThreshold)
|
||||
: m_ttask(ttask)
|
||||
@ -626,14 +626,13 @@ CacheLexReordering()
|
||||
{
|
||||
size_t const stop = m_source.GetSize();
|
||||
typedef StatefulFeatureFunction sfFF;
|
||||
BOOST_FOREACH(sfFF const* ff, sfFF::GetStatefulFeatureFunctions())
|
||||
{
|
||||
if (typeid(*ff) != typeid(LexicalReordering)) continue;
|
||||
LexicalReordering const& lr = static_cast<const LexicalReordering&>(*ff);
|
||||
for (size_t s = 0 ; s < stop ; s++)
|
||||
BOOST_FOREACH(TranslationOptionList& tol, m_collection[s])
|
||||
lr.SetCache(tol);
|
||||
}
|
||||
BOOST_FOREACH(sfFF const* ff, sfFF::GetStatefulFeatureFunctions()) {
|
||||
if (typeid(*ff) != typeid(LexicalReordering)) continue;
|
||||
LexicalReordering const& lr = static_cast<const LexicalReordering&>(*ff);
|
||||
for (size_t s = 0 ; s < stop ; s++)
|
||||
BOOST_FOREACH(TranslationOptionList& tol, m_collection[s])
|
||||
lr.SetCache(tol);
|
||||
}
|
||||
}
|
||||
|
||||
//! list of trans opt for a particular span
|
||||
|
@ -75,7 +75,7 @@ protected:
|
||||
InputPathList m_inputPathQueue;
|
||||
|
||||
TranslationOptionCollection(ttasksptr const& ttask,
|
||||
InputType const& src, size_t maxNoTransOptPerCoverage,
|
||||
InputType const& src, size_t maxNoTransOptPerCoverage,
|
||||
float translationOptionThreshold);
|
||||
|
||||
void CalcFutureScore();
|
||||
@ -177,8 +177,7 @@ public:
|
||||
return m_inputPathQueue;
|
||||
}
|
||||
|
||||
// Returns the translation task handle obtained by locking m_ttask.
// NOTE(review): m_ttask looks like a weak pointer, in which case the result
// is empty once the task is gone -- confirm against the member declaration.
ttasksptr GetTranslationTask() const
{
  return m_ttask.lock();
}
|
||||
TO_STRING();
|
||||
|
@ -21,7 +21,7 @@ namespace Moses
|
||||
/** constructor; just initialize the base class */
|
||||
TranslationOptionCollectionConfusionNet::
|
||||
TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
|
||||
const ConfusionNet &input,
|
||||
const ConfusionNet &input,
|
||||
size_t maxNoTransOptPerCoverage,
|
||||
float translationOptionThreshold)
|
||||
: TranslationOptionCollection(ttask,input, maxNoTransOptPerCoverage,
|
||||
|
@ -23,7 +23,7 @@ TranslationOptionCollectionLattice
|
||||
( ttasksptr const& ttask, const WordLattice &input,
|
||||
size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
|
||||
: TranslationOptionCollection(ttask, input, maxNoTransOptPerCoverage,
|
||||
translationOptionThreshold)
|
||||
translationOptionThreshold)
|
||||
{
|
||||
UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(),
|
||||
"Not for models using the legqacy binary phrase table");
|
||||
|
@ -53,7 +53,7 @@ TranslationTask
|
||||
boost::shared_ptr<TranslationTask>
|
||||
TranslationTask
|
||||
::create(boost::shared_ptr<InputType> const& source,
|
||||
boost::shared_ptr<IOWrapper> const& ioWrapper)
|
||||
boost::shared_ptr<IOWrapper> const& ioWrapper)
|
||||
{
|
||||
boost::shared_ptr<TranslationTask> ret(new TranslationTask(source, ioWrapper));
|
||||
ret->m_self = ret;
|
||||
@ -63,7 +63,7 @@ TranslationTask
|
||||
|
||||
/**
 * Construct a task from its input and its I/O wrapper.
 * Both shared pointers are stored as-is (m_source, m_ioWrapper); nothing
 * else is initialised here.
 */
TranslationTask
::TranslationTask(boost::shared_ptr<InputType> const& source,
                  boost::shared_ptr<IOWrapper> const& ioWrapper)
  : m_source(source)
  , m_ioWrapper(ioWrapper)
{
}
|
||||
|
||||
@ -82,37 +82,33 @@ TranslationTask
|
||||
if (!staticData.IsSyntax(algo))
|
||||
manager.reset(new Manager(this->self())); // phrase-based
|
||||
|
||||
else if (algo == SyntaxF2S || algo == SyntaxT2S)
|
||||
{ // STSG-based tree-to-string / forest-to-string decoding (ask Phil Williams)
|
||||
typedef Syntax::F2S::RuleMatcherCallback Callback;
|
||||
typedef Syntax::F2S::RuleMatcherHyperTree<Callback> RuleMatcher;
|
||||
manager.reset(new Syntax::F2S::Manager<RuleMatcher>(this->self()));
|
||||
}
|
||||
else if (algo == SyntaxF2S || algo == SyntaxT2S) {
|
||||
// STSG-based tree-to-string / forest-to-string decoding (ask Phil Williams)
|
||||
typedef Syntax::F2S::RuleMatcherCallback Callback;
|
||||
typedef Syntax::F2S::RuleMatcherHyperTree<Callback> RuleMatcher;
|
||||
manager.reset(new Syntax::F2S::Manager<RuleMatcher>(this->self()));
|
||||
}
|
||||
|
||||
else if (algo == SyntaxS2T)
|
||||
{ // new-style string-to-tree decoding (ask Phil Williams)
|
||||
S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
|
||||
if (algorithm == RecursiveCYKPlus)
|
||||
{
|
||||
typedef Syntax::S2T::EagerParserCallback Callback;
|
||||
typedef Syntax::S2T::RecursiveCYKPlusParser<Callback> Parser;
|
||||
manager.reset(new Syntax::S2T::Manager<Parser>(this->self()));
|
||||
}
|
||||
else if (algorithm == Scope3)
|
||||
{
|
||||
typedef Syntax::S2T::StandardParserCallback Callback;
|
||||
typedef Syntax::S2T::Scope3Parser<Callback> Parser;
|
||||
manager.reset(new Syntax::S2T::Manager<Parser>(this->self()));
|
||||
}
|
||||
else UTIL_THROW2("ERROR: unhandled S2T parsing algorithm");
|
||||
}
|
||||
else if (algo == SyntaxS2T) {
|
||||
// new-style string-to-tree decoding (ask Phil Williams)
|
||||
S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
|
||||
if (algorithm == RecursiveCYKPlus) {
|
||||
typedef Syntax::S2T::EagerParserCallback Callback;
|
||||
typedef Syntax::S2T::RecursiveCYKPlusParser<Callback> Parser;
|
||||
manager.reset(new Syntax::S2T::Manager<Parser>(this->self()));
|
||||
} else if (algorithm == Scope3) {
|
||||
typedef Syntax::S2T::StandardParserCallback Callback;
|
||||
typedef Syntax::S2T::Scope3Parser<Callback> Parser;
|
||||
manager.reset(new Syntax::S2T::Manager<Parser>(this->self()));
|
||||
} else UTIL_THROW2("ERROR: unhandled S2T parsing algorithm");
|
||||
}
|
||||
|
||||
else if (algo == SyntaxT2S_SCFG)
|
||||
{ // SCFG-based tree-to-string decoding (ask Phil Williams)
|
||||
typedef Syntax::F2S::RuleMatcherCallback Callback;
|
||||
typedef Syntax::T2S::RuleMatcherSCFG<Callback> RuleMatcher;
|
||||
manager.reset(new Syntax::T2S::Manager<RuleMatcher>(this->self()));
|
||||
}
|
||||
else if (algo == SyntaxT2S_SCFG) {
|
||||
// SCFG-based tree-to-string decoding (ask Phil Williams)
|
||||
typedef Syntax::F2S::RuleMatcherCallback Callback;
|
||||
typedef Syntax::T2S::RuleMatcherSCFG<Callback> RuleMatcher;
|
||||
manager.reset(new Syntax::T2S::Manager<RuleMatcher>(this->self()));
|
||||
}
|
||||
|
||||
else if (algo == ChartIncremental) // Ken's incremental decoding
|
||||
manager.reset(new Incremental::Manager(this->self()));
|
||||
@ -126,8 +122,8 @@ TranslationTask
|
||||
void TranslationTask::Run()
|
||||
{
|
||||
UTIL_THROW_IF2(!m_source || !m_ioWrapper,
|
||||
"Base Instances of TranslationTask must be initialized with"
|
||||
<< " input and iowrapper.");
|
||||
"Base Instances of TranslationTask must be initialized with"
|
||||
<< " input and iowrapper.");
|
||||
|
||||
|
||||
// shorthand for "global data"
|
||||
@ -152,7 +148,7 @@ void TranslationTask::Run()
|
||||
boost::shared_ptr<BaseManager> manager = SetupManager();
|
||||
|
||||
VERBOSE(1, "Line " << translationId << ": Initialize search took "
|
||||
<< initTime << " seconds total" << endl);
|
||||
<< initTime << " seconds total" << endl);
|
||||
|
||||
manager->Decode();
|
||||
|
||||
@ -209,9 +205,9 @@ void TranslationTask::Run()
|
||||
// report additional statistics
|
||||
manager->CalcDecoderStatistics();
|
||||
VERBOSE(1, "Line " << translationId << ": Additional reporting took "
|
||||
<< additionalReportingTime << " seconds total" << endl);
|
||||
<< additionalReportingTime << " seconds total" << endl);
|
||||
VERBOSE(1, "Line " << translationId << ": Translation took "
|
||||
<< translationTime << " seconds total" << endl);
|
||||
<< translationTime << " seconds total" << endl);
|
||||
IFVERBOSE(2) {
|
||||
PrintUserTime("Sentence Decoding Time:");
|
||||
}
|
||||
|
@ -40,7 +40,9 @@ class TranslationTask : public Moses::Task
|
||||
// Copy constructor with an empty body: no state is taken from `other`.
TranslationTask(TranslationTask const& other)
{
}
|
||||
|
||||
// Assignment that copies nothing: `other` is ignored and the object is
// returned unchanged.
TranslationTask const&
operator=(TranslationTask const& other)
{
  return *this;
}
|
||||
|
||||
protected:
|
||||
boost::weak_ptr<TranslationTask> m_self; // weak ptr to myself
|
||||
@ -48,7 +50,7 @@ protected:
|
||||
// pointer to ContextScope, which stores context-specific information
|
||||
TranslationTask() { } ;
|
||||
TranslationTask(boost::shared_ptr<Moses::InputType> const& source,
|
||||
boost::shared_ptr<Moses::IOWrapper> const& ioWrapper);
|
||||
boost::shared_ptr<Moses::IOWrapper> const& ioWrapper);
|
||||
// Yes, the constructor is protected.
|
||||
//
|
||||
// TranslationTasks can only be created through the creator
|
||||
@ -68,11 +70,15 @@ protected:
|
||||
public:
|
||||
|
||||
// Strong reference to this task, obtained by locking the m_self weak
// pointer; empty if m_self does not refer to a live object.
boost::shared_ptr<TranslationTask>
self()
{
  boost::shared_ptr<TranslationTask> me = m_self.lock();
  return me;
}
|
||||
|
||||
virtual
|
||||
boost::shared_ptr<TranslationTask const>
|
||||
self() const { return m_self.lock(); }
|
||||
self() const {
|
||||
return m_self.lock();
|
||||
}
|
||||
|
||||
// creator functions
|
||||
static boost::shared_ptr<TranslationTask> create();
|
||||
@ -84,7 +90,7 @@ public:
|
||||
static
|
||||
boost::shared_ptr<TranslationTask>
|
||||
create(boost::shared_ptr<Moses::InputType> const& source,
|
||||
boost::shared_ptr<Moses::IOWrapper> const& ioWrapper);
|
||||
boost::shared_ptr<Moses::IOWrapper> const& ioWrapper);
|
||||
|
||||
~TranslationTask();
|
||||
/** Translate one sentence
|
||||
@ -92,15 +98,16 @@ public:
|
||||
virtual void Run();
|
||||
|
||||
// Accessor for the input this task operates on (a copy of the m_source
// shared pointer).
boost::shared_ptr<Moses::InputType>
GetSource() const
{
  return m_source;
}
|
||||
|
||||
boost::shared_ptr<BaseManager>
|
||||
SetupManager(SearchAlgorithm algo = DefaultSearchAlgorithm);
|
||||
|
||||
|
||||
// Accessor for the task's context scope. Returns a reference to the
// m_scope member itself; raises through UTIL_THROW_IF2 ("No context scope!")
// when no scope has been set.
boost::shared_ptr<ContextScope> const&
GetScope() const
{
  UTIL_THROW_IF2(m_scope == NULL, "No context scope!");
  return m_scope;
}
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class TranslationTask;
|
||||
class TranslationTask;
|
||||
//! @todo what is this?
|
||||
class XMLParseOutput
|
||||
{
|
||||
|
@ -427,7 +427,7 @@ inline float CalcTranslationScore(const std::vector<float> &probVector,
|
||||
out << *this; \
|
||||
return out.str(); \
|
||||
} \
|
||||
|
||||
|
||||
//! delete and remove every element of a collection object such as set, list etc
|
||||
template<class COLL>
|
||||
void RemoveAllInColl(COLL &coll)
|
||||
|
@ -3,70 +3,67 @@
|
||||
|
||||
namespace MosesServer
|
||||
{
|
||||
using namespace std;
|
||||
using namespace std;
|
||||
|
||||
Optimizer::
|
||||
Optimizer()
|
||||
{
|
||||
// signature and help strings are documentation -- the client
|
||||
// can query this information with a system.methodSignature and
|
||||
// system.methodHelp RPC.
|
||||
this->_signature = "S:S";
|
||||
this->_help = "Optimizes multi-model translation model";
|
||||
}
|
||||
|
||||
void
|
||||
Optimizer::
|
||||
execute(xmlrpc_c::paramList const& paramList,
|
||||
xmlrpc_c::value * const retvalP)
|
||||
{
|
||||
#ifdef WITH_DLIB
|
||||
const params_t params = paramList.getStruct(0);
|
||||
params_t::const_iterator si;
|
||||
if ((si = params.find("model_name")) == params.end())
|
||||
{
|
||||
string msg = "Missing name of model to be optimized";
|
||||
msg += " (e.g. PhraseDictionaryMultiModelCounts0)";
|
||||
throw xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
|
||||
}
|
||||
const string model_name = xmlrpc_c::value_string(si->second);
|
||||
|
||||
if ((si = params.find("phrase_pairs")) == params.end())
|
||||
{
|
||||
throw xmlrpc_c::fault("Missing list of phrase pairs",
|
||||
xmlrpc_c::fault::CODE_PARSE);
|
||||
}
|
||||
|
||||
|
||||
vector<pair<string, string> > phrase_pairs;
|
||||
|
||||
xmlrpc_c::value_array pp_array = xmlrpc_c::value_array(si->second);
|
||||
vector<xmlrpc_c::value> ppValVec(pp_array.vectorValueValue());
|
||||
for (size_t i = 0; i < ppValVec.size(); ++i)
|
||||
{
|
||||
xmlrpc_c::value_array pp_array
|
||||
= xmlrpc_c::value_array(ppValVec[i]);
|
||||
vector<xmlrpc_c::value> pp(pp_array.vectorValueValue());
|
||||
string L1 = xmlrpc_c::value_string(pp[0]);
|
||||
string L2 = xmlrpc_c::value_string(pp[1]);
|
||||
phrase_pairs.push_back(make_pair(L1,L2));
|
||||
}
|
||||
|
||||
// PhraseDictionaryMultiModel* pdmm
|
||||
// = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
|
||||
PhraseDictionaryMultiModel* pdmm = FindPhraseDictionary(model_name);
|
||||
vector<float> weight_vector = pdmm->MinimizePerplexity(phrase_pairs);
|
||||
|
||||
vector<xmlrpc_c::value> weight_vector_ret;
|
||||
for (size_t i=0;i < weight_vector.size();i++)
|
||||
weight_vector_ret.push_back(xmlrpc_c::value_double(weight_vector[i]));
|
||||
|
||||
*retvalP = xmlrpc_c::value_array(weight_vector_ret);
|
||||
#else
|
||||
string errmsg = "Error: Perplexity minimization requires dlib ";
|
||||
errmsg += "(compilation option --with-dlib)";
|
||||
std::cerr << errmsg << std::endl;
|
||||
*retvalP = xmlrpc_c::value_string(errmsg);
|
||||
#endif
|
||||
}
|
||||
Optimizer::
|
||||
Optimizer()
|
||||
{
|
||||
// signature and help strings are documentation -- the client
|
||||
// can query this information with a system.methodSignature and
|
||||
// system.methodHelp RPC.
|
||||
this->_signature = "S:S";
|
||||
this->_help = "Optimizes multi-model translation model";
|
||||
}
|
||||
|
||||
void
|
||||
Optimizer::
|
||||
execute(xmlrpc_c::paramList const& paramList,
|
||||
xmlrpc_c::value * const retvalP)
|
||||
{
|
||||
#ifdef WITH_DLIB
|
||||
const params_t params = paramList.getStruct(0);
|
||||
params_t::const_iterator si;
|
||||
if ((si = params.find("model_name")) == params.end()) {
|
||||
string msg = "Missing name of model to be optimized";
|
||||
msg += " (e.g. PhraseDictionaryMultiModelCounts0)";
|
||||
throw xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
|
||||
}
|
||||
const string model_name = xmlrpc_c::value_string(si->second);
|
||||
|
||||
if ((si = params.find("phrase_pairs")) == params.end()) {
|
||||
throw xmlrpc_c::fault("Missing list of phrase pairs",
|
||||
xmlrpc_c::fault::CODE_PARSE);
|
||||
}
|
||||
|
||||
|
||||
vector<pair<string, string> > phrase_pairs;
|
||||
|
||||
xmlrpc_c::value_array pp_array = xmlrpc_c::value_array(si->second);
|
||||
vector<xmlrpc_c::value> ppValVec(pp_array.vectorValueValue());
|
||||
for (size_t i = 0; i < ppValVec.size(); ++i) {
|
||||
xmlrpc_c::value_array pp_array
|
||||
= xmlrpc_c::value_array(ppValVec[i]);
|
||||
vector<xmlrpc_c::value> pp(pp_array.vectorValueValue());
|
||||
string L1 = xmlrpc_c::value_string(pp[0]);
|
||||
string L2 = xmlrpc_c::value_string(pp[1]);
|
||||
phrase_pairs.push_back(make_pair(L1,L2));
|
||||
}
|
||||
|
||||
// PhraseDictionaryMultiModel* pdmm
|
||||
// = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
|
||||
PhraseDictionaryMultiModel* pdmm = FindPhraseDictionary(model_name);
|
||||
vector<float> weight_vector = pdmm->MinimizePerplexity(phrase_pairs);
|
||||
|
||||
vector<xmlrpc_c::value> weight_vector_ret;
|
||||
for (size_t i=0; i < weight_vector.size(); i++)
|
||||
weight_vector_ret.push_back(xmlrpc_c::value_double(weight_vector[i]));
|
||||
|
||||
*retvalP = xmlrpc_c::value_array(weight_vector_ret);
|
||||
#else
|
||||
string errmsg = "Error: Perplexity minimization requires dlib ";
|
||||
errmsg += "(compilation option --with-dlib)";
|
||||
std::cerr << errmsg << std::endl;
|
||||
*retvalP = xmlrpc_c::value_string(errmsg);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -6,12 +6,12 @@
|
||||
|
||||
namespace MosesServer
|
||||
{
|
||||
class
|
||||
class
|
||||
Optimizer : public xmlrpc_c::method
|
||||
{
|
||||
public:
|
||||
Optimizer();
|
||||
void execute(xmlrpc_c::paramList const& paramList,
|
||||
xmlrpc_c::value * const retvalP);
|
||||
};
|
||||
{
|
||||
public:
|
||||
Optimizer();
|
||||
void execute(xmlrpc_c::paramList const& paramList,
|
||||
xmlrpc_c::value * const retvalP);
|
||||
};
|
||||
}
|
||||
|
@ -3,372 +3,363 @@
|
||||
|
||||
namespace MosesServer
|
||||
{
|
||||
using namespace std;
|
||||
using Moses::Hypothesis;
|
||||
using Moses::StaticData;
|
||||
using Moses::WordsRange;
|
||||
using Moses::ChartHypothesis;
|
||||
using Moses::Phrase;
|
||||
using Moses::Manager;
|
||||
using Moses::SearchGraphNode;
|
||||
using Moses::TrellisPathList;
|
||||
using Moses::TranslationOptionCollection;
|
||||
using Moses::TranslationOptionList;
|
||||
using Moses::TranslationOption;
|
||||
using Moses::TargetPhrase;
|
||||
using Moses::FValue;
|
||||
using Moses::PhraseDictionaryMultiModel;
|
||||
using Moses::FindPhraseDictionary;
|
||||
using Moses::Sentence;
|
||||
using namespace std;
|
||||
using Moses::Hypothesis;
|
||||
using Moses::StaticData;
|
||||
using Moses::WordsRange;
|
||||
using Moses::ChartHypothesis;
|
||||
using Moses::Phrase;
|
||||
using Moses::Manager;
|
||||
using Moses::SearchGraphNode;
|
||||
using Moses::TrellisPathList;
|
||||
using Moses::TranslationOptionCollection;
|
||||
using Moses::TranslationOptionList;
|
||||
using Moses::TranslationOption;
|
||||
using Moses::TargetPhrase;
|
||||
using Moses::FValue;
|
||||
using Moses::PhraseDictionaryMultiModel;
|
||||
using Moses::FindPhraseDictionary;
|
||||
using Moses::Sentence;
|
||||
|
||||
boost::shared_ptr<TranslationRequest>
|
||||
TranslationRequest::
|
||||
create(xmlrpc_c::paramList const& paramList,
|
||||
boost::condition_variable& cond,
|
||||
boost::mutex& mut)
|
||||
boost::shared_ptr<TranslationRequest>
|
||||
TranslationRequest::
|
||||
create(xmlrpc_c::paramList const& paramList,
|
||||
boost::condition_variable& cond,
|
||||
boost::mutex& mut)
|
||||
{
|
||||
boost::shared_ptr<TranslationRequest> ret;
|
||||
ret.reset(new TranslationRequest(paramList,cond, mut));
|
||||
ret->m_self = ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
Run()
|
||||
{
|
||||
parse_request(m_paramList.getStruct(0));
|
||||
|
||||
Moses::StaticData const& SD = Moses::StaticData::Instance();
|
||||
|
||||
//Make sure alternative paths are retained, if necessary
|
||||
if (m_withGraphInfo || m_nbestSize>0)
|
||||
// why on earth is this a global variable? Is this even thread-safe???? UG
|
||||
(const_cast<Moses::StaticData&>(SD)).SetOutputSearchGraph(true);
|
||||
|
||||
std::stringstream out, graphInfo, transCollOpts;
|
||||
|
||||
if (SD.IsSyntax())
|
||||
run_chart_decoder();
|
||||
else
|
||||
run_phrase_decoder();
|
||||
|
||||
XVERBOSE(1,"Output: " << out.str() << endl);
|
||||
{
|
||||
boost::shared_ptr<TranslationRequest> ret;
|
||||
ret.reset(new TranslationRequest(paramList,cond, mut));
|
||||
ret->m_self = ret;
|
||||
return ret;
|
||||
boost::lock_guard<boost::mutex> lock(m_mutex);
|
||||
m_done = true;
|
||||
}
|
||||
m_cond.notify_one();
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
Run()
|
||||
{
|
||||
parse_request(m_paramList.getStruct(0));
|
||||
}
|
||||
|
||||
Moses::StaticData const& SD = Moses::StaticData::Instance();
|
||||
/// add phrase alignment information from a Hypothesis
|
||||
void
|
||||
TranslationRequest::
|
||||
add_phrase_aln_info(Hypothesis const& h, vector<xmlrpc_c::value>& aInfo) const
|
||||
{
|
||||
if (!m_withAlignInfo) return;
|
||||
WordsRange const& trg = h.GetCurrTargetWordsRange();
|
||||
WordsRange const& src = h.GetCurrSourceWordsRange();
|
||||
|
||||
//Make sure alternative paths are retained, if necessary
|
||||
if (m_withGraphInfo || m_nbestSize>0)
|
||||
// why on earth is this a global variable? Is this even thread-safe???? UG
|
||||
(const_cast<Moses::StaticData&>(SD)).SetOutputSearchGraph(true);
|
||||
std::map<std::string, xmlrpc_c::value> pAlnInfo;
|
||||
pAlnInfo["tgt-start"] = xmlrpc_c::value_int(trg.GetStartPos());
|
||||
pAlnInfo["src-start"] = xmlrpc_c::value_int(src.GetStartPos());
|
||||
pAlnInfo["src-end"] = xmlrpc_c::value_int(src.GetEndPos());
|
||||
aInfo.push_back(xmlrpc_c::value_struct(pAlnInfo));
|
||||
}
|
||||
|
||||
std::stringstream out, graphInfo, transCollOpts;
|
||||
void
|
||||
TranslationRequest::
|
||||
outputChartHypo(ostream& out, const ChartHypothesis* hypo)
|
||||
{
|
||||
Phrase outPhrase(20);
|
||||
hypo->GetOutputPhrase(outPhrase);
|
||||
|
||||
if (SD.IsSyntax())
|
||||
run_chart_decoder();
|
||||
else
|
||||
run_phrase_decoder();
|
||||
// delete 1st & last
|
||||
assert(outPhrase.GetSize() >= 2);
|
||||
outPhrase.RemoveWord(0);
|
||||
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
|
||||
for (size_t pos = 0 ; pos < outPhrase.GetSize() ; pos++)
|
||||
out << *outPhrase.GetFactor(pos, 0) << " ";
|
||||
}
|
||||
|
||||
XVERBOSE(1,"Output: " << out.str() << endl);
|
||||
{
|
||||
boost::lock_guard<boost::mutex> lock(m_mutex);
|
||||
m_done = true;
|
||||
bool
|
||||
TranslationRequest::
|
||||
compareSearchGraphNode(const Moses::SearchGraphNode& a,
|
||||
const Moses::SearchGraphNode& b)
|
||||
{
|
||||
return a.hypo->GetId() < b.hypo->GetId();
|
||||
}
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
insertGraphInfo(Manager& manager, map<string, xmlrpc_c::value>& retData)
|
||||
{
|
||||
using xmlrpc_c::value_int;
|
||||
using xmlrpc_c::value_double;
|
||||
using xmlrpc_c::value_struct;
|
||||
using xmlrpc_c::value_string;
|
||||
vector<xmlrpc_c::value> searchGraphXml;
|
||||
vector<SearchGraphNode> searchGraph;
|
||||
manager.GetSearchGraph(searchGraph);
|
||||
std::sort(searchGraph.begin(), searchGraph.end());
|
||||
BOOST_FOREACH(Moses::SearchGraphNode const& n, searchGraph) {
|
||||
map<string, xmlrpc_c::value> x; // search graph xml node
|
||||
x["forward"] = value_double(n.forward);
|
||||
x["fscore"] = value_double(n.fscore);
|
||||
const Hypothesis* hypo = n.hypo;
|
||||
x["hyp"] = value_int(hypo->GetId());
|
||||
x["stack"] = value_int(hypo->GetWordsBitmap().GetNumWordsCovered());
|
||||
if (hypo->GetId() != 0) {
|
||||
const Hypothesis *prevHypo = hypo->GetPrevHypo();
|
||||
x["back"] = value_int(prevHypo->GetId());
|
||||
x["score"] = value_double(hypo->GetScore());
|
||||
x["transition"] = value_double(hypo->GetScore() - prevHypo->GetScore());
|
||||
if (n.recombinationHypo)
|
||||
x["recombined"] = value_int(n.recombinationHypo->GetId());
|
||||
x["cover-start"] = value_int(hypo->GetCurrSourceWordsRange().GetStartPos());
|
||||
x["cover-end"] = value_int(hypo->GetCurrSourceWordsRange().GetEndPos());
|
||||
x["out"] = value_string(hypo->GetCurrTargetPhrase().GetStringRep(StaticData::Instance().GetOutputFactorOrder()));
|
||||
}
|
||||
m_cond.notify_one();
|
||||
|
||||
searchGraphXml.push_back(value_struct(x));
|
||||
}
|
||||
retData["sg"] = xmlrpc_c::value_array(searchGraphXml);
|
||||
}
|
||||
|
||||
/// add phrase alignment information from a Hypothesis
|
||||
void
|
||||
TranslationRequest::
|
||||
add_phrase_aln_info(Hypothesis const& h, vector<xmlrpc_c::value>& aInfo) const
|
||||
{
|
||||
if (!m_withAlignInfo) return;
|
||||
WordsRange const& trg = h.GetCurrTargetWordsRange();
|
||||
WordsRange const& src = h.GetCurrSourceWordsRange();
|
||||
void
|
||||
TranslationRequest::
|
||||
output_phrase(ostream& out, Phrase const& phrase) const
|
||||
{
|
||||
if (!m_reportAllFactors) {
|
||||
for (size_t i = 0 ; i < phrase.GetSize(); ++i)
|
||||
out << *phrase.GetFactor(i, 0) << " ";
|
||||
} else out << phrase;
|
||||
}
|
||||
|
||||
std::map<std::string, xmlrpc_c::value> pAlnInfo;
|
||||
pAlnInfo["tgt-start"] = xmlrpc_c::value_int(trg.GetStartPos());
|
||||
pAlnInfo["src-start"] = xmlrpc_c::value_int(src.GetStartPos());
|
||||
pAlnInfo["src-end"] = xmlrpc_c::value_int(src.GetEndPos());
|
||||
aInfo.push_back(xmlrpc_c::value_struct(pAlnInfo));
|
||||
void
|
||||
TranslationRequest::
|
||||
outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
|
||||
{
|
||||
TrellisPathList nBestList;
|
||||
vector<xmlrpc_c::value> nBestXml;
|
||||
manager.CalcNBest(m_nbestSize, nBestList, m_nbestDistinct);
|
||||
|
||||
BOOST_FOREACH(Moses::TrellisPath const* path, nBestList) {
|
||||
vector<const Hypothesis *> const& E = path->GetEdges();
|
||||
if (!E.size()) continue;
|
||||
std::map<std::string, xmlrpc_c::value> nBestXmlItem;
|
||||
pack_hypothesis(E, "hyp", nBestXmlItem);
|
||||
if (m_withScoreBreakdown) {
|
||||
// should the score breakdown be reported in a more structured manner?
|
||||
ostringstream buf;
|
||||
path->GetScoreBreakdown()->OutputAllFeatureScores(buf);
|
||||
nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str());
|
||||
}
|
||||
|
||||
// weighted score
|
||||
nBestXmlItem["totalScore"] = xmlrpc_c::value_double(path->GetTotalScore());
|
||||
nBestXml.push_back(xmlrpc_c::value_struct(nBestXmlItem));
|
||||
}
|
||||
retData["nbest"] = xmlrpc_c::value_array(nBestXml);
|
||||
}
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
outputChartHypo(ostream& out, const ChartHypothesis* hypo)
|
||||
{
|
||||
Phrase outPhrase(20);
|
||||
hypo->GetOutputPhrase(outPhrase);
|
||||
void
|
||||
TranslationRequest::
|
||||
insertTranslationOptions(Moses::Manager& manager,
|
||||
std::map<std::string, xmlrpc_c::value>& retData)
|
||||
{
|
||||
const TranslationOptionCollection* toptsColl
|
||||
= manager.getSntTranslationOptions();
|
||||
vector<xmlrpc_c::value> toptsXml;
|
||||
size_t const stop = toptsColl->GetSource().GetSize();
|
||||
TranslationOptionList const* tol;
|
||||
for (size_t s = 0 ; s < stop ; ++s) {
|
||||
for (size_t e = s;
|
||||
(tol = toptsColl->GetTranslationOptionList(s,e)) != NULL;
|
||||
++e) {
|
||||
BOOST_FOREACH(TranslationOption const* topt, *tol) {
|
||||
std::map<std::string, xmlrpc_c::value> toptXml;
|
||||
TargetPhrase const& tp = topt->GetTargetPhrase();
|
||||
StaticData const& GLOBAL = StaticData::Instance();
|
||||
std::string tphrase = tp.GetStringRep(GLOBAL.GetOutputFactorOrder());
|
||||
toptXml["phrase"] = xmlrpc_c::value_string(tphrase);
|
||||
toptXml["fscore"] = xmlrpc_c::value_double(topt->GetFutureScore());
|
||||
toptXml["start"] = xmlrpc_c::value_int(s);
|
||||
toptXml["end"] = xmlrpc_c::value_int(e);
|
||||
vector<xmlrpc_c::value> scoresXml;
|
||||
const std::valarray<FValue> &scores
|
||||
= topt->GetScoreBreakdown().getCoreFeatures();
|
||||
for (size_t j = 0; j < scores.size(); ++j)
|
||||
scoresXml.push_back(xmlrpc_c::value_double(scores[j]));
|
||||
|
||||
// delete 1st & last
|
||||
assert(outPhrase.GetSize() >= 2);
|
||||
outPhrase.RemoveWord(0);
|
||||
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
|
||||
for (size_t pos = 0 ; pos < outPhrase.GetSize() ; pos++)
|
||||
out << *outPhrase.GetFactor(pos, 0) << " ";
|
||||
}
|
||||
|
||||
bool
|
||||
TranslationRequest::
|
||||
compareSearchGraphNode(const Moses::SearchGraphNode& a,
|
||||
const Moses::SearchGraphNode& b)
|
||||
{ return a.hypo->GetId() < b.hypo->GetId(); }
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
insertGraphInfo(Manager& manager, map<string, xmlrpc_c::value>& retData)
|
||||
{
|
||||
using xmlrpc_c::value_int;
|
||||
using xmlrpc_c::value_double;
|
||||
using xmlrpc_c::value_struct;
|
||||
using xmlrpc_c::value_string;
|
||||
vector<xmlrpc_c::value> searchGraphXml;
|
||||
vector<SearchGraphNode> searchGraph;
|
||||
manager.GetSearchGraph(searchGraph);
|
||||
std::sort(searchGraph.begin(), searchGraph.end());
|
||||
BOOST_FOREACH(Moses::SearchGraphNode const& n, searchGraph)
|
||||
{
|
||||
map<string, xmlrpc_c::value> x; // search graph xml node
|
||||
x["forward"] = value_double(n.forward);
|
||||
x["fscore"] = value_double(n.fscore);
|
||||
const Hypothesis* hypo = n.hypo;
|
||||
x["hyp"] = value_int(hypo->GetId());
|
||||
x["stack"] = value_int(hypo->GetWordsBitmap().GetNumWordsCovered());
|
||||
if (hypo->GetId() != 0)
|
||||
{
|
||||
const Hypothesis *prevHypo = hypo->GetPrevHypo();
|
||||
x["back"] = value_int(prevHypo->GetId());
|
||||
x["score"] = value_double(hypo->GetScore());
|
||||
x["transition"] = value_double(hypo->GetScore() - prevHypo->GetScore());
|
||||
if (n.recombinationHypo)
|
||||
x["recombined"] = value_int(n.recombinationHypo->GetId());
|
||||
x["cover-start"] = value_int(hypo->GetCurrSourceWordsRange().GetStartPos());
|
||||
x["cover-end"] = value_int(hypo->GetCurrSourceWordsRange().GetEndPos());
|
||||
x["out"] = value_string(hypo->GetCurrTargetPhrase().GetStringRep(StaticData::Instance().GetOutputFactorOrder()));
|
||||
}
|
||||
searchGraphXml.push_back(value_struct(x));
|
||||
toptXml["scores"] = xmlrpc_c::value_array(scoresXml);
|
||||
toptsXml.push_back(xmlrpc_c::value_struct(toptXml));
|
||||
}
|
||||
retData["sg"] = xmlrpc_c::value_array(searchGraphXml);
|
||||
}
|
||||
}
|
||||
retData["topt"] = xmlrpc_c::value_array(toptsXml);
|
||||
}
|
||||
|
||||
bool
|
||||
check(std::map<std::string, xmlrpc_c::value> const& params, std::string const key)
|
||||
{
|
||||
std::map<std::string, xmlrpc_c::value>::const_iterator m;
|
||||
return (params.find(key) != params.end());
|
||||
}
|
||||
|
||||
TranslationRequest::
|
||||
TranslationRequest(xmlrpc_c::paramList const& paramList,
|
||||
boost::condition_variable& cond, boost::mutex& mut)
|
||||
: m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList)
|
||||
{ }
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
parse_request(std::map<std::string, xmlrpc_c::value> const& params)
|
||||
{
|
||||
// parse XMLRPC request
|
||||
// params_t const params = m_paramList.getStruct(0);
|
||||
m_paramList.verifyEnd(1); // ??? UG
|
||||
|
||||
// source text must be given, or we don't know what to translate
|
||||
typedef std::map<std::string, xmlrpc_c::value> params_t;
|
||||
params_t::const_iterator si = params.find("text");
|
||||
if (si == params.end())
|
||||
throw xmlrpc_c::fault("Missing source text", xmlrpc_c::fault::CODE_PARSE);
|
||||
m_source_string = xmlrpc_c::value_string(si->second);
|
||||
XVERBOSE(1,"Input: " << m_source_string << endl);
|
||||
|
||||
m_withAlignInfo = check(params, "align");
|
||||
m_withWordAlignInfo = check(params, "word-align");
|
||||
m_withGraphInfo = check(params, "sg");
|
||||
m_withTopts = check(params, "topt");
|
||||
m_reportAllFactors = check(params, "report-all-factors");
|
||||
m_nbestDistinct = check(params, "nbest-distinct");
|
||||
m_withScoreBreakdown = check(params, "add-score-breakdown");
|
||||
m_source.reset(new Sentence(0,m_source_string));
|
||||
si = params.find("lambda");
|
||||
if (si != params.end()) {
|
||||
// muMo = multiModel
|
||||
xmlrpc_c::value_array muMoArray = xmlrpc_c::value_array(si->second);
|
||||
vector<xmlrpc_c::value> muMoValVec(muMoArray.vectorValueValue());
|
||||
vector<float> w(muMoValVec.size());
|
||||
for (size_t i = 0; i < muMoValVec.size(); ++i)
|
||||
w[i] = xmlrpc_c::value_double(muMoValVec[i]);
|
||||
if (w.size() && (si = params.find("model_name")) != params.end()) {
|
||||
string const model_name = xmlrpc_c::value_string(si->second);
|
||||
PhraseDictionaryMultiModel* pdmm
|
||||
= (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
|
||||
// Moses::PhraseDictionaryMultiModel* pdmm
|
||||
// = FindPhraseDictionary(model_name);
|
||||
pdmm->SetTemporaryMultiModelWeightsVector(w);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
output_phrase(ostream& out, Phrase const& phrase) const
|
||||
{
|
||||
if (!m_reportAllFactors)
|
||||
{
|
||||
for (size_t i = 0 ; i < phrase.GetSize(); ++i)
|
||||
out << *phrase.GetFactor(i, 0) << " ";
|
||||
}
|
||||
else out << phrase;
|
||||
// // biased sampling for suffix-array-based sampling phrase table?
|
||||
// if ((si = params.find("bias")) != params.end())
|
||||
// {
|
||||
// std::vector<xmlrpc_c::value> tmp
|
||||
// = xmlrpc_c::value_array(si->second).cvalue();
|
||||
// for (size_t i = 1; i < tmp.size(); i += 2)
|
||||
// m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]);
|
||||
// }
|
||||
} // end of Translationtask::parse_request()
|
||||
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
run_chart_decoder()
|
||||
{
|
||||
Moses::TreeInput tinput;
|
||||
istringstream buf(m_source_string + "\n");
|
||||
tinput.Read(buf, StaticData::Instance().GetInputFactorOrder());
|
||||
|
||||
Moses::ChartManager manager(this->self());
|
||||
manager.Decode();
|
||||
|
||||
const Moses::ChartHypothesis *hypo = manager.GetBestHypothesis();
|
||||
ostringstream out;
|
||||
outputChartHypo(out,hypo);
|
||||
|
||||
m_target_string = out.str();
|
||||
m_retData["text"] = xmlrpc_c::value_string(m_target_string);
|
||||
|
||||
if (m_withGraphInfo) {
|
||||
std::ostringstream sgstream;
|
||||
manager.OutputSearchGraphMoses(sgstream);
|
||||
m_retData["sg"] = xmlrpc_c::value_string(sgstream.str());
|
||||
}
|
||||
} // end of TranslationRequest::run_chart_decoder()
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
|
||||
{
|
||||
TrellisPathList nBestList;
|
||||
vector<xmlrpc_c::value> nBestXml;
|
||||
manager.CalcNBest(m_nbestSize, nBestList, m_nbestDistinct);
|
||||
void
|
||||
TranslationRequest::
|
||||
pack_hypothesis(vector<Hypothesis const* > const& edges, string const& key,
|
||||
map<string, xmlrpc_c::value> & dest) const
|
||||
{
|
||||
// target string
|
||||
ostringstream target;
|
||||
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
|
||||
output_phrase(target, e->GetCurrTargetPhrase());
|
||||
dest[key] = xmlrpc_c::value_string(target.str());
|
||||
|
||||
BOOST_FOREACH(Moses::TrellisPath const* path, nBestList)
|
||||
{
|
||||
vector<const Hypothesis *> const& E = path->GetEdges();
|
||||
if (!E.size()) continue;
|
||||
std::map<std::string, xmlrpc_c::value> nBestXmlItem;
|
||||
pack_hypothesis(E, "hyp", nBestXmlItem);
|
||||
if (m_withScoreBreakdown)
|
||||
{
|
||||
// should the score breakdown be reported in a more structured manner?
|
||||
ostringstream buf;
|
||||
path->GetScoreBreakdown()->OutputAllFeatureScores(buf);
|
||||
nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str());
|
||||
}
|
||||
if (m_withAlignInfo) {
|
||||
// phrase alignment, if requested
|
||||
|
||||
// weighted score
|
||||
nBestXmlItem["totalScore"] = xmlrpc_c::value_double(path->GetTotalScore());
|
||||
nBestXml.push_back(xmlrpc_c::value_struct(nBestXmlItem));
|
||||
}
|
||||
retData["nbest"] = xmlrpc_c::value_array(nBestXml);
|
||||
}
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
insertTranslationOptions(Moses::Manager& manager,
|
||||
std::map<std::string, xmlrpc_c::value>& retData)
|
||||
{
|
||||
const TranslationOptionCollection* toptsColl
|
||||
= manager.getSntTranslationOptions();
|
||||
vector<xmlrpc_c::value> toptsXml;
|
||||
size_t const stop = toptsColl->GetSource().GetSize();
|
||||
TranslationOptionList const* tol;
|
||||
for (size_t s = 0 ; s < stop ; ++s)
|
||||
{
|
||||
for (size_t e = s;
|
||||
(tol = toptsColl->GetTranslationOptionList(s,e)) != NULL;
|
||||
++e)
|
||||
{
|
||||
BOOST_FOREACH(TranslationOption const* topt, *tol)
|
||||
{
|
||||
std::map<std::string, xmlrpc_c::value> toptXml;
|
||||
TargetPhrase const& tp = topt->GetTargetPhrase();
|
||||
StaticData const& GLOBAL = StaticData::Instance();
|
||||
std::string tphrase = tp.GetStringRep(GLOBAL.GetOutputFactorOrder());
|
||||
toptXml["phrase"] = xmlrpc_c::value_string(tphrase);
|
||||
toptXml["fscore"] = xmlrpc_c::value_double(topt->GetFutureScore());
|
||||
toptXml["start"] = xmlrpc_c::value_int(s);
|
||||
toptXml["end"] = xmlrpc_c::value_int(e);
|
||||
vector<xmlrpc_c::value> scoresXml;
|
||||
const std::valarray<FValue> &scores
|
||||
= topt->GetScoreBreakdown().getCoreFeatures();
|
||||
for (size_t j = 0; j < scores.size(); ++j)
|
||||
scoresXml.push_back(xmlrpc_c::value_double(scores[j]));
|
||||
|
||||
toptXml["scores"] = xmlrpc_c::value_array(scoresXml);
|
||||
toptsXml.push_back(xmlrpc_c::value_struct(toptXml));
|
||||
}
|
||||
}
|
||||
}
|
||||
retData["topt"] = xmlrpc_c::value_array(toptsXml);
|
||||
}
|
||||
|
||||
bool
|
||||
check(std::map<std::string, xmlrpc_c::value> const& params, std::string const key)
|
||||
{
|
||||
std::map<std::string, xmlrpc_c::value>::const_iterator m;
|
||||
return (params.find(key) != params.end());
|
||||
}
|
||||
|
||||
TranslationRequest::
|
||||
TranslationRequest(xmlrpc_c::paramList const& paramList,
|
||||
boost::condition_variable& cond, boost::mutex& mut)
|
||||
: m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList)
|
||||
{ }
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
parse_request(std::map<std::string, xmlrpc_c::value> const& params)
|
||||
{ // parse XMLRPC request
|
||||
// params_t const params = m_paramList.getStruct(0);
|
||||
m_paramList.verifyEnd(1); // ??? UG
|
||||
|
||||
// source text must be given, or we don't know what to translate
|
||||
typedef std::map<std::string, xmlrpc_c::value> params_t;
|
||||
params_t::const_iterator si = params.find("text");
|
||||
if (si == params.end())
|
||||
throw xmlrpc_c::fault("Missing source text", xmlrpc_c::fault::CODE_PARSE);
|
||||
m_source_string = xmlrpc_c::value_string(si->second);
|
||||
XVERBOSE(1,"Input: " << m_source_string << endl);
|
||||
|
||||
m_withAlignInfo = check(params, "align");
|
||||
m_withWordAlignInfo = check(params, "word-align");
|
||||
m_withGraphInfo = check(params, "sg");
|
||||
m_withTopts = check(params, "topt");
|
||||
m_reportAllFactors = check(params, "report-all-factors");
|
||||
m_nbestDistinct = check(params, "nbest-distinct");
|
||||
m_withScoreBreakdown = check(params, "add-score-breakdown");
|
||||
m_source.reset(new Sentence(0,m_source_string));
|
||||
si = params.find("lambda");
|
||||
if (si != params.end())
|
||||
{
|
||||
// muMo = multiModel
|
||||
xmlrpc_c::value_array muMoArray = xmlrpc_c::value_array(si->second);
|
||||
vector<xmlrpc_c::value> muMoValVec(muMoArray.vectorValueValue());
|
||||
vector<float> w(muMoValVec.size());
|
||||
for (size_t i = 0; i < muMoValVec.size(); ++i)
|
||||
w[i] = xmlrpc_c::value_double(muMoValVec[i]);
|
||||
if (w.size() && (si = params.find("model_name")) != params.end())
|
||||
{
|
||||
string const model_name = xmlrpc_c::value_string(si->second);
|
||||
PhraseDictionaryMultiModel* pdmm
|
||||
= (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
|
||||
// Moses::PhraseDictionaryMultiModel* pdmm
|
||||
// = FindPhraseDictionary(model_name);
|
||||
pdmm->SetTemporaryMultiModelWeightsVector(w);
|
||||
}
|
||||
}
|
||||
|
||||
// // biased sampling for suffix-array-based sampling phrase table?
|
||||
// if ((si = params.find("bias")) != params.end())
|
||||
// {
|
||||
// std::vector<xmlrpc_c::value> tmp
|
||||
// = xmlrpc_c::value_array(si->second).cvalue();
|
||||
// for (size_t i = 1; i < tmp.size(); i += 2)
|
||||
// m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]);
|
||||
// }
|
||||
} // end of Translationtask::parse_request()
|
||||
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
run_chart_decoder()
|
||||
{
|
||||
Moses::TreeInput tinput;
|
||||
istringstream buf(m_source_string + "\n");
|
||||
tinput.Read(buf, StaticData::Instance().GetInputFactorOrder());
|
||||
|
||||
Moses::ChartManager manager(this->self());
|
||||
manager.Decode();
|
||||
|
||||
const Moses::ChartHypothesis *hypo = manager.GetBestHypothesis();
|
||||
ostringstream out;
|
||||
outputChartHypo(out,hypo);
|
||||
|
||||
m_target_string = out.str();
|
||||
m_retData["text"] = xmlrpc_c::value_string(m_target_string);
|
||||
|
||||
if (m_withGraphInfo)
|
||||
{
|
||||
std::ostringstream sgstream;
|
||||
manager.OutputSearchGraphMoses(sgstream);
|
||||
m_retData["sg"] = xmlrpc_c::value_string(sgstream.str());
|
||||
}
|
||||
} // end of TranslationRequest::run_chart_decoder()
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
pack_hypothesis(vector<Hypothesis const* > const& edges, string const& key,
|
||||
map<string, xmlrpc_c::value> & dest) const
|
||||
{
|
||||
// target string
|
||||
ostringstream target;
|
||||
vector<xmlrpc_c::value> p_aln;
|
||||
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
|
||||
output_phrase(target, e->GetCurrTargetPhrase());
|
||||
dest[key] = xmlrpc_c::value_string(target.str());
|
||||
|
||||
if (m_withAlignInfo)
|
||||
{ // phrase alignment, if requested
|
||||
|
||||
vector<xmlrpc_c::value> p_aln;
|
||||
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
|
||||
add_phrase_aln_info(*e, p_aln);
|
||||
dest["align"] = xmlrpc_c::value_array(p_aln);
|
||||
}
|
||||
|
||||
if (m_withWordAlignInfo)
|
||||
{ // word alignment, if requested
|
||||
vector<xmlrpc_c::value> w_aln;
|
||||
BOOST_FOREACH(Hypothesis const* e, edges)
|
||||
e->OutputLocalWordAlignment(w_aln);
|
||||
dest["word-align"] = xmlrpc_c::value_array(w_aln);
|
||||
}
|
||||
add_phrase_aln_info(*e, p_aln);
|
||||
dest["align"] = xmlrpc_c::value_array(p_aln);
|
||||
}
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
pack_hypothesis(Hypothesis const* h, string const& key,
|
||||
map<string, xmlrpc_c::value>& dest) const
|
||||
{
|
||||
using namespace std;
|
||||
vector<Hypothesis const*> edges;
|
||||
for (;h; h = h->GetPrevHypo())
|
||||
edges.push_back(h);
|
||||
pack_hypothesis(edges, key, dest);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
run_phrase_decoder()
|
||||
{
|
||||
Manager manager(this->self());
|
||||
// if (m_bias.size()) manager.SetBias(&m_bias);
|
||||
manager.Decode();
|
||||
|
||||
pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData);
|
||||
|
||||
if (m_withGraphInfo) insertGraphInfo(manager,m_retData);
|
||||
if (m_withTopts) insertTranslationOptions(manager,m_retData);
|
||||
if (m_nbestSize) outputNBest(manager, m_retData);
|
||||
|
||||
(const_cast<StaticData&>(Moses::StaticData::Instance()))
|
||||
.SetOutputSearchGraph(false);
|
||||
// WTF? one more reason not to have this as global variable! --- UG
|
||||
|
||||
if (m_withWordAlignInfo) {
|
||||
// word alignment, if requested
|
||||
vector<xmlrpc_c::value> w_aln;
|
||||
BOOST_FOREACH(Hypothesis const* e, edges)
|
||||
e->OutputLocalWordAlignment(w_aln);
|
||||
dest["word-align"] = xmlrpc_c::value_array(w_aln);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
pack_hypothesis(Hypothesis const* h, string const& key,
|
||||
map<string, xmlrpc_c::value>& dest) const
|
||||
{
|
||||
using namespace std;
|
||||
vector<Hypothesis const*> edges;
|
||||
for (; h; h = h->GetPrevHypo())
|
||||
edges.push_back(h);
|
||||
pack_hypothesis(edges, key, dest);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
run_phrase_decoder()
|
||||
{
|
||||
Manager manager(this->self());
|
||||
// if (m_bias.size()) manager.SetBias(&m_bias);
|
||||
manager.Decode();
|
||||
|
||||
pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData);
|
||||
|
||||
if (m_withGraphInfo) insertGraphInfo(manager,m_retData);
|
||||
if (m_withTopts) insertTranslationOptions(manager,m_retData);
|
||||
if (m_nbestSize) outputNBest(manager, m_retData);
|
||||
|
||||
(const_cast<StaticData&>(Moses::StaticData::Instance()))
|
||||
.SetOutputSearchGraph(false);
|
||||
// WTF? one more reason not to have this as global variable! --- UG
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -23,100 +23,106 @@
|
||||
#include <xmlrpc-c/base.hpp>
|
||||
namespace MosesServer
|
||||
{
|
||||
class
|
||||
class
|
||||
TranslationRequest : public virtual Moses::TranslationTask
|
||||
{
|
||||
boost::condition_variable& m_cond;
|
||||
boost::mutex& m_mutex;
|
||||
bool m_done;
|
||||
{
|
||||
boost::condition_variable& m_cond;
|
||||
boost::mutex& m_mutex;
|
||||
bool m_done;
|
||||
|
||||
xmlrpc_c::paramList const& m_paramList;
|
||||
std::map<std::string, xmlrpc_c::value> m_retData;
|
||||
std::map<uint32_t,float> m_bias; // for biased sampling
|
||||
xmlrpc_c::paramList const& m_paramList;
|
||||
std::map<std::string, xmlrpc_c::value> m_retData;
|
||||
std::map<uint32_t,float> m_bias; // for biased sampling
|
||||
|
||||
std::string m_source_string, m_target_string;
|
||||
bool m_withAlignInfo;
|
||||
bool m_withWordAlignInfo;
|
||||
bool m_withGraphInfo;
|
||||
bool m_withTopts;
|
||||
bool m_reportAllFactors;
|
||||
bool m_nbestDistinct;
|
||||
bool m_withScoreBreakdown;
|
||||
size_t m_nbestSize;
|
||||
std::string m_source_string, m_target_string;
|
||||
bool m_withAlignInfo;
|
||||
bool m_withWordAlignInfo;
|
||||
bool m_withGraphInfo;
|
||||
bool m_withTopts;
|
||||
bool m_reportAllFactors;
|
||||
bool m_nbestDistinct;
|
||||
bool m_withScoreBreakdown;
|
||||
size_t m_nbestSize;
|
||||
|
||||
void
|
||||
parse_request();
|
||||
void
|
||||
parse_request();
|
||||
|
||||
void
|
||||
parse_request(std::map<std::string, xmlrpc_c::value> const& req);
|
||||
void
|
||||
parse_request(std::map<std::string, xmlrpc_c::value> const& req);
|
||||
|
||||
virtual void
|
||||
run_chart_decoder();
|
||||
virtual void
|
||||
run_chart_decoder();
|
||||
|
||||
virtual void
|
||||
run_phrase_decoder();
|
||||
virtual void
|
||||
run_phrase_decoder();
|
||||
|
||||
void
|
||||
pack_hypothesis(std::vector<Moses::Hypothesis const* > const& edges,
|
||||
std::string const& key,
|
||||
std::map<std::string, xmlrpc_c::value> & dest) const;
|
||||
void
|
||||
pack_hypothesis(std::vector<Moses::Hypothesis const* > const& edges,
|
||||
std::string const& key,
|
||||
std::map<std::string, xmlrpc_c::value> & dest) const;
|
||||
|
||||
void
|
||||
pack_hypothesis(Moses::Hypothesis const* h, std::string const& key,
|
||||
std::map<std::string, xmlrpc_c::value> & dest) const;
|
||||
void
|
||||
pack_hypothesis(Moses::Hypothesis const* h, std::string const& key,
|
||||
std::map<std::string, xmlrpc_c::value> & dest) const;
|
||||
|
||||
|
||||
void
|
||||
output_phrase(std::ostream& out, Moses::Phrase const& phrase) const;
|
||||
void
|
||||
output_phrase(std::ostream& out, Moses::Phrase const& phrase) const;
|
||||
|
||||
void
|
||||
add_phrase_aln_info(Moses::Hypothesis const& h,
|
||||
std::vector<xmlrpc_c::value>& aInfo) const;
|
||||
void
|
||||
add_phrase_aln_info(Moses::Hypothesis const& h,
|
||||
std::vector<xmlrpc_c::value>& aInfo) const;
|
||||
|
||||
void
|
||||
outputChartHypo(std::ostream& out, const Moses::ChartHypothesis* hypo);
|
||||
void
|
||||
outputChartHypo(std::ostream& out, const Moses::ChartHypothesis* hypo);
|
||||
|
||||
bool
|
||||
compareSearchGraphNode(const Moses::SearchGraphNode& a,
|
||||
const Moses::SearchGraphNode& b);
|
||||
bool
|
||||
compareSearchGraphNode(const Moses::SearchGraphNode& a,
|
||||
const Moses::SearchGraphNode& b);
|
||||
|
||||
void
|
||||
insertGraphInfo(Moses::Manager& manager,
|
||||
std::map<std::string, xmlrpc_c::value>& retData);
|
||||
void
|
||||
outputNBest(Moses::Manager const& manager,
|
||||
std::map<std::string, xmlrpc_c::value>& retData);
|
||||
void
|
||||
insertGraphInfo(Moses::Manager& manager,
|
||||
std::map<std::string, xmlrpc_c::value>& retData);
|
||||
void
|
||||
outputNBest(Moses::Manager const& manager,
|
||||
std::map<std::string, xmlrpc_c::value>& retData);
|
||||
|
||||
void
|
||||
insertTranslationOptions(Moses::Manager& manager,
|
||||
std::map<std::string, xmlrpc_c::value>& retData);
|
||||
protected:
|
||||
TranslationRequest(xmlrpc_c::paramList const& paramList,
|
||||
boost::condition_variable& cond,
|
||||
boost::mutex& mut);
|
||||
void
|
||||
insertTranslationOptions(Moses::Manager& manager,
|
||||
std::map<std::string, xmlrpc_c::value>& retData);
|
||||
protected:
|
||||
TranslationRequest(xmlrpc_c::paramList const& paramList,
|
||||
boost::condition_variable& cond,
|
||||
boost::mutex& mut);
|
||||
|
||||
public:
|
||||
public:
|
||||
|
||||
static
|
||||
boost::shared_ptr<TranslationRequest>
|
||||
create(xmlrpc_c::paramList const& paramList,
|
||||
boost::condition_variable& cond,
|
||||
boost::mutex& mut);
|
||||
static
|
||||
boost::shared_ptr<TranslationRequest>
|
||||
create(xmlrpc_c::paramList const& paramList,
|
||||
boost::condition_variable& cond,
|
||||
boost::mutex& mut);
|
||||
|
||||
|
||||
virtual bool
|
||||
DeleteAfterExecution() { return false; }
|
||||
virtual bool
|
||||
DeleteAfterExecution() {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
IsDone() const { return m_done; }
|
||||
bool
|
||||
IsDone() const {
|
||||
return m_done;
|
||||
}
|
||||
|
||||
std::map<std::string, xmlrpc_c::value> const&
|
||||
GetRetData() { return m_retData; }
|
||||
std::map<std::string, xmlrpc_c::value> const&
|
||||
GetRetData() {
|
||||
return m_retData;
|
||||
}
|
||||
|
||||
void
|
||||
Run();
|
||||
void
|
||||
Run();
|
||||
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -4,34 +4,34 @@
|
||||
namespace MosesServer
|
||||
{
|
||||
|
||||
using namespace std;
|
||||
using namespace Moses;
|
||||
using namespace std;
|
||||
using namespace Moses;
|
||||
|
||||
Translator::
|
||||
Translator(size_t numThreads)
|
||||
: m_threadPool(numThreads)
|
||||
{
|
||||
// signature and help strings are documentation -- the client
|
||||
// can query this information with a system.methodSignature and
|
||||
// system.methodHelp RPC.
|
||||
this->_signature = "S:S";
|
||||
this->_help = "Does translation";
|
||||
}
|
||||
Translator::
|
||||
Translator(size_t numThreads)
|
||||
: m_threadPool(numThreads)
|
||||
{
|
||||
// signature and help strings are documentation -- the client
|
||||
// can query this information with a system.methodSignature and
|
||||
// system.methodHelp RPC.
|
||||
this->_signature = "S:S";
|
||||
this->_help = "Does translation";
|
||||
}
|
||||
|
||||
void
|
||||
Translator::
|
||||
execute(xmlrpc_c::paramList const& paramList,
|
||||
xmlrpc_c::value * const retvalP)
|
||||
{
|
||||
boost::condition_variable cond;
|
||||
boost::mutex mut;
|
||||
boost::shared_ptr<TranslationRequest> task
|
||||
= TranslationRequest::create(paramList,cond,mut);
|
||||
m_threadPool.Submit(task);
|
||||
boost::unique_lock<boost::mutex> lock(mut);
|
||||
while (!task->IsDone())
|
||||
cond.wait(lock);
|
||||
*retvalP = xmlrpc_c::value_struct(task->GetRetData());
|
||||
}
|
||||
void
|
||||
Translator::
|
||||
execute(xmlrpc_c::paramList const& paramList,
|
||||
xmlrpc_c::value * const retvalP)
|
||||
{
|
||||
boost::condition_variable cond;
|
||||
boost::mutex mut;
|
||||
boost::shared_ptr<TranslationRequest> task
|
||||
= TranslationRequest::create(paramList,cond,mut);
|
||||
m_threadPool.Submit(task);
|
||||
boost::unique_lock<boost::mutex> lock(mut);
|
||||
while (!task->IsDone())
|
||||
cond.wait(lock);
|
||||
*retvalP = xmlrpc_c::value_struct(task->GetRetData());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -10,17 +10,17 @@
|
||||
#endif
|
||||
namespace MosesServer
|
||||
{
|
||||
class
|
||||
class
|
||||
// MosesServer::
|
||||
Translator : public xmlrpc_c::method
|
||||
{
|
||||
public:
|
||||
Translator(size_t numThreads = 10);
|
||||
{
|
||||
public:
|
||||
Translator(size_t numThreads = 10);
|
||||
|
||||
void execute(xmlrpc_c::paramList const& paramList,
|
||||
xmlrpc_c::value * const retvalP);
|
||||
private:
|
||||
Moses::ThreadPool m_threadPool;
|
||||
};
|
||||
void execute(xmlrpc_c::paramList const& paramList,
|
||||
xmlrpc_c::value * const retvalP);
|
||||
private:
|
||||
Moses::ThreadPool m_threadPool;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -2,56 +2,56 @@
|
||||
|
||||
namespace MosesServer
|
||||
{
|
||||
using namespace Moses;
|
||||
using namespace std;
|
||||
using namespace Moses;
|
||||
using namespace std;
|
||||
|
||||
Updater::
|
||||
Updater()
|
||||
{
|
||||
// signature and help strings are documentation -- the client
|
||||
// can query this information with a system.methodSignature and
|
||||
// system.methodHelp RPC.
|
||||
this->_signature = "S:S";
|
||||
this->_help = "Updates stuff";
|
||||
}
|
||||
Updater::
|
||||
Updater()
|
||||
{
|
||||
// signature and help strings are documentation -- the client
|
||||
// can query this information with a system.methodSignature and
|
||||
// system.methodHelp RPC.
|
||||
this->_signature = "S:S";
|
||||
this->_help = "Updates stuff";
|
||||
}
|
||||
|
||||
void
|
||||
Updater::
|
||||
execute(xmlrpc_c::paramList const& paramList,
|
||||
xmlrpc_c::value * const retvalP)
|
||||
{
|
||||
void
|
||||
Updater::
|
||||
execute(xmlrpc_c::paramList const& paramList,
|
||||
xmlrpc_c::value * const retvalP)
|
||||
{
|
||||
#if PT_UG
|
||||
const params_t params = paramList.getStruct(0);
|
||||
breakOutParams(params);
|
||||
Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(PhraseDictionary::GetColl()[0]);
|
||||
pdsa->add(m_src, m_trg, m_aln);
|
||||
XVERBOSE(1,"Done inserting\n");
|
||||
*retvalP = xmlrpc_c::value_string("Phrase table updated");
|
||||
const params_t params = paramList.getStruct(0);
|
||||
breakOutParams(params);
|
||||
Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(PhraseDictionary::GetColl()[0]);
|
||||
pdsa->add(m_src, m_trg, m_aln);
|
||||
XVERBOSE(1,"Done inserting\n");
|
||||
*retvalP = xmlrpc_c::value_string("Phrase table updated");
|
||||
#endif
|
||||
};
|
||||
};
|
||||
|
||||
void
|
||||
Updater::
|
||||
breakOutParams(const params_t& params)
|
||||
{
|
||||
params_t::const_iterator si = params.find("source");
|
||||
if(si == params.end())
|
||||
throw xmlrpc_c::fault("Missing source sentence",
|
||||
xmlrpc_c::fault::CODE_PARSE);
|
||||
m_src = xmlrpc_c::value_string(si->second);
|
||||
XVERBOSE(1,"source = " << m_src << endl);
|
||||
si = params.find("target");
|
||||
if(si == params.end())
|
||||
throw xmlrpc_c::fault("Missing target sentence",
|
||||
xmlrpc_c::fault::CODE_PARSE);
|
||||
m_trg = xmlrpc_c::value_string(si->second);
|
||||
XVERBOSE(1,"target = " << m_trg << endl);
|
||||
if((si = params.find("alignment")) == params.end())
|
||||
throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE);
|
||||
m_aln = xmlrpc_c::value_string(si->second);
|
||||
XVERBOSE(1,"alignment = " << m_aln << endl);
|
||||
m_bounded = ((si = params.find("bounded")) != params.end());
|
||||
m_add2ORLM = ((si = params.find("updateORLM")) != params.end());
|
||||
};
|
||||
void
|
||||
Updater::
|
||||
breakOutParams(const params_t& params)
|
||||
{
|
||||
params_t::const_iterator si = params.find("source");
|
||||
if(si == params.end())
|
||||
throw xmlrpc_c::fault("Missing source sentence",
|
||||
xmlrpc_c::fault::CODE_PARSE);
|
||||
m_src = xmlrpc_c::value_string(si->second);
|
||||
XVERBOSE(1,"source = " << m_src << endl);
|
||||
si = params.find("target");
|
||||
if(si == params.end())
|
||||
throw xmlrpc_c::fault("Missing target sentence",
|
||||
xmlrpc_c::fault::CODE_PARSE);
|
||||
m_trg = xmlrpc_c::value_string(si->second);
|
||||
XVERBOSE(1,"target = " << m_trg << endl);
|
||||
if((si = params.find("alignment")) == params.end())
|
||||
throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE);
|
||||
m_aln = xmlrpc_c::value_string(si->second);
|
||||
XVERBOSE(1,"alignment = " << m_aln << endl);
|
||||
m_bounded = ((si = params.find("bounded")) != params.end());
|
||||
m_add2ORLM = ((si = params.find("updateORLM")) != params.end());
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -19,26 +19,26 @@
|
||||
|
||||
namespace MosesServer
|
||||
{
|
||||
class
|
||||
class
|
||||
Updater: public xmlrpc_c::method
|
||||
{
|
||||
{
|
||||
|
||||
typedef std::map<std::string, xmlrpc_c::value> params_t;
|
||||
typedef std::map<std::string, xmlrpc_c::value> params_t;
|
||||
|
||||
|
||||
std::string m_src, m_trg, m_aln;
|
||||
bool m_bounded, m_add2ORLM;
|
||||
std::string m_src, m_trg, m_aln;
|
||||
bool m_bounded, m_add2ORLM;
|
||||
|
||||
public:
|
||||
Updater();
|
||||
public:
|
||||
Updater();
|
||||
|
||||
void
|
||||
execute(xmlrpc_c::paramList const& paramList,
|
||||
xmlrpc_c::value * const retvalP);
|
||||
void
|
||||
execute(xmlrpc_c::paramList const& paramList,
|
||||
xmlrpc_c::value * const retvalP);
|
||||
|
||||
void
|
||||
breakOutParams(const params_t& params);
|
||||
void
|
||||
breakOutParams(const params_t& params);
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -18,108 +18,104 @@
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
// todo: replace this with thread lock-free containers, if a stable library can
|
||||
// be found somewhere
|
||||
// todo: replace this with thread lock-free containers, if a stable library can
|
||||
// be found somewhere
|
||||
|
||||
template<typename KEY, typename VAL, class CONTAINER = std::map<KEY,VAL> >
|
||||
class
|
||||
template<typename KEY, typename VAL, class CONTAINER = std::map<KEY,VAL> >
|
||||
class
|
||||
ThreadSafeContainer
|
||||
{
|
||||
protected:
|
||||
mutable boost::shared_mutex m_lock;
|
||||
CONTAINER m_container;
|
||||
typedef typename CONTAINER::iterator iter_t;
|
||||
typedef typename CONTAINER::const_iterator const_iter_t;
|
||||
typedef typename CONTAINER::value_type entry_t;
|
||||
public:
|
||||
|
||||
class locking_iterator
|
||||
{
|
||||
protected:
|
||||
mutable boost::shared_mutex m_lock;
|
||||
CONTAINER m_container;
|
||||
typedef typename CONTAINER::iterator iter_t;
|
||||
typedef typename CONTAINER::const_iterator const_iter_t;
|
||||
typedef typename CONTAINER::value_type entry_t;
|
||||
boost::unique_lock<boost::shared_mutex> m_lock;
|
||||
CONTAINER const* m_container;
|
||||
const_iter_t m_iter;
|
||||
|
||||
locking_iterator(locking_iterator const& other); // no copies!
|
||||
public:
|
||||
locking_iterator() : m_container(NULL) { }
|
||||
|
||||
class locking_iterator
|
||||
{
|
||||
boost::unique_lock<boost::shared_mutex> m_lock;
|
||||
CONTAINER const* m_container;
|
||||
const_iter_t m_iter;
|
||||
locking_iterator(boost::shared_mutex& lock,
|
||||
CONTAINER const* container,
|
||||
const_iter_t const& iter)
|
||||
: m_lock(lock), m_container(container), m_iter(iter)
|
||||
{ }
|
||||
|
||||
locking_iterator(locking_iterator const& other); // no copies!
|
||||
public:
|
||||
locking_iterator() : m_container(NULL) { }
|
||||
|
||||
locking_iterator(boost::shared_mutex& lock,
|
||||
CONTAINER const* container,
|
||||
const_iter_t const& iter)
|
||||
: m_lock(lock), m_container(container), m_iter(iter)
|
||||
{ }
|
||||
|
||||
entry_t const& operator->()
|
||||
{
|
||||
UTIL_THROW_IF2(m_container == NULL, "This locking iterator is invalid "
|
||||
<< "or has not been assigned.");
|
||||
return m_iter.operator->();
|
||||
}
|
||||
|
||||
// locking operators transfer the lock upon assignment and become invalid
|
||||
locking_iterator const&
|
||||
operator=(locking_iterator& other)
|
||||
{
|
||||
m_lock.swap(other.m_lock);
|
||||
m_iter = other.m_iter;
|
||||
other.m_iter = other.m_container.end();
|
||||
}
|
||||
|
||||
bool
|
||||
operator==(const_iter_t const& other)
|
||||
{
|
||||
return m_iter == other;
|
||||
}
|
||||
|
||||
locking_iterator const&
|
||||
operator++() { ++m_iter; return *this; }
|
||||
|
||||
// DO NOT DEFINE THE POST-INCREMENT OPERATOR!
|
||||
// locking_operators are non-copyable,
|
||||
// so we can't simply make a copy before incrementing and return
|
||||
// the copy after incrementing
|
||||
locking_iterator const&
|
||||
operator++(int);
|
||||
};
|
||||
|
||||
const_iter_t const& end() const
|
||||
{ return m_container.end(); }
|
||||
|
||||
locking_iterator begin() const
|
||||
{
|
||||
return locking_iterator(m_lock, this, m_container.begin());
|
||||
entry_t const& operator->() {
|
||||
UTIL_THROW_IF2(m_container == NULL, "This locking iterator is invalid "
|
||||
<< "or has not been assigned.");
|
||||
return m_iter.operator->();
|
||||
}
|
||||
|
||||
VAL const& set(KEY const& key, VAL const& val)
|
||||
{
|
||||
boost::unique_lock< boost::shared_mutex > lock(m_lock);
|
||||
entry_t entry(key,val);
|
||||
iter_t foo = m_container.insert(entry).first;
|
||||
foo->second = val;
|
||||
return foo->second;
|
||||
// locking operators transfer the lock upon assignment and become invalid
|
||||
locking_iterator const&
|
||||
operator=(locking_iterator& other) {
|
||||
m_lock.swap(other.m_lock);
|
||||
m_iter = other.m_iter;
|
||||
other.m_iter = other.m_container.end();
|
||||
}
|
||||
|
||||
VAL const* get(KEY const& key, VAL const& default_val)
|
||||
{
|
||||
boost::shared_lock< boost::shared_mutex > lock(m_lock);
|
||||
entry_t entry(key, default_val);
|
||||
iter_t foo = m_container.insert(entry).first;
|
||||
return &(foo->second);
|
||||
bool
|
||||
operator==(const_iter_t const& other) {
|
||||
return m_iter == other;
|
||||
}
|
||||
|
||||
VAL const* get(KEY const& key) const
|
||||
{
|
||||
boost::shared_lock< boost::shared_mutex > lock(m_lock);
|
||||
const_iter_t m = m_container.find(key);
|
||||
if (m == m_container.end()) return NULL;
|
||||
return &m->second;
|
||||
locking_iterator const&
|
||||
operator++() {
|
||||
++m_iter;
|
||||
return *this;
|
||||
}
|
||||
|
||||
size_t erase(KEY const& key)
|
||||
{
|
||||
boost::unique_lock< boost::shared_mutex > lock(m_lock);
|
||||
return m_container.erase(key);
|
||||
}
|
||||
// DO NOT DEFINE THE POST-INCREMENT OPERATOR!
|
||||
// locking_operators are non-copyable,
|
||||
// so we can't simply make a copy before incrementing and return
|
||||
// the copy after incrementing
|
||||
locking_iterator const&
|
||||
operator++(int);
|
||||
};
|
||||
|
||||
const_iter_t const& end() const {
|
||||
return m_container.end();
|
||||
}
|
||||
|
||||
locking_iterator begin() const {
|
||||
return locking_iterator(m_lock, this, m_container.begin());
|
||||
}
|
||||
|
||||
VAL const& set(KEY const& key, VAL const& val) {
|
||||
boost::unique_lock< boost::shared_mutex > lock(m_lock);
|
||||
entry_t entry(key,val);
|
||||
iter_t foo = m_container.insert(entry).first;
|
||||
foo->second = val;
|
||||
return foo->second;
|
||||
}
|
||||
|
||||
VAL const* get(KEY const& key, VAL const& default_val) {
|
||||
boost::shared_lock< boost::shared_mutex > lock(m_lock);
|
||||
entry_t entry(key, default_val);
|
||||
iter_t foo = m_container.insert(entry).first;
|
||||
return &(foo->second);
|
||||
}
|
||||
|
||||
VAL const* get(KEY const& key) const {
|
||||
boost::shared_lock< boost::shared_mutex > lock(m_lock);
|
||||
const_iter_t m = m_container.find(key);
|
||||
if (m == m_container.end()) return NULL;
|
||||
return &m->second;
|
||||
}
|
||||
|
||||
size_t erase(KEY const& key) {
|
||||
boost::unique_lock< boost::shared_mutex > lock(m_lock);
|
||||
return m_container.erase(key);
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
@ -146,7 +146,7 @@ public:
|
||||
|
||||
void AddProperty(const std::string &key, const std::string &value, float count) {
|
||||
std::map<std::string,
|
||||
std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
|
||||
std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
|
||||
if ( iter == m_properties.end() ) {
|
||||
// key not found: insert property key and value
|
||||
PROPERTY_VALUES *propertyValues = new PROPERTY_VALUES();
|
||||
|
@ -116,18 +116,18 @@ void PropertiesConsolidator::ProcessPropertiesString(const std::string &properti
|
||||
|
||||
} else if ( !keyValue[0].compare("POS") ) {
|
||||
|
||||
/* DO NOTHING (property is not registered in the decoder at the moment)
|
||||
if ( m_partsOfSpeechFlag ) {
|
||||
/* DO NOTHING (property is not registered in the decoder at the moment)
|
||||
if ( m_partsOfSpeechFlag ) {
|
||||
|
||||
// POS property: replace strings with vocabulary indices
|
||||
out << " {{" << keyValue[0];
|
||||
ProcessPOSPropertyValue(keyValue[1], out);
|
||||
out << "}}";
|
||||
// POS property: replace strings with vocabulary indices
|
||||
out << " {{" << keyValue[0];
|
||||
ProcessPOSPropertyValue(keyValue[1], out);
|
||||
out << "}}";
|
||||
|
||||
} else { // don't process POS property
|
||||
out << " {{" << keyValue[0] << " " << keyValue[1] << "}}";
|
||||
}
|
||||
*/
|
||||
} else { // don't process POS property
|
||||
out << " {{" << keyValue[0] << " " << keyValue[1] << "}}";
|
||||
}
|
||||
*/
|
||||
|
||||
} else {
|
||||
|
||||
|
@ -699,12 +699,12 @@ void ExtractGHKM::WriteGlueGrammar(
|
||||
// const size_t partOfSpeechSentenceStart = 0;
|
||||
// const size_t partOfSpeechSentenceEnd = 1;
|
||||
|
||||
#ifndef BOS_
|
||||
#define BOS_ "<s>" //Beginning of sentence symbol
|
||||
#endif
|
||||
#ifndef EOS_
|
||||
#define EOS_ "</s>" //End of sentence symbol
|
||||
#endif
|
||||
#ifndef BOS_
|
||||
#define BOS_ "<s>" //Beginning of sentence symbol
|
||||
#endif
|
||||
#ifndef EOS_
|
||||
#define EOS_ "</s>" //End of sentence symbol
|
||||
#endif
|
||||
|
||||
std::string sentenceStartSource = BOS_;
|
||||
std::string sentenceEndSource = EOS_;
|
||||
|
@ -12,7 +12,7 @@ namespace FilterRuleTable
|
||||
{
|
||||
|
||||
TreeCfgFilter::TreeCfgFilter(
|
||||
const std::vector<boost::shared_ptr<StringTree> > &sentences)
|
||||
const std::vector<boost::shared_ptr<StringTree> > &sentences)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -25,8 +25,9 @@ namespace FilterRuleTable
|
||||
// Filters a rule table, discarding rules that cannot be applied to a given
|
||||
// test set. The rule table must have a TSG source-side and the test sentences
|
||||
// must be parse trees.
|
||||
class TreeCfgFilter : public CfgFilter {
|
||||
public:
|
||||
class TreeCfgFilter : public CfgFilter
|
||||
{
|
||||
public:
|
||||
// Initialize the filter for a given set of test sentences.
|
||||
TreeCfgFilter(const std::vector<boost::shared_ptr<StringTree> > &);
|
||||
|
||||
|
@ -15,7 +15,7 @@ namespace PostprocessEgretForests
|
||||
|
||||
class Forest
|
||||
{
|
||||
public:
|
||||
public:
|
||||
struct Vertex;
|
||||
|
||||
struct Hyperedge {
|
||||
@ -35,7 +35,7 @@ class Forest
|
||||
|
||||
std::vector<boost::shared_ptr<Vertex> > vertices;
|
||||
|
||||
private:
|
||||
private:
|
||||
// Copying is not allowed.
|
||||
Forest(const Forest &);
|
||||
Forest &operator=(const Forest &);
|
||||
|
@ -17,15 +17,18 @@ namespace PostprocessEgretForests
|
||||
{
|
||||
|
||||
ForestParser::ForestParser()
|
||||
: m_input(0) {
|
||||
: m_input(0)
|
||||
{
|
||||
}
|
||||
|
||||
ForestParser::ForestParser(std::istream &input)
|
||||
: m_input(&input) {
|
||||
: m_input(&input)
|
||||
{
|
||||
++(*this);
|
||||
}
|
||||
|
||||
ForestParser &ForestParser::operator++() {
|
||||
ForestParser &ForestParser::operator++()
|
||||
{
|
||||
if (!m_input) {
|
||||
return *this;
|
||||
}
|
||||
@ -106,7 +109,7 @@ void ForestParser::ParseHyperedgeLine(const std::string &line, Forest &forest)
|
||||
}
|
||||
|
||||
boost::shared_ptr<Forest::Vertex> ForestParser::ParseVertex(
|
||||
const StringPiece &s)
|
||||
const StringPiece &s)
|
||||
{
|
||||
VertexSP v = boost::make_shared<Forest::Vertex>();
|
||||
std::size_t pos = s.rfind('[');
|
||||
@ -132,12 +135,14 @@ boost::shared_ptr<Forest::Vertex> ForestParser::ParseVertex(
|
||||
return v;
|
||||
}
|
||||
|
||||
bool operator==(const ForestParser &lhs, const ForestParser &rhs) {
|
||||
bool operator==(const ForestParser &lhs, const ForestParser &rhs)
|
||||
{
|
||||
// TODO Is this right? Compare values of istreams if non-zero?
|
||||
return lhs.m_input == rhs.m_input;
|
||||
}
|
||||
|
||||
bool operator!=(const ForestParser &lhs, const ForestParser &rhs) {
|
||||
bool operator!=(const ForestParser &lhs, const ForestParser &rhs)
|
||||
{
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
|
@ -20,8 +20,9 @@ namespace Syntax
|
||||
namespace PostprocessEgretForests
|
||||
{
|
||||
|
||||
class ForestParser {
|
||||
public:
|
||||
class ForestParser
|
||||
{
|
||||
public:
|
||||
struct Entry {
|
||||
std::size_t sentNum;
|
||||
std::string sentence;
|
||||
@ -31,15 +32,19 @@ class ForestParser {
|
||||
ForestParser();
|
||||
ForestParser(std::istream &);
|
||||
|
||||
Entry &operator*() { return m_entry; }
|
||||
Entry *operator->() { return &m_entry; }
|
||||
Entry &operator*() {
|
||||
return m_entry;
|
||||
}
|
||||
Entry *operator->() {
|
||||
return &m_entry;
|
||||
}
|
||||
|
||||
ForestParser &operator++();
|
||||
|
||||
friend bool operator==(const ForestParser &, const ForestParser &);
|
||||
friend bool operator!=(const ForestParser &, const ForestParser &);
|
||||
|
||||
private:
|
||||
private:
|
||||
typedef boost::shared_ptr<Forest::Vertex> VertexSP;
|
||||
typedef boost::shared_ptr<Forest::Hyperedge> HyperedgeSP;
|
||||
|
||||
@ -60,7 +65,7 @@ class ForestParser {
|
||||
};
|
||||
|
||||
typedef boost::unordered_set<VertexSP, VertexSetHash,
|
||||
VertexSetPred> VertexSet;
|
||||
VertexSetPred> VertexSet;
|
||||
|
||||
// Copying is not allowed
|
||||
ForestParser(const ForestParser &);
|
||||
|
@ -61,7 +61,8 @@ void ForestWriter::WriteVertex(const Forest::Vertex &v)
|
||||
}
|
||||
}
|
||||
|
||||
std::string ForestWriter::PossiblyEscape(const std::string &s) const {
|
||||
std::string ForestWriter::PossiblyEscape(const std::string &s) const
|
||||
{
|
||||
if (m_options.escape) {
|
||||
return Escape(s);
|
||||
} else {
|
||||
@ -70,7 +71,8 @@ std::string ForestWriter::PossiblyEscape(const std::string &s) const {
|
||||
}
|
||||
|
||||
// Escapes XML special characters.
|
||||
std::string ForestWriter::Escape(const std::string &s) const {
|
||||
std::string ForestWriter::Escape(const std::string &s) const
|
||||
{
|
||||
std::string t;
|
||||
std::size_t len = s.size();
|
||||
t.reserve(len);
|
||||
|
@ -15,13 +15,13 @@ namespace PostprocessEgretForests
|
||||
|
||||
class ForestWriter
|
||||
{
|
||||
public:
|
||||
public:
|
||||
ForestWriter(const Options &options, std::ostream &out)
|
||||
: m_options(options), m_out(out) {}
|
||||
|
||||
void Write(const std::string &, const Forest &, std::size_t);
|
||||
|
||||
private:
|
||||
private:
|
||||
std::string Escape(const std::string &) const;
|
||||
std::string PossiblyEscape(const std::string &) const;
|
||||
void WriteHyperedgeLine(const Forest::Hyperedge &);
|
||||
|
@ -47,8 +47,8 @@ int PostprocessEgretForests::Main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
void PostprocessEgretForests::ProcessForest(
|
||||
std::istream &in, std::ostream &out, SplitPointFileParser *splitPointParser,
|
||||
const Options &options)
|
||||
std::istream &in, std::ostream &out, SplitPointFileParser *splitPointParser,
|
||||
const Options &options)
|
||||
{
|
||||
std::size_t sentNum = 0;
|
||||
ForestWriter writer(options, out);
|
||||
@ -77,7 +77,7 @@ void PostprocessEgretForests::ProcessForest(
|
||||
}
|
||||
|
||||
void PostprocessEgretForests::OpenInputFileOrDie(const std::string &filename,
|
||||
std::ifstream &stream)
|
||||
std::ifstream &stream)
|
||||
{
|
||||
stream.open(filename.c_str());
|
||||
if (!stream) {
|
||||
@ -88,7 +88,7 @@ void PostprocessEgretForests::OpenInputFileOrDie(const std::string &filename,
|
||||
}
|
||||
|
||||
void PostprocessEgretForests::ProcessOptions(int argc, char *argv[],
|
||||
Options &options) const
|
||||
Options &options) const
|
||||
{
|
||||
namespace po = boost::program_options;
|
||||
namespace cls = boost::program_options::command_line_style;
|
||||
@ -119,7 +119,7 @@ void PostprocessEgretForests::ProcessOptions(int argc, char *argv[],
|
||||
// (these are used as positional options).
|
||||
po::options_description hidden("Hidden options");
|
||||
hidden.add_options()
|
||||
// None
|
||||
// None
|
||||
;
|
||||
|
||||
// Compose the full set of command-line options.
|
||||
|
@ -16,15 +16,18 @@ namespace PostprocessEgretForests
|
||||
{
|
||||
|
||||
SplitPointFileParser::SplitPointFileParser()
|
||||
: m_input(0) {
|
||||
: m_input(0)
|
||||
{
|
||||
}
|
||||
|
||||
SplitPointFileParser::SplitPointFileParser(std::istream &input)
|
||||
: m_input(&input) {
|
||||
: m_input(&input)
|
||||
{
|
||||
++(*this);
|
||||
}
|
||||
|
||||
SplitPointFileParser &SplitPointFileParser::operator++() {
|
||||
SplitPointFileParser &SplitPointFileParser::operator++()
|
||||
{
|
||||
if (!m_input) {
|
||||
return *this;
|
||||
}
|
||||
@ -66,13 +69,15 @@ void SplitPointFileParser::ParseLine(const std::string &line,
|
||||
}
|
||||
|
||||
bool operator==(const SplitPointFileParser &lhs,
|
||||
const SplitPointFileParser &rhs) {
|
||||
const SplitPointFileParser &rhs)
|
||||
{
|
||||
// TODO Is this right? Compare values of istreams if non-zero?
|
||||
return lhs.m_input == rhs.m_input;
|
||||
}
|
||||
|
||||
bool operator!=(const SplitPointFileParser &lhs,
|
||||
const SplitPointFileParser &rhs) {
|
||||
const SplitPointFileParser &rhs)
|
||||
{
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
|
@ -13,8 +13,9 @@ namespace Syntax
|
||||
namespace PostprocessEgretForests
|
||||
{
|
||||
|
||||
class SplitPointFileParser {
|
||||
public:
|
||||
class SplitPointFileParser
|
||||
{
|
||||
public:
|
||||
struct Entry {
|
||||
std::vector<SplitPoint> splitPoints;
|
||||
};
|
||||
@ -22,8 +23,12 @@ class SplitPointFileParser {
|
||||
SplitPointFileParser();
|
||||
SplitPointFileParser(std::istream &);
|
||||
|
||||
const Entry &operator*() const { return m_entry; }
|
||||
const Entry *operator->() const { return &m_entry; }
|
||||
const Entry &operator*() const {
|
||||
return m_entry;
|
||||
}
|
||||
const Entry *operator->() const {
|
||||
return &m_entry;
|
||||
}
|
||||
|
||||
SplitPointFileParser &operator++();
|
||||
|
||||
@ -33,7 +38,7 @@ class SplitPointFileParser {
|
||||
friend bool operator!=(const SplitPointFileParser &,
|
||||
const SplitPointFileParser &);
|
||||
|
||||
private:
|
||||
private:
|
||||
void ParseLine(const std::string &, std::vector<SplitPoint> &);
|
||||
|
||||
Entry m_entry;
|
||||
|
@ -30,14 +30,14 @@ inline bool operator==(const Symbol &s, const Symbol &t)
|
||||
}
|
||||
|
||||
struct SymbolHasher {
|
||||
public:
|
||||
public:
|
||||
std::size_t operator()(const Symbol &s) const {
|
||||
return hash_value(s);
|
||||
}
|
||||
};
|
||||
|
||||
struct SymbolEqualityPred {
|
||||
public:
|
||||
public:
|
||||
bool operator()(const Symbol &s, const Symbol &t) const {
|
||||
return s.value == t.value && s.isNonTerminal == t.isNonTerminal;
|
||||
}
|
||||
|
@ -16,10 +16,10 @@ namespace PostprocessEgretForests
|
||||
|
||||
class TopologicalSorter
|
||||
{
|
||||
public:
|
||||
public:
|
||||
void Sort(const Forest &, std::vector<const Forest::Vertex *> &);
|
||||
|
||||
private:
|
||||
private:
|
||||
typedef boost::unordered_set<const Forest::Vertex *> VertexSet;
|
||||
|
||||
void BuildPredSets(const Forest &);
|
||||
|
@ -900,7 +900,7 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
|
||||
}
|
||||
|
||||
if (nonTermContext && !inverseFlag) {
|
||||
std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContext");
|
||||
std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContext");
|
||||
if (!propValue.empty() && propValue.size() < 50000) {
|
||||
size_t nNTs = NumNonTerminal(phraseSource);
|
||||
phraseTableFile << " {{NonTermContext " << nNTs << " " << propValue << "}}";
|
||||
@ -908,7 +908,7 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
|
||||
}
|
||||
|
||||
if (nonTermContextTarget && !inverseFlag) {
|
||||
std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContextTarget");
|
||||
std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContextTarget");
|
||||
if (!propValue.empty() && propValue.size() < 50000) {
|
||||
size_t nNTs = NumNonTerminal(phraseSource);
|
||||
phraseTableFile << " {{NonTermContextTarget " << nNTs << " " << propValue << "}}";
|
||||
|
@ -427,7 +427,7 @@ int main(int argc, char** argv)
|
||||
ostream *out = &std::cout;
|
||||
|
||||
if (input) {
|
||||
fstream *fin = new fstream(input,ios::in);
|
||||
fstream *fin = new fstream(input,ios::in);
|
||||
if (!fin->is_open()) {
|
||||
cerr << "cannot open " << input << "\n";
|
||||
exit(1);
|
||||
@ -436,7 +436,7 @@ int main(int argc, char** argv)
|
||||
}
|
||||
|
||||
if (output) {
|
||||
fstream *fout = new fstream(output,ios::out);
|
||||
fstream *fout = new fstream(output,ios::out);
|
||||
if (!fout->is_open()) {
|
||||
cerr << "cannot open " << output << "\n";
|
||||
exit(1);
|
||||
@ -506,12 +506,12 @@ int main(int argc, char** argv)
|
||||
for (int i=1; i<=MAX_N; i++) delete [] A[i];
|
||||
delete [] A;
|
||||
|
||||
if (inp != &std::cin) {
|
||||
delete inp;
|
||||
}
|
||||
if (out != &std::cout) {
|
||||
delete inp;
|
||||
}
|
||||
if (inp != &std::cin) {
|
||||
delete inp;
|
||||
}
|
||||
if (out != &std::cout) {
|
||||
delete inp;
|
||||
}
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user