From cc8c6b7b10abd8118014635609f7658f6a7a1857 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Sat, 2 May 2015 11:45:24 +0100 Subject: [PATCH] beautify --- mert/BleuScorer.cpp | 46 +- mert/ForestRescoreTest.cpp | 6 +- mert/MiraFeatureVectorTest.cpp | 3 +- mert/mert.cpp | 2 +- mert/sentence-bleu-nbest.cpp | 6 +- mert/sentence-bleu.cpp | 6 +- moses-cmd/LatticeMBRGrid.cpp | 63 +- moses-cmd/MainVW.cpp | 37 +- moses/BaseManager.cpp | 4 +- moses/ChartCellCollection.h | 4 +- moses/ConfusionNet.cpp | 2 +- moses/ContextScope.h | 127 +- moses/DecodeStepTranslation.cpp | 59 +- moses/ExportInterface.cpp | 209 ++- moses/ExportInterface.h | 4 +- moses/FF/Factory.cpp | 44 +- moses/FF/FeatureFunction.cpp | 14 +- moses/FF/FeatureFunction.h | 8 +- moses/FF/InternalTree.cpp | 3 +- moses/FF/InternalTree.h | 9 +- .../LexicalReordering/LexicalReordering.cpp | 8 +- .../LexicalReorderingState.cpp | 8 +- .../LexicalReorderingState.h | 25 +- .../FF/LexicalReordering/SparseReordering.cpp | 8 +- moses/FF/Model1Feature.cpp | 55 +- moses/FF/Model1Feature.h | 6 +- moses/FF/PhraseOrientationFeature.cpp | 2 +- moses/FF/PhraseOrientationFeature.h | 18 +- moses/FF/RulePairUnlexicalizedSource.cpp | 15 +- moses/FF/RuleScope.cpp | 48 +- moses/FF/TreeStructureFeature.cpp | 2 +- moses/FF/VW/VW.h | 8 +- moses/Hypothesis.cpp | 1171 +++++++++-------- moses/Hypothesis.h | 2 +- moses/IOWrapper.h | 2 +- moses/LM/RDLM.cpp | 377 +++--- moses/LM/RDLM.h | 167 +-- moses/Manager.cpp | 8 +- moses/OutputCollector.h | 2 +- moses/Parameter.cpp | 38 +- moses/Parameter.h | 24 +- moses/ScoreComponentCollection.cpp | 28 +- moses/ScoreComponentCollection.h | 4 +- moses/Sentence.cpp | 188 ++- moses/Sentence.h | 155 +-- moses/StaticData.cpp | 34 +- moses/StaticData.h | 12 +- moses/Syntax/F2S/HyperTreeLoader.cpp | 2 +- moses/Syntax/F2S/HyperTreeLoader.h | 2 +- moses/Syntax/F2S/Manager-inl.h | 2 +- moses/Syntax/F2S/Manager.h | 2 +- moses/Syntax/InputWeightFF.cpp | 12 +- moses/Syntax/InputWeightFF.h | 2 +- moses/TargetPhrase.cpp | 31 +- moses/TargetPhrase.h | 8 +- moses/TrainingTask.h | 10 +- .../CompactPT/BlockHashIndex.h | 2 +- .../CompactPT/MmapAllocator.h | 4 +- moses/TranslationModel/PhraseDictionary.h | 6 +- moses/TranslationOption.cpp | 2 +- moses/TranslationOption.h | 2 +- moses/TranslationOptionCollection.cpp | 17 +- moses/TranslationOptionCollection.h | 5 +- ...ranslationOptionCollectionConfusionNet.cpp | 2 +- moses/TranslationOptionCollectionLattice.cpp | 2 +- moses/TranslationTask.cpp | 68 +- moses/TranslationTask.h | 23 +- moses/TreeInput.h | 2 +- moses/Util.h | 2 +- moses/server/Optimizer.cpp | 127 +- moses/server/Optimizer.h | 14 +- moses/server/TranslationRequest.cpp | 673 +++++----- moses/server/TranslationRequest.h | 146 +- moses/server/Translator.cpp | 54 +- moses/server/Translator.h | 18 +- moses/server/Updater.cpp | 92 +- moses/server/Updater.h | 26 +- moses/thread_safe_container.h | 172 ++- phrase-extract/ExtractionPhrasePair.h | 2 +- phrase-extract/PropertiesConsolidator.cpp | 20 +- phrase-extract/extract-ghkm/ExtractGHKM.cpp | 12 +- .../filter-rule-table/TreeCfgFilter.cpp | 2 +- .../filter-rule-table/TreeCfgFilter.h | 5 +- .../postprocess-egret-forests/Forest.h | 4 +- .../ForestParser.cpp | 17 +- .../postprocess-egret-forests/ForestParser.h | 17 +- .../ForestWriter.cpp | 6 +- .../postprocess-egret-forests/ForestWriter.h | 4 +- .../PostprocessEgretForests.cpp | 10 +- .../SplitPointFileParser.cpp | 15 +- .../SplitPointFileParser.h | 15 +- .../postprocess-egret-forests/Symbol.h | 4 +- .../TopologicalSorter.h | 4 +- phrase-extract/score-main.cpp | 4 +- symal/symal.cpp | 16 +- 95 files changed, 2349 insertions(+), 2409 deletions(-) diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp index dc926054f..8ab749f3b 100644 --- a/mert/BleuScorer.cpp +++ b/mert/BleuScorer.cpp @@ -94,8 +94,7 @@ void BleuScorer::setReferenceFiles(const vector& referenceFiles) mert::VocabularyFactory::GetVocabulary()->clear(); //load reference data - for (size_t i = 0; i < referenceFiles.size(); ++i) - { + for (size_t i = 0; i < referenceFiles.size(); ++i) { TRACE_ERR("Loading reference from " << referenceFiles[i] << endl); ifstream ifs(referenceFiles[i].c_str()); @@ -133,28 +132,27 @@ bool BleuScorer::OpenReferenceStream(istream* is, size_t file_id) void BleuScorer::ProcessReferenceLine(const std::string& line, Reference* ref) const { - NgramCounts counts; - size_t length = CountNgrams(line, counts, kBleuNgramOrder); + NgramCounts counts; + size_t length = CountNgrams(line, counts, kBleuNgramOrder); - //for any counts larger than those already there, merge them in - for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) { - const NgramCounts::Key& ngram = ci->first; - const NgramCounts::Value newcount = ci->second; + //for any counts larger than those already there, merge them in + for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) { + const NgramCounts::Key& ngram = ci->first; + const NgramCounts::Value newcount = ci->second; - NgramCounts::Value oldcount = 0; - ref->get_counts()->Lookup(ngram, &oldcount); - if (newcount > oldcount) { - ref->get_counts()->operator[](ngram) = newcount; - } + NgramCounts::Value oldcount = 0; + ref->get_counts()->Lookup(ngram, &oldcount); + if (newcount > oldcount) { + ref->get_counts()->operator[](ngram) = newcount; } - //add in the length - ref->push_back(length); + } + //add in the length + ref->push_back(length); } bool BleuScorer::GetNextReferenceFromStreams(std::vector >& referenceStreams, Reference& ref) const { - for (vector >::iterator ifs=referenceStreams.begin(); ifs!=referenceStreams.end(); ++ifs) - { + for (vector >::iterator ifs=referenceStreams.begin(); ifs!=referenceStreams.end(); ++ifs) { if (!(*ifs)) return false; string line; if (!getline(**ifs, line)) return false; @@ -309,22 +307,20 @@ vector BleuScorer::ScoreNbestList(const string& scoreFile, const string& vector featureDataIters; vector scoreDataIters; - for (size_t i = 0; i < featureFiles.size(); ++i) - { + for (size_t i = 0; i < featureFiles.size(); ++i) { featureDataIters.push_back(FeatureDataIterator(featureFiles[i])); scoreDataIters.push_back(ScoreDataIterator(scoreFiles[i])); } vector > hypotheses; - UTIL_THROW_IF2(featureDataIters[0] == FeatureDataIterator::end(), + UTIL_THROW_IF2(featureDataIters[0] == FeatureDataIterator::end(), "At the end of feature data iterator"); - for (size_t i = 0; i < featureFiles.size(); ++i) - { - UTIL_THROW_IF2(featureDataIters[i] == FeatureDataIterator::end(), + for (size_t i = 0; i < featureFiles.size(); ++i) { + UTIL_THROW_IF2(featureDataIters[i] == FeatureDataIterator::end(), "Feature file " << i << " ended prematurely"); - UTIL_THROW_IF2(scoreDataIters[i] == ScoreDataIterator::end(), + UTIL_THROW_IF2(scoreDataIters[i] == ScoreDataIterator::end(), "Score file " << i << " ended prematurely"); - UTIL_THROW_IF2(featureDataIters[i]->size() != scoreDataIters[i]->size(), + UTIL_THROW_IF2(featureDataIters[i]->size() != scoreDataIters[i]->size(), "Features and scores have different size"); for (size_t j = 0; j < featureDataIters[i]->size(); ++j) { hypotheses.push_back(pair(i,j)); diff --git a/mert/ForestRescoreTest.cpp b/mert/ForestRescoreTest.cpp index f1a1c8423..91c4fe4f3 100644 --- a/mert/ForestRescoreTest.cpp +++ b/mert/ForestRescoreTest.cpp @@ -13,7 +13,8 @@ using namespace std; using namespace MosesTuning; -BOOST_AUTO_TEST_CASE(viterbi_simple_lattice) { +BOOST_AUTO_TEST_CASE(viterbi_simple_lattice) +{ Vocab vocab; WordVec words; string wordStrings[] = @@ -244,7 +245,8 @@ BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice) BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[8]); } -BOOST_AUTO_TEST_CASE(viterbi_full_hypergraph) { +BOOST_AUTO_TEST_CASE(viterbi_full_hypergraph) +{ Vocab vocab; //References ReferenceSet references; diff --git a/mert/MiraFeatureVectorTest.cpp b/mert/MiraFeatureVectorTest.cpp index d64ba79a5..999b8512a 100644 --- a/mert/MiraFeatureVectorTest.cpp +++ b/mert/MiraFeatureVectorTest.cpp @@ -11,7 +11,8 @@ how many of the features are really "dense". This is because in hg mira all features (sparse and dense) are to get rolled in to SparseVector */ -BOOST_AUTO_TEST_CASE(from_sparse) { +BOOST_AUTO_TEST_CASE(from_sparse) +{ SparseVector sp; sp.set("dense0", 0.2); sp.set("dense1", 0.3); diff --git a/mert/mert.cpp b/mert/mert.cpp index 82b4cc34d..aa6e2a08e 100644 --- a/mert/mert.cpp +++ b/mert/mert.cpp @@ -474,7 +474,7 @@ int main(int argc, char **argv) // A task for each start point for (size_t j = 0; j < startingPoints.size(); ++j) { boost::shared_ptr - task(new OptimizationTask(optimizer, startingPoints[j])); + task(new OptimizationTask(optimizer, startingPoints[j])); tasks.push_back(task); #ifdef WITH_THREADS pool.Submit(task); diff --git a/mert/sentence-bleu-nbest.cpp b/mert/sentence-bleu-nbest.cpp index f869386e3..599230511 100644 --- a/mert/sentence-bleu-nbest.cpp +++ b/mert/sentence-bleu-nbest.cpp @@ -32,8 +32,7 @@ int main(int argc, char **argv) // initialize reference streams std::vector > refStreams; - for (std::vector::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile) - { + for (std::vector::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile) { TRACE_ERR("Loading reference from " << *refFile << std::endl); boost::shared_ptr ifs(new std::ifstream(refFile->c_str())); UTIL_THROW_IF2(!ifs, "Cannot open " << *refFile); @@ -44,8 +43,7 @@ int main(int argc, char **argv) std::string nbestLine; int sid = -1; Reference ref; - while ( getline(std::cin, nbestLine) ) - { + while ( getline(std::cin, nbestLine) ) { std::vector items; Moses::TokenizeMultiCharSeparator(items, nbestLine, " ||| "); int sidCurrent = Moses::Scan(items[0]); diff --git a/mert/sentence-bleu.cpp b/mert/sentence-bleu.cpp index 9bdab30d2..3f886ffeb 100644 --- a/mert/sentence-bleu.cpp +++ b/mert/sentence-bleu.cpp @@ -34,8 +34,7 @@ int main(int argc, char **argv) // initialize reference streams vector > refStreams; - for (vector::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile) - { + for (vector::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile) { TRACE_ERR("Loading reference from " << *refFile << endl); boost::shared_ptr ifs(new ifstream(refFile->c_str())); UTIL_THROW_IF2(!ifs, "Cannot open " << *refFile); @@ -45,8 +44,7 @@ int main(int argc, char **argv) // load sentences, preparing statistics, score string hypothesisLine; size_t sid = 0; - while (getline(std::cin, hypothesisLine)) - { + while (getline(std::cin, hypothesisLine)) { Reference ref; if (!scorer.GetNextReferenceFromStreams(refStreams, ref)) { UTIL_THROW2("Missing references"); diff --git a/moses-cmd/LatticeMBRGrid.cpp b/moses-cmd/LatticeMBRGrid.cpp index f842b1136..0447a16fa 100644 --- a/moses-cmd/LatticeMBRGrid.cpp +++ b/moses-cmd/LatticeMBRGrid.cpp @@ -177,39 +177,34 @@ int main(int argc, char* argv[]) const vector& scale_grid = grid.getGrid(lmbr_scale); boost::shared_ptr source; - while((source = ioWrapper->ReadInput()) != NULL) - { - // set up task of translating one sentence - boost::shared_ptr ttask; - ttask = TranslationTask::create(source, ioWrapper); - Manager manager(ttask); - manager.Decode(); - TrellisPathList nBestList; - manager.CalcNBest(nBestSize, nBestList,true); - //grid search - BOOST_FOREACH(float const& p, pgrid) - { - SD.SetLatticeMBRPrecision(p); - BOOST_FOREACH(float const& r, rgrid) - { - SD.SetLatticeMBRPRatio(r); - BOOST_FOREACH(size_t const prune_i, prune_grid) - { - SD.SetLatticeMBRPruningFactor(size_t(prune_i)); - BOOST_FOREACH(float const& scale_i, scale_grid) - { - SD.SetMBRScale(scale_i); - size_t lineCount = source->GetTranslationId(); - cout << lineCount << " ||| " << p << " " - << r << " " << size_t(prune_i) << " " << scale_i - << " ||| "; - vector mbrBestHypo = doLatticeMBR(manager,nBestList); - manager.OutputBestHypo(mbrBestHypo, lineCount, - SD.GetReportSegmentation(), - SD.GetReportAllFactors(),cout); - } - } - } - } + while((source = ioWrapper->ReadInput()) != NULL) { + // set up task of translating one sentence + boost::shared_ptr ttask; + ttask = TranslationTask::create(source, ioWrapper); + Manager manager(ttask); + manager.Decode(); + TrellisPathList nBestList; + manager.CalcNBest(nBestSize, nBestList,true); + //grid search + BOOST_FOREACH(float const& p, pgrid) { + SD.SetLatticeMBRPrecision(p); + BOOST_FOREACH(float const& r, rgrid) { + SD.SetLatticeMBRPRatio(r); + BOOST_FOREACH(size_t const prune_i, prune_grid) { + SD.SetLatticeMBRPruningFactor(size_t(prune_i)); + BOOST_FOREACH(float const& scale_i, scale_grid) { + SD.SetMBRScale(scale_i); + size_t lineCount = source->GetTranslationId(); + cout << lineCount << " ||| " << p << " " + << r << " " << size_t(prune_i) << " " << scale_i + << " ||| "; + vector mbrBestHypo = doLatticeMBR(manager,nBestList); + manager.OutputBestHypo(mbrBestHypo, lineCount, + SD.GetReportSegmentation(), + SD.GetReportAllFactors(),cout); + } + } + } } + } } diff --git a/moses-cmd/MainVW.cpp b/moses-cmd/MainVW.cpp index 302866733..ac54c1ed6 100644 --- a/moses-cmd/MainVW.cpp +++ b/moses-cmd/MainVW.cpp @@ -144,27 +144,28 @@ int main(int argc, char** argv) #endif // main loop over set of input sentences - + boost::shared_ptr source; - while ((source = ioWrapper->ReadInput()) != NULL) - { - IFVERBOSE(1) { ResetUserTime(); } - - InputType* foo = source.get(); - FeatureFunction::CallChangeSource(foo); - - // set up task of training one sentence - boost::shared_ptr task; - task = TrainingTask::create(source, ioWrapper); - - // execute task -#ifdef WITH_THREADS - pool.Submit(task); -#else - task->Run(); -#endif + while ((source = ioWrapper->ReadInput()) != NULL) { + IFVERBOSE(1) { + ResetUserTime(); } + InputType* foo = source.get(); + FeatureFunction::CallChangeSource(foo); + + // set up task of training one sentence + boost::shared_ptr task; + task = TrainingTask::create(source, ioWrapper); + + // execute task +#ifdef WITH_THREADS + pool.Submit(task); +#else + task->Run(); +#endif + } + // we are done, finishing up #ifdef WITH_THREADS pool.Stop(true); //flush remaining jobs diff --git a/moses/BaseManager.cpp b/moses/BaseManager.cpp index a89bb848a..83d48e6e4 100644 --- a/moses/BaseManager.cpp +++ b/moses/BaseManager.cpp @@ -17,7 +17,9 @@ BaseManager::BaseManager(ttasksptr const& ttask) const InputType& BaseManager::GetSource() const -{ return m_source; } +{ + return m_source; +} diff --git a/moses/ChartCellCollection.h b/moses/ChartCellCollection.h index 5945ce12a..ac8e0fd38 100644 --- a/moses/ChartCellCollection.h +++ b/moses/ChartCellCollection.h @@ -36,8 +36,8 @@ class ChartCellCollectionBase { public: template ChartCellCollectionBase(const InputType &input, - const Factory &factory, - const ChartParser &parser) + const Factory &factory, + const ChartParser &parser) :m_cells(input.GetSize()) { size_t size = input.GetSize(); diff --git a/moses/ConfusionNet.cpp b/moses/ConfusionNet.cpp index e305a4147..0c355fd94 100644 --- a/moses/ConfusionNet.cpp +++ b/moses/ConfusionNet.cpp @@ -299,7 +299,7 @@ CreateTranslationOptionCollection(ttasksptr const& ttask) const = StaticData::Instance().GetTranslationOptionThreshold(); TranslationOptionCollection *rv = new TranslationOptionCollectionConfusionNet - (ttask, *this, maxNoTransOptPerCoverage, translationOptionThreshold); + (ttask, *this, maxNoTransOptPerCoverage, translationOptionThreshold); assert(rv); return rv; } diff --git a/moses/ContextScope.h b/moses/ContextScope.h index ed9f854ff..e9edf7b15 100644 --- a/moses/ContextScope.h +++ b/moses/ContextScope.h @@ -18,80 +18,75 @@ namespace Moses { - class ContextScope - { - protected: - typedef std::map > scratchpad_t; - typedef scratchpad_t::iterator iter_t; - typedef scratchpad_t::value_type entry_t; - typedef scratchpad_t::const_iterator const_iter_t; - scratchpad_t m_scratchpad; - mutable boost::shared_mutex m_lock; - public: - // class write_access - // { - // boost::unique_lock m_lock; - // public: +class ContextScope +{ +protected: + typedef std::map > scratchpad_t; + typedef scratchpad_t::iterator iter_t; + typedef scratchpad_t::value_type entry_t; + typedef scratchpad_t::const_iterator const_iter_t; + scratchpad_t m_scratchpad; + mutable boost::shared_mutex m_lock; +public: + // class write_access + // { + // boost::unique_lock m_lock; + // public: - // write_access(boost::shared_mutex& lock) - // : m_lock(lock) - // { } + // write_access(boost::shared_mutex& lock) + // : m_lock(lock) + // { } - // write_access(write_access& other) - // { - // swap(m_lock, other.m_lock); - // } - // }; + // write_access(write_access& other) + // { + // swap(m_lock, other.m_lock); + // } + // }; - // write_access lock() const - // { - // return write_access(m_lock); - // } + // write_access lock() const + // { + // return write_access(m_lock); + // } - template - boost::shared_ptr const& - set(void const* const key, boost::shared_ptr const& val) - { - boost::unique_lock lock(m_lock); - return (m_scratchpad[key] = val); - } + template + boost::shared_ptr const& + set(void const* const key, boost::shared_ptr const& val) { + boost::unique_lock lock(m_lock); + return (m_scratchpad[key] = val); + } - template - boost::shared_ptr const - get(void const* key, bool CreateNewIfNecessary=false) - { - using boost::shared_mutex; - using boost::upgrade_lock; - // T const* key = reinterpret_cast(xkey); - upgrade_lock lock(m_lock); - iter_t m = m_scratchpad.find(key); - boost::shared_ptr< T > ret; - if (m != m_scratchpad.end()) - { - if (m->second == NULL && CreateNewIfNecessary) - { - boost::upgrade_to_unique_lock xlock(lock); - m->second.reset(new T); - } - ret = boost::static_pointer_cast< T >(m->second); - return ret; - } - if (!CreateNewIfNecessary) return ret; - boost::upgrade_to_unique_lock xlock(lock); - ret.reset(new T); - m_scratchpad[key] = ret; + template + boost::shared_ptr const + get(void const* key, bool CreateNewIfNecessary=false) { + using boost::shared_mutex; + using boost::upgrade_lock; + // T const* key = reinterpret_cast(xkey); + upgrade_lock lock(m_lock); + iter_t m = m_scratchpad.find(key); + boost::shared_ptr< T > ret; + if (m != m_scratchpad.end()) { + if (m->second == NULL && CreateNewIfNecessary) { + boost::upgrade_to_unique_lock xlock(lock); + m->second.reset(new T); + } + ret = boost::static_pointer_cast< T >(m->second); return ret; } + if (!CreateNewIfNecessary) return ret; + boost::upgrade_to_unique_lock xlock(lock); + ret.reset(new T); + m_scratchpad[key] = ret; + return ret; + } - ContextScope() { } + ContextScope() { } - ContextScope(ContextScope const& other) - { - boost::unique_lock lock1(this->m_lock); - boost::unique_lock lock2(other.m_lock); - m_scratchpad = other.m_scratchpad; - } - - }; + ContextScope(ContextScope const& other) { + boost::unique_lock lock1(this->m_lock); + boost::unique_lock lock2(other.m_lock); + m_scratchpad = other.m_scratchpad; + } + +}; }; diff --git a/moses/DecodeStepTranslation.cpp b/moses/DecodeStepTranslation.cpp index 7ea26f8a5..034c06fc2 100644 --- a/moses/DecodeStepTranslation.cpp +++ b/moses/DecodeStepTranslation.cpp @@ -218,17 +218,16 @@ const InputPath &DecodeStepTranslation::GetInputPathLEGACY( void DecodeStepTranslation:: ProcessLEGACY(TranslationOption const& in, - DecodeStep const& decodeStep, - PartialTranslOptColl &out, - TranslationOptionCollection *toc, - bool adhereTableLimit) const + DecodeStep const& decodeStep, + PartialTranslOptColl &out, + TranslationOptionCollection *toc, + bool adhereTableLimit) const { - if (in.GetTargetPhrase().GetSize() == 0) - { - // word deletion - out.Add(new TranslationOption(in)); - return; - } + if (in.GetTargetPhrase().GetSize() == 0) { + // word deletion + out.Add(new TranslationOption(in)); + return; + } // normal trans step WordsRange const& srcRange = in.GetSourceWordsRange(); @@ -241,34 +240,32 @@ ProcessLEGACY(TranslationOption const& in, TargetPhraseCollectionWithSourcePhrase const* phraseColl; phraseColl = pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange); - if (phraseColl != NULL) - { - TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd; - iterEnd = ((adhereTableLimit && tableLimit && phraseColl->GetSize() >= tableLimit) - ? phraseColl->begin() + tableLimit : phraseColl->end()); + if (phraseColl != NULL) { + TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd; + iterEnd = ((adhereTableLimit && tableLimit && phraseColl->GetSize() >= tableLimit) + ? phraseColl->begin() + tableLimit : phraseColl->end()); - for (iterTargetPhrase = phraseColl->begin(); - iterTargetPhrase != iterEnd; - ++iterTargetPhrase) - { - TargetPhrase const& targetPhrase = **iterTargetPhrase; - if (targetPhrase.GetSize() != currSize || - (IsFilteringStep() && !in.IsCompatible(targetPhrase, m_conflictFactors))) - continue; + for (iterTargetPhrase = phraseColl->begin(); + iterTargetPhrase != iterEnd; + ++iterTargetPhrase) { + TargetPhrase const& targetPhrase = **iterTargetPhrase; + if (targetPhrase.GetSize() != currSize || + (IsFilteringStep() && !in.IsCompatible(targetPhrase, m_conflictFactors))) + continue; - TargetPhrase outPhrase(inPhrase); - outPhrase.Merge(targetPhrase, m_newOutputFactors); - outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up + TargetPhrase outPhrase(inPhrase); + outPhrase.Merge(targetPhrase, m_newOutputFactors); + outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up - TranslationOption *newTransOpt = new TranslationOption(srcRange, outPhrase); - assert(newTransOpt != NULL); + TranslationOption *newTransOpt = new TranslationOption(srcRange, outPhrase); + assert(newTransOpt != NULL); - newTransOpt->SetInputPath(inputPath); + newTransOpt->SetInputPath(inputPath); - out.Add(newTransOpt); + out.Add(newTransOpt); - } } + } } } diff --git a/moses/ExportInterface.cpp b/moses/ExportInterface.cpp index 27f757b5c..342e6dc7a 100644 --- a/moses/ExportInterface.cpp +++ b/moses/ExportInterface.cpp @@ -83,16 +83,16 @@ void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream) SimpleTranslationInterface::SimpleTranslationInterface(const string &mosesIni): m_staticData(StaticData::Instance()) { - if (!m_params.LoadParam(mosesIni)) { - cerr << "Error; Cannot load parameters at " << mosesIni< source = ioWrapper->ReadInput(); if (!source) return "Error: Source==null!!!"; - IFVERBOSE(1) { ResetUserTime(); } + IFVERBOSE(1) { + ResetUserTime(); + } FeatureFunction::CallChangeSource(&*source); // set up task of translating one sentence boost::shared_ptr task - = TranslationTask::create(source, ioWrapper); + = TranslationTask::create(source, ioWrapper); task->Run(); string output = outputStream.str(); @@ -147,10 +149,14 @@ int run_as_server() { #ifdef HAVE_XMLRPC_C - int port; params.SetParameter(port, "server-port", 8080); - bool isSerial; params.SetParameter(isSerial, "serial", false); - string logfile; params.SetParameter(logfile, "server-log", string("")); - size_t num_threads; params.SetParameter(num_threads, "threads", size_t(10)); + int port; + params.SetParameter(port, "server-port", 8080); + bool isSerial; + params.SetParameter(isSerial, "serial", false); + string logfile; + params.SetParameter(logfile, "server-log", string("")); + size_t num_threads; + params.SetParameter(num_threads, "threads", size_t(10)); if (isSerial) VERBOSE(1,"Running server in serial mode." << endl); xmlrpc_c::registry myRegistry; @@ -166,8 +172,9 @@ run_as_server() xmlrpc_c::serverAbyss myAbyssServer(myRegistry, port, logfile); XVERBOSE(1,"Listening on port " << port << endl); - if (isSerial) { while(1) myAbyssServer.runOnce(); } - else myAbyssServer.run(); + if (isSerial) { + while(1) myAbyssServer.runOnce(); + } else myAbyssServer.run(); std::cerr << "xmlrpc_c::serverAbyss.run() returned but should not." << std::endl; // #pragma message("BUILDING MOSES WITH SERVER SUPPORT") @@ -193,16 +200,15 @@ batch_run() // set up read/writing class: boost::shared_ptr ioWrapper(new IOWrapper); UTIL_THROW_IF2(ioWrapper == NULL, "Error; Failed to create IO object" - << " [" << HERE << "]"); + << " [" << HERE << "]"); // check on weights const ScoreComponentCollection& weights = staticData.GetAllWeights(); - IFVERBOSE(2) - { - TRACE_ERR("The global weight vector looks like this: "); - TRACE_ERR(weights); - TRACE_ERR("\n"); - } + IFVERBOSE(2) { + TRACE_ERR("The global weight vector looks like this: "); + TRACE_ERR(weights); + TRACE_ERR("\n"); + } #ifdef WITH_THREADS ThreadPool pool(staticData.ThreadCount()); @@ -214,57 +220,53 @@ batch_run() // main loop over set of input sentences boost::shared_ptr source; - while ((source = ioWrapper->ReadInput()) != NULL) - { - IFVERBOSE(1) ResetUserTime(); + while ((source = ioWrapper->ReadInput()) != NULL) { + IFVERBOSE(1) ResetUserTime(); - FeatureFunction::CallChangeSource(source.get()); + FeatureFunction::CallChangeSource(source.get()); - // set up task of translating one sentence - boost::shared_ptr - task = TranslationTask::create(source, ioWrapper); - task->SetContextString(context_string); + // set up task of translating one sentence + boost::shared_ptr + task = TranslationTask::create(source, ioWrapper); + task->SetContextString(context_string); - // Allow for (sentence-)context-specific processing prior to - // decoding. This can be used, for example, for context-sensitive - // phrase lookup. - FeatureFunction::SetupAll(*task); + // Allow for (sentence-)context-specific processing prior to + // decoding. This can be used, for example, for context-sensitive + // phrase lookup. + FeatureFunction::SetupAll(*task); - // execute task + // execute task #ifdef WITH_THREADS #ifdef PT_UG - // simulated post-editing requires threads (within the dynamic phrase tables) - // but runs all sentences serially, to allow updating of the bitext. - bool spe = params.isParamSpecified("spe-src"); - if (spe) - { - // simulated post-editing: always run single-threaded! - task->Run(); - string src,trg,aln; - UTIL_THROW_IF2(!getline(*ioWrapper->spe_src,src), "[" << HERE << "] " - << "missing update data for simulated post-editing."); - UTIL_THROW_IF2(!getline(*ioWrapper->spe_trg,trg), "[" << HERE << "] " - << "missing update data for simulated post-editing."); - UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] " - << "missing update data for simulated post-editing."); - BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl()) - { - Mmsapt* sapt = dynamic_cast(pd); - if (sapt) sapt->add(src,trg,aln); - VERBOSE(1,"[" << HERE << " added src] " << src << endl); - VERBOSE(1,"[" << HERE << " added trg] " << trg << endl); - VERBOSE(1,"[" << HERE << " added aln] " << aln << endl); - } - } - else pool.Submit(task); + // simulated post-editing requires threads (within the dynamic phrase tables) + // but runs all sentences serially, to allow updating of the bitext. + bool spe = params.isParamSpecified("spe-src"); + if (spe) { + // simulated post-editing: always run single-threaded! + task->Run(); + string src,trg,aln; + UTIL_THROW_IF2(!getline(*ioWrapper->spe_src,src), "[" << HERE << "] " + << "missing update data for simulated post-editing."); + UTIL_THROW_IF2(!getline(*ioWrapper->spe_trg,trg), "[" << HERE << "] " + << "missing update data for simulated post-editing."); + UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] " + << "missing update data for simulated post-editing."); + BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl()) { + Mmsapt* sapt = dynamic_cast(pd); + if (sapt) sapt->add(src,trg,aln); + VERBOSE(1,"[" << HERE << " added src] " << src << endl); + VERBOSE(1,"[" << HERE << " added trg] " << trg << endl); + VERBOSE(1,"[" << HERE << " added aln] " << aln << endl); + } + } else pool.Submit(task); #else - pool.Submit(task); + pool.Submit(task); #endif #else - task->Run(); + task->Run(); #endif - } + } // we are done, finishing up #ifdef WITH_THREADS @@ -289,52 +291,49 @@ int decoder_main(int argc, char** argv) #ifdef NDEBUG try #endif - { + { #ifdef HAVE_PROTOBUF - GOOGLE_PROTOBUF_VERIFY_VERSION; + GOOGLE_PROTOBUF_VERIFY_VERSION; #endif - // echo command line, if verbose - IFVERBOSE(1) - { - TRACE_ERR("command: "); - for(int i=0; i just dump out weights and exit - if (params.isParamSpecified("show-weights")) - { - ShowWeights(); - exit(0); - } - - if (params.GetParam("server")) - return run_as_server(); - else - return batch_run(); - + // echo command line, if verbose + IFVERBOSE(1) { + TRACE_ERR("command: "); + for(int i=0; i just dump out weights and exit + if (params.isParamSpecified("show-weights")) { + ShowWeights(); + exit(0); + } + + if (params.GetParam("server")) + return run_as_server(); + else + return batch_run(); + + } #ifdef NDEBUG - catch (const std::exception &e) - { - std::cerr << "Exception: " << e.what() << std::endl; - return EXIT_FAILURE; - } + catch (const std::exception &e) { + std::cerr << "Exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } #endif } diff --git a/moses/ExportInterface.h b/moses/ExportInterface.h index 56e37c7e1..03a8b1f1c 100644 --- a/moses/ExportInterface.h +++ b/moses/ExportInterface.h @@ -45,7 +45,9 @@ public: ~SimpleTranslationInterface(); std::string translate(const std::string &input); Moses::StaticData& getStaticData(); - Moses::Parameter& getParameters(){ return m_params; } + Moses::Parameter& getParameters() { + return m_params; + } private: SimpleTranslationInterface(); Moses::Parameter m_params; diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index 81c6bdeb9..c797381ff 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -157,32 +157,26 @@ FeatureFactory std::vector weights = static_data.GetParameter()->GetWeights(featureName); - if (feature->GetNumScoreComponents()) - { - if (weights.size() == 0) - { - weights = feature->DefaultWeights(); - if (weights.size() == 0) - { - TRACE_ERR("WARNING: No weights specified in config file for FF " - << featureName << ". This FF does not supply default values.\n" - << "WARNING: Auto-initializing all weights for this FF to 1.0"); - weights.assign(feature->GetNumScoreComponents(),1.0); - } - else - { - TRACE_ERR("WARNING: No weights specified in config file for FF " - << featureName << ". Using default values supplied by FF."); - } - } - UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(), - "FATAL ERROR: Mismatch in number of features and number " - << "of weights for Feature Function " << featureName - << " (features: " << feature->GetNumScoreComponents() - << " vs. weights: " << weights.size() << ")"); - static_data.SetWeights(feature, weights); + if (feature->GetNumScoreComponents()) { + if (weights.size() == 0) { + weights = feature->DefaultWeights(); + if (weights.size() == 0) { + TRACE_ERR("WARNING: No weights specified in config file for FF " + << featureName << ". This FF does not supply default values.\n" + << "WARNING: Auto-initializing all weights for this FF to 1.0"); + weights.assign(feature->GetNumScoreComponents(),1.0); + } else { + TRACE_ERR("WARNING: No weights specified in config file for FF " + << featureName << ". Using default values supplied by FF."); + } } - else if (feature->IsTuneable()) + UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(), + "FATAL ERROR: Mismatch in number of features and number " + << "of weights for Feature Function " << featureName + << " (features: " << feature->GetNumScoreComponents() + << " vs. weights: " << weights.size() << ")"); + static_data.SetWeights(feature, weights); + } else if (feature->IsTuneable()) static_data.SetWeights(feature, weights); } diff --git a/moses/FF/FeatureFunction.cpp b/moses/FF/FeatureFunction.cpp index 298a9e65c..baa2b5563 100644 --- a/moses/FF/FeatureFunction.cpp +++ b/moses/FF/FeatureFunction.cpp @@ -55,7 +55,7 @@ void FeatureFunction::CallChangeSource(InputType * const&input) void FeatureFunction::SetupAll(TranslationTask const& ttask) { BOOST_FOREACH(FeatureFunction* ff, s_staticColl) - ff->Setup(ttask); + ff->Setup(ttask); } FeatureFunction:: @@ -193,17 +193,23 @@ void FeatureFunction::SetTuneableComponents(const std::string& value) void FeatureFunction ::InitializeForInput(ttasksptr const& ttask) -{ InitializeForInput(*(ttask->GetSource().get())); } +{ + InitializeForInput(*(ttask->GetSource().get())); +} void FeatureFunction ::CleanUpAfterSentenceProcessing(ttasksptr const& ttask) -{ CleanUpAfterSentenceProcessing(*(ttask->GetSource().get())); } +{ + CleanUpAfterSentenceProcessing(*(ttask->GetSource().get())); +} size_t FeatureFunction ::GetIndex() const -{ return m_index; } +{ + return m_index; +} /// set index diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h index a8f189f0b..56f6cdff0 100644 --- a/moses/FF/FeatureFunction.h +++ b/moses/FF/FeatureFunction.h @@ -136,7 +136,9 @@ public: CleanUpAfterSentenceProcessing(ttasksptr const& ttask); const std::string & - GetArgLine() const { return m_argLine; } + GetArgLine() const { + return m_argLine; + } // given a target phrase containing only factors specified in mask // return true if the feature function can be evaluated @@ -153,8 +155,8 @@ public: // source from the input sentence virtual void EvaluateInIsolation(const Phrase &source, const TargetPhrase &targetPhrase, - ScoreComponentCollection& scoreBreakdown, - ScoreComponentCollection& estimatedFutureScore) const = 0; + ScoreComponentCollection& scoreBreakdown, + ScoreComponentCollection& estimatedFutureScore) const = 0; // override this method if you want to change the input before decoding virtual void ChangeSource(InputType * const&input) const { } diff --git a/moses/FF/InternalTree.cpp b/moses/FF/InternalTree.cpp index 95730f018..4a01ea1b2 100644 --- a/moses/FF/InternalTree.cpp +++ b/moses/FF/InternalTree.cpp @@ -147,8 +147,7 @@ void InternalTree::GetUnbinarizedChildren(std::vector &ret) const const std::string &label = (*itx)->GetLabel(); if (!label.empty() && label[0] == '^') { (*itx)->GetUnbinarizedChildren(ret); - } - else { + } else { ret.push_back(*itx); } } diff --git a/moses/FF/InternalTree.h b/moses/FF/InternalTree.h index f9a8ba5d8..8f982c6aa 100644 --- a/moses/FF/InternalTree.h +++ b/moses/FF/InternalTree.h @@ -96,8 +96,7 @@ public: bool RecursiveSearch(const std::vector & labels, std::vector::const_iterator & it, InternalTree const* &parent) const; // Python-like generator that yields next nonterminal leaf on every call - $generator(leafNT) - { + $generator(leafNT) { std::vector::iterator it; InternalTree* tree; leafNT(InternalTree* root = 0): tree(root) {} @@ -116,8 +115,7 @@ public: // Python-like generator that yields the parent of the next nonterminal leaf on every call - $generator(leafNTParent) - { + $generator(leafNTParent) { std::vector::iterator it; InternalTree* tree; leafNTParent(InternalTree* root = 0): tree(root) {} @@ -135,8 +133,7 @@ public: }; // Python-like generator that yields the next nonterminal leaf on every call, and also stores the path from the root of the tree to the nonterminal - $generator(leafNTPath) - { + $generator(leafNTPath) { std::vector::iterator it; InternalTree* tree; std::vector * path; diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp index c67a16076..9a8fa0f08 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses/FF/LexicalReordering/LexicalReordering.cpp @@ -66,9 +66,9 @@ LexicalReordering(const std::string &line) // sanity check: number of default scores size_t numScores - = m_numScoreComponents + = m_numScoreComponents = m_numTuneableComponents - = m_configuration->GetNumScoreComponents(); + = m_configuration->GetNumScoreComponents(); UTIL_THROW_IF2(m_haveDefaultScores && m_defaultScores.size() != numScores, "wrong number of default scores (" << m_defaultScores.size() << ") for lexicalized reordering model (expected " @@ -89,7 +89,7 @@ Load() typedef LexicalReorderingTable LRTable; if (m_filePath.size()) m_table.reset(LRTable::LoadAvailable(m_filePath, m_factorsF, - m_factorsE, std::vector())); + m_factorsE, std::vector())); } Scores @@ -158,7 +158,7 @@ LexicalReordering:: SetCache(TranslationOptionList& tol) const { BOOST_FOREACH(TranslationOption* to, tol) - this->SetCache(*to); + this->SetCache(*to); } diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp index 48fd577f1..90de3ad9c 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp +++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp @@ -101,7 +101,7 @@ GetOrientation(int const reoDistance) const // this one is for HierarchicalReorderingBackwardState return ((m_modelType == LeftRight) ? (reoDistance >= 1) ? R : L - : (reoDistance == 1) ? M + : (reoDistance == 1) ? M : (m_modelType == Monotonic) ? NM : (reoDistance == -1) ? S : (m_modelType == MSD) ? D @@ -115,7 +115,7 @@ GetOrientation(WordsRange const& prev, WordsRange const& cur, { return ((m_modelType == LeftRight) ? cur.GetStartPos() > prev.GetEndPos() ? R : L - : IsMonotonicStep(prev,cur,cov) ? M + : IsMonotonicStep(prev,cur,cov) ? M : (m_modelType == Monotonic) ? NM : IsSwap(prev,cur,cov) ? S : (m_modelType == MSD) ? D @@ -263,7 +263,7 @@ CopyScores(ScoreComponentCollection* accum, const SparseReordering* sparse = m_configuration.GetSparseReordering(); if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType, - m_direction, accum); + m_direction, accum); } @@ -342,7 +342,7 @@ Expand(const TranslationOption& topt, const InputType& input, LRModel const& lrmodel = m_configuration; WordsRange const cur = topt.GetSourceWordsRange(); LRModel::ReorderingType reoType = (m_first ? lrmodel.GetOrientation(cur) - : lrmodel.GetOrientation(m_prevRange,cur)); + : lrmodel.GetOrientation(m_prevRange,cur)); CopyScores(scores, topt, input, reoType); } return new PhraseBasedReorderingState(this, topt); diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h index 1e488fc41..19904ae32 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.h +++ b/moses/FF/LexicalReordering/LexicalReorderingState.h @@ -44,19 +44,18 @@ public: static const ReorderingType L = 1; // left static const ReorderingType MAX = 3; // largest possible #else - enum ReorderingType - { - M = 0, // monotonic - NM = 1, // non-monotonic - S = 1, // swap - D = 2, // discontinuous - DL = 2, // discontinuous, left - DR = 3, // discontinuous, right - R = 0, // right - L = 1, // left - MAX = 3, // largest possible - NONE = 4 // largest possible - }; + enum ReorderingType { + M = 0, // monotonic + NM = 1, // non-monotonic + S = 1, // swap + D = 2, // discontinuous + DL = 2, // discontinuous, left + DR = 3, // discontinuous, right + R = 0, // right + L = 1, // left + MAX = 3, // largest possible + NONE = 4 // largest possible + }; #endif // determine orientation, depending on model: diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp index 5397dcb10..6c81ca414 100644 --- a/moses/FF/LexicalReordering/SparseReordering.cpp +++ b/moses/FF/LexicalReordering/SparseReordering.cpp @@ -114,10 +114,10 @@ void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id, position <= SparseReorderingFeatureKey::Last; ++position) { for (int reoType = 0; reoType <= LRModel::MAX; ++reoType) { SparseReorderingFeatureKey - key(index, static_cast(type), - factor, isCluster, - static_cast(position), - side, static_cast(reoType)); + key(index, static_cast(type), + factor, isCluster, + static_cast(position), + side, static_cast(reoType)); m_featureMap.insert(pair(key,m_producer->GetFeatureName(key.Name(id)))); } } diff --git a/moses/FF/Model1Feature.cpp b/moses/FF/Model1Feature.cpp index 6f6552461..09cfd47ab 100644 --- a/moses/FF/Model1Feature.cpp +++ b/moses/FF/Model1Feature.cpp @@ -71,21 +71,18 @@ void Model1Vocabulary::Load(const std::string& fileName) std::string line; unsigned i = 0; - if ( getline(inFile, line) ) // first line of MGIZA vocabulary files seems to be special : "1 UNK 0" -- skip if it's this - { + if ( getline(inFile, line) ) { // first line of MGIZA vocabulary files seems to be special : "1 UNK 0" -- skip if it's this ++i; std::vector tokens = Tokenize(line); UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens."); unsigned id = Scan(tokens[0]); - if (! ( (id == 1) && (tokens[1] == "UNK") )) - { + if (! ( (id == 1) && (tokens[1] == "UNK") )) { const Factor* factor = factorCollection.AddFactor(tokens[1],false); // TODO: can we assume that the vocabulary is know and filter the model on loading? bool stored = Store(factor, id); UTIL_THROW_IF2(!stored, "Line " << i << " in " << fileName << " overwrites existing vocabulary entry."); } } - while ( getline(inFile, line) ) - { + while ( getline(inFile, line) ) { ++i; std::vector tokens = Tokenize(line); UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens."); @@ -104,8 +101,7 @@ void Model1LexicalTable::Load(const std::string &fileName, const Model1Vocabular std::string line; unsigned i = 0; - while ( getline(inFile, line) ) - { + while ( getline(inFile, line) ) { ++i; std::vector tokens = Tokenize(line); UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens."); @@ -183,35 +179,31 @@ void Model1Feature::Load() } void Model1Feature::EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedFutureScore) const + , const InputPath &inputPath + , const TargetPhrase &targetPhrase + , const StackVec *stackVec + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection *estimatedFutureScore) const { const Sentence& sentence = static_cast(input); float score = 0.0; float norm = TransformScore(1+sentence.GetSize()); - for (size_t posT=0; posT read_lock(m_accessLock); - #endif +#endif boost::unordered_map >::const_iterator sentenceCache = m_cache.find(&input); - if (sentenceCache != m_cache.end()) - { + if (sentenceCache != m_cache.end()) { boost::unordered_map::const_iterator cacheHit = sentenceCache->second.find(wordT[0]); - if (cacheHit != sentenceCache->second.end()) - { + if (cacheHit != sentenceCache->second.end()) { foundInCache = true; score += cacheHit->second; FEATUREVERBOSE(3, "Cached score( " << wordT << " ) = " << cacheHit->second << std::endl); @@ -219,10 +211,8 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input } } - if (!foundInCache) - { - for (size_t posS=1; posS and - { + if (!foundInCache) { + for (size_t posS=1; posS and const Word &wordS = sentence.GetWord(posS); float modelProb = m_model1.GetProbability(wordS[0],wordT[0]); FEATUREVERBOSE(4, "p( " << wordT << " | " << wordS << " ) = " << modelProb << std::endl); @@ -231,10 +221,10 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input float thisWordScore = TransformScore(thisWordProb) - norm; FEATUREVERBOSE(3, "score( " << wordT << " ) = " << thisWordScore << std::endl); { - #ifdef WITH_THREADS +#ifdef WITH_THREADS // need to update cache; write lock boost::unique_lock lock(m_accessLock); - #endif +#endif m_cache[&input][wordT[0]] = thisWordScore; } score += thisWordScore; @@ -247,14 +237,13 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input void Model1Feature::CleanUpAfterSentenceProcessing(const InputType& source) { - #ifdef WITH_THREADS +#ifdef WITH_THREADS // need to update cache; write lock boost::unique_lock lock(m_accessLock); - #endif +#endif // clear cache boost::unordered_map >::iterator sentenceCache = m_cache.find(&source); - if (sentenceCache != m_cache.end()) - { + if (sentenceCache != m_cache.end()) { sentenceCache->second.clear(); m_cache.erase(sentenceCache); } diff --git a/moses/FF/Model1Feature.h b/moses/FF/Model1Feature.h index 9c380e3ae..610a39808 100644 --- a/moses/FF/Model1Feature.h +++ b/moses/FF/Model1Feature.h @@ -17,7 +17,7 @@ class Model1Vocabulary { public: - #define INVALID_ID std::numeric_limits::max() // UINT_MAX +#define INVALID_ID std::numeric_limits::max() // UINT_MAX static const std::string GIZANULL; Model1Vocabulary(); @@ -103,10 +103,10 @@ private: // cache mutable boost::unordered_map > m_cache; - #ifdef WITH_THREADS +#ifdef WITH_THREADS // reader-writer lock mutable boost::shared_mutex m_accessLock; - #endif +#endif }; diff --git a/moses/FF/PhraseOrientationFeature.cpp b/moses/FF/PhraseOrientationFeature.cpp index 2a59340ea..1c9a3f738 100644 --- a/moses/FF/PhraseOrientationFeature.cpp +++ b/moses/FF/PhraseOrientationFeature.cpp @@ -288,7 +288,7 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied( if (currTarPhr.GetAlignNonTerm().GetSize() != 0) { const boost::shared_ptr data = currTarPhr.GetData("Orientation"); UTIL_THROW_IF2(!data, GetScoreProducerDescription() - << ": Orientation data not set in target phrase. "); + << ": Orientation data not set in target phrase. "); reoClassData = static_cast( data.get() ); } diff --git a/moses/FF/PhraseOrientationFeature.h b/moses/FF/PhraseOrientationFeature.h index aaee79a15..4460a1ea7 100644 --- a/moses/FF/PhraseOrientationFeature.h +++ b/moses/FF/PhraseOrientationFeature.h @@ -301,15 +301,15 @@ class PhraseOrientationFeature : public StatefulFeatureFunction public: struct ReoClassData { - public: - std::vector nonTerminalReoClassL2R; - std::vector nonTerminalReoClassR2L; - bool firstNonTerminalIsBoundary; - bool firstNonTerminalPreviousSourceSpanIsAligned; - bool firstNonTerminalFollowingSourceSpanIsAligned; - bool lastNonTerminalIsBoundary; - bool lastNonTerminalPreviousSourceSpanIsAligned; - bool lastNonTerminalFollowingSourceSpanIsAligned; + public: + std::vector nonTerminalReoClassL2R; + std::vector nonTerminalReoClassR2L; + bool firstNonTerminalIsBoundary; + bool firstNonTerminalPreviousSourceSpanIsAligned; + bool firstNonTerminalFollowingSourceSpanIsAligned; + bool lastNonTerminalIsBoundary; + bool lastNonTerminalPreviousSourceSpanIsAligned; + bool lastNonTerminalFollowingSourceSpanIsAligned; }; PhraseOrientationFeature(const std::string &line); diff --git a/moses/FF/RulePairUnlexicalizedSource.cpp b/moses/FF/RulePairUnlexicalizedSource.cpp index 148d54052..f490a2b1a 100644 --- a/moses/FF/RulePairUnlexicalizedSource.cpp +++ b/moses/FF/RulePairUnlexicalizedSource.cpp @@ -39,9 +39,9 @@ void RulePairUnlexicalizedSource::SetParameter(const std::string& key, const std void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedFutureScore) const + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedFutureScore) const { const Factor* targetPhraseLHS = targetPhrase.GetTargetLHS()[0]; if ( !m_glueRules && (targetPhraseLHS == m_glueTargetLHS) ) { @@ -51,8 +51,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source return; } - for (size_t posS=0; posSGetString() << "|"; for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin(); - it!=targetPhrase.GetAlignNonTerm().end(); ++it) - { + it!=targetPhrase.GetAlignNonTerm().end(); ++it) { namestr << "|" << it->first << "-" << it->second; } diff --git a/moses/FF/RuleScope.cpp b/moses/FF/RuleScope.cpp index 08987537d..c894a2b20 100644 --- a/moses/FF/RuleScope.cpp +++ b/moses/FF/RuleScope.cpp @@ -26,16 +26,16 @@ void RuleScope::EvaluateInIsolation(const Phrase &source , ScoreComponentCollection &estimatedFutureScore) const { if (IsGlueRule(source)) { - return; + return; } float score = 0; if (source.GetSize() > 0 && source.Front().IsNonTerminal()) { - ++score; + ++score; } if (source.GetSize() > 1 && source.Back().IsNonTerminal()) { - ++score; + ++score; } /* @@ -61,23 +61,20 @@ void RuleScope::EvaluateInIsolation(const Phrase &source */ if (m_perScope) { - UTIL_THROW_IF2(m_numScoreComponents <= score, - "Insufficient number of score components. Scope=" << score << ". NUmber of score components=" << score); - vector scores(m_numScoreComponents, 0); - scores[score] = 1; + UTIL_THROW_IF2(m_numScoreComponents <= score, + "Insufficient number of score components. Scope=" << score << ". NUmber of score components=" << score); + vector scores(m_numScoreComponents, 0); + scores[score] = 1; - if (m_futureCostOnly) { - estimatedFutureScore.PlusEquals(this, scores); - } - else { - scoreBreakdown.PlusEquals(this, scores); - } - } - else if (m_futureCostOnly) { - estimatedFutureScore.PlusEquals(this, score); - } - else { - scoreBreakdown.PlusEquals(this, score); + if (m_futureCostOnly) { + estimatedFutureScore.PlusEquals(this, scores); + } else { + scoreBreakdown.PlusEquals(this, scores); + } + } else if (m_futureCostOnly) { + estimatedFutureScore.PlusEquals(this, score); + } else { + scoreBreakdown.PlusEquals(this, score); } } @@ -85,14 +82,11 @@ void RuleScope::SetParameter(const std::string& key, const std::string& value) { if (key == "source-syntax") { m_sourceSyntax = Scan(value); - } - else if (key == "per-scope") { - m_perScope = Scan(value); - } - else if ("future-cost-only") { - m_futureCostOnly = Scan(value); - } - else { + } else if (key == "per-scope") { + m_perScope = Scan(value); + } else if ("future-cost-only") { + m_futureCostOnly = Scan(value); + } else { StatelessFeatureFunction::SetParameter(key, value); } } diff --git a/moses/FF/TreeStructureFeature.cpp b/moses/FF/TreeStructureFeature.cpp index f2988f2b9..fc1fcdc5b 100644 --- a/moses/FF/TreeStructureFeature.cpp +++ b/moses/FF/TreeStructureFeature.cpp @@ -72,7 +72,7 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy bool full_sentence = (mytree->GetChildren().back()->GetLabel() == "" || (mytree->GetChildren().back()->GetLabel() == "SEND" && mytree->GetChildren().back()->GetChildren().back()->GetLabel() == "")); if (m_binarized && full_sentence) { - mytree->Unbinarize(); + mytree->Unbinarize(); } return new TreeState(mytree); diff --git a/moses/FF/VW/VW.h b/moses/FF/VW/VW.h index c94791c32..bd59a41a4 100644 --- a/moses/FF/VW/VW.h +++ b/moses/FF/VW/VW.h @@ -183,8 +183,8 @@ public: // optionally update translation options using leave-one-out std::vector keep = (m_leaveOneOut.size() > 0) - ? LeaveOneOut(translationOptionList, correct) - : std::vector(translationOptionList.size(), true); + ? LeaveOneOut(translationOptionList, correct) + : std::vector(translationOptionList.size(), true); // check whether we (still) have some correct translation int firstCorrect = -1; @@ -312,11 +312,11 @@ public: return; UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput, - "This feature function requires the TabbedSentence input type"); + "This feature function requires the TabbedSentence input type"); const TabbedSentence& tabbedSentence = static_cast(source); UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2, - "TabbedSentence must contain targetalignment"); + "TabbedSentence must contain targetalignment"); // target sentence represented as a phrase Phrase *target = new Phrase(); diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp index b792d11f8..bc466664a 100644 --- a/moses/Hypothesis.cpp +++ b/moses/Hypothesis.cpp @@ -45,630 +45,633 @@ namespace Moses { #ifdef USE_HYPO_POOL - ObjectPool Hypothesis::s_objectPool("Hypothesis", 300000); +ObjectPool Hypothesis::s_objectPool("Hypothesis", 300000); #endif - Hypothesis:: - Hypothesis(Manager& manager, InputType const& source, const TranslationOption &initialTransOpt) - : m_prevHypo(NULL) - , m_sourceCompleted(source.GetSize(), manager.GetSource().m_sourceCompleted) - , m_sourceInput(source) - , m_currSourceWordsRange( - m_sourceCompleted.GetFirstGapPos()>0 ? 0 : NOT_FOUND, - m_sourceCompleted.GetFirstGapPos()>0 ? m_sourceCompleted.GetFirstGapPos()-1 : NOT_FOUND) - , m_currTargetWordsRange(NOT_FOUND, NOT_FOUND) - , m_wordDeleted(false) - , m_totalScore(0.0f) - , m_futureScore(0.0f) - , m_ffStates(StatefulFeatureFunction::GetStatefulFeatureFunctions().size()) - , m_arcList(NULL) - , m_transOpt(initialTransOpt) - , m_manager(manager) - , m_id(m_manager.GetNextHypoId()) - { - // used for initial seeding of trans process - // initialize scores - //_hash_computed = false; - //s_HypothesesCreated = 1; - const vector& ffs = StatefulFeatureFunction::GetStatefulFeatureFunctions(); - for (unsigned i = 0; i < ffs.size(); ++i) - m_ffStates[i] = ffs[i]->EmptyHypothesisState(source); - m_manager.GetSentenceStats().AddCreated(); - } +Hypothesis:: +Hypothesis(Manager& manager, InputType const& source, const TranslationOption &initialTransOpt) + : m_prevHypo(NULL) + , m_sourceCompleted(source.GetSize(), manager.GetSource().m_sourceCompleted) + , m_sourceInput(source) + , m_currSourceWordsRange( + m_sourceCompleted.GetFirstGapPos()>0 ? 0 : NOT_FOUND, + m_sourceCompleted.GetFirstGapPos()>0 ? m_sourceCompleted.GetFirstGapPos()-1 : NOT_FOUND) + , m_currTargetWordsRange(NOT_FOUND, NOT_FOUND) + , m_wordDeleted(false) + , m_totalScore(0.0f) + , m_futureScore(0.0f) + , m_ffStates(StatefulFeatureFunction::GetStatefulFeatureFunctions().size()) + , m_arcList(NULL) + , m_transOpt(initialTransOpt) + , m_manager(manager) + , m_id(m_manager.GetNextHypoId()) +{ + // used for initial seeding of trans process + // initialize scores + //_hash_computed = false; + //s_HypothesesCreated = 1; + const vector& ffs = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for (unsigned i = 0; i < ffs.size(); ++i) + m_ffStates[i] = ffs[i]->EmptyHypothesisState(source); + m_manager.GetSentenceStats().AddCreated(); +} - /*** - * continue prevHypo by appending the phrases in transOpt - */ - Hypothesis:: - Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt) - : m_prevHypo(&prevHypo) - , m_sourceCompleted(prevHypo.m_sourceCompleted ) - , m_sourceInput(prevHypo.m_sourceInput) - , m_currSourceWordsRange(transOpt.GetSourceWordsRange()) - , m_currTargetWordsRange(prevHypo.m_currTargetWordsRange.GetEndPos() + 1, - prevHypo.m_currTargetWordsRange.GetEndPos() - + transOpt.GetTargetPhrase().GetSize()) - , m_wordDeleted(false) - , m_totalScore(0.0f) - , m_futureScore(0.0f) - , m_ffStates(prevHypo.m_ffStates.size()) - , m_arcList(NULL) - , m_transOpt(transOpt) - , m_manager(prevHypo.GetManager()) - , m_id(m_manager.GetNextHypoId()) - { - m_currScoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown()); +/*** + * continue prevHypo by appending the phrases in transOpt + */ +Hypothesis:: +Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt) + : m_prevHypo(&prevHypo) + , m_sourceCompleted(prevHypo.m_sourceCompleted ) + , m_sourceInput(prevHypo.m_sourceInput) + , m_currSourceWordsRange(transOpt.GetSourceWordsRange()) + , m_currTargetWordsRange(prevHypo.m_currTargetWordsRange.GetEndPos() + 1, + prevHypo.m_currTargetWordsRange.GetEndPos() + + transOpt.GetTargetPhrase().GetSize()) + , m_wordDeleted(false) + , m_totalScore(0.0f) + , m_futureScore(0.0f) + , m_ffStates(prevHypo.m_ffStates.size()) + , m_arcList(NULL) + , m_transOpt(transOpt) + , m_manager(prevHypo.GetManager()) + , m_id(m_manager.GetNextHypoId()) +{ + m_currScoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown()); - // assert that we are not extending our hypothesis by retranslating something - // that this hypothesis has already translated! - assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange)); + // assert that we are not extending our hypothesis by retranslating something + // that this hypothesis has already translated! + assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange)); - //_hash_computed = false; - m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true); - m_wordDeleted = transOpt.IsDeletionOption(); - m_manager.GetSentenceStats().AddCreated(); - } + //_hash_computed = false; + m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true); + m_wordDeleted = transOpt.IsDeletionOption(); + m_manager.GetSentenceStats().AddCreated(); +} - Hypothesis:: - ~Hypothesis() - { - for (unsigned i = 0; i < m_ffStates.size(); ++i) - delete m_ffStates[i]; +Hypothesis:: +~Hypothesis() +{ + for (unsigned i = 0; i < m_ffStates.size(); ++i) + delete m_ffStates[i]; - if (m_arcList) { - ArcList::iterator iter; - for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter) { - FREEHYPO(*iter); - } - m_arcList->clear(); - - delete m_arcList; - m_arcList = NULL; + if (m_arcList) { + ArcList::iterator iter; + for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter) { + FREEHYPO(*iter); } - } + m_arcList->clear(); - void - Hypothesis:: - AddArc(Hypothesis *loserHypo) - { - if (!m_arcList) { - if (loserHypo->m_arcList) { // we don't have an arcList, but loser does - this->m_arcList = loserHypo->m_arcList; // take ownership, we'll delete - loserHypo->m_arcList = 0; // prevent a double deletion - } else { - this->m_arcList = new ArcList(); - } + delete m_arcList; + m_arcList = NULL; + } +} + +void +Hypothesis:: +AddArc(Hypothesis *loserHypo) +{ + if (!m_arcList) { + if (loserHypo->m_arcList) { // we don't have an arcList, but loser does + this->m_arcList = loserHypo->m_arcList; // take ownership, we'll delete + loserHypo->m_arcList = 0; // prevent a double deletion } else { - if (loserHypo->m_arcList) { // both have an arc list: merge. delete loser - size_t my_size = m_arcList->size(); - size_t add_size = loserHypo->m_arcList->size(); - this->m_arcList->resize(my_size + add_size, 0); - std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(Hypothesis *)); - delete loserHypo->m_arcList; - loserHypo->m_arcList = 0; - } else { // loserHypo doesn't have any arcs - // DO NOTHING - } + this->m_arcList = new ArcList(); + } + } else { + if (loserHypo->m_arcList) { // both have an arc list: merge. delete loser + size_t my_size = m_arcList->size(); + size_t add_size = loserHypo->m_arcList->size(); + this->m_arcList->resize(my_size + add_size, 0); + std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(Hypothesis *)); + delete loserHypo->m_arcList; + loserHypo->m_arcList = 0; + } else { // loserHypo doesn't have any arcs + // DO NOTHING } - m_arcList->push_back(loserHypo); } + m_arcList->push_back(loserHypo); +} - /*** - * return the subclass of Hypothesis most appropriate to the given translation option - */ - Hypothesis* - Hypothesis:: - CreateNext(const TranslationOption &transOpt) const - { - return Create(*this, transOpt); - } +/*** + * return the subclass of Hypothesis most appropriate to the given translation option + */ +Hypothesis* +Hypothesis:: +CreateNext(const TranslationOption &transOpt) const +{ + return Create(*this, transOpt); +} - /*** - * return the subclass of Hypothesis most appropriate to the given translation option - */ - Hypothesis* - Hypothesis:: - Create(const Hypothesis &prevHypo, const TranslationOption &transOpt) - { +/*** + * return the subclass of Hypothesis most appropriate to the given translation option + */ +Hypothesis* +Hypothesis:: +Create(const Hypothesis &prevHypo, const TranslationOption &transOpt) +{ #ifdef USE_HYPO_POOL - Hypothesis *ptr = s_objectPool.getPtr(); - return new(ptr) Hypothesis(prevHypo, transOpt); + Hypothesis *ptr = s_objectPool.getPtr(); + return new(ptr) Hypothesis(prevHypo, transOpt); #else - return new Hypothesis(prevHypo, transOpt); + return new Hypothesis(prevHypo, transOpt); #endif - } - /*** - * return the subclass of Hypothesis most appropriate to the given target phrase - */ +} +/*** + * return the subclass of Hypothesis most appropriate to the given target phrase + */ - Hypothesis* - Hypothesis:: - Create(Manager& manager, InputType const& m_source, - const TranslationOption &initialTransOpt) - { +Hypothesis* +Hypothesis:: +Create(Manager& manager, InputType const& m_source, + const TranslationOption &initialTransOpt) +{ #ifdef USE_HYPO_POOL - Hypothesis *ptr = s_objectPool.getPtr(); - return new(ptr) Hypothesis(manager, m_source, initialTransOpt); + Hypothesis *ptr = s_objectPool.getPtr(); + return new(ptr) Hypothesis(manager, m_source, initialTransOpt); #else - return new Hypothesis(manager, m_source, initialTransOpt); + return new Hypothesis(manager, m_source, initialTransOpt); #endif +} + +/** check, if two hypothesis can be recombined. + this is actually a sorting function that allows us to + keep an ordered list of hypotheses. This makes recombination + much quicker. +*/ +int +Hypothesis:: +RecombineCompare(const Hypothesis &compare) const +{ + // -1 = this < compare + // +1 = this > compare + // 0 = this ==compare + int comp = m_sourceCompleted.Compare(compare.m_sourceCompleted); + if (comp != 0) + return comp; + + for (unsigned i = 0; i < m_ffStates.size(); ++i) { + if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL) { + comp = m_ffStates[i] - compare.m_ffStates[i]; + } else { + comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]); + } + if (comp != 0) return comp; } - /** check, if two hypothesis can be recombined. - this is actually a sorting function that allows us to - keep an ordered list of hypotheses. This makes recombination - much quicker. - */ - int - Hypothesis:: - RecombineCompare(const Hypothesis &compare) const - { - // -1 = this < compare - // +1 = this > compare - // 0 = this ==compare - int comp = m_sourceCompleted.Compare(compare.m_sourceCompleted); - if (comp != 0) - return comp; + return 0; +} - for (unsigned i = 0; i < m_ffStates.size(); ++i) { - if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL) { - comp = m_ffStates[i] - compare.m_ffStates[i]; +void +Hypothesis:: +EvaluateWhenApplied(StatefulFeatureFunction const& sfff, + int state_idx) +{ + const StaticData &staticData = StaticData::Instance(); + if (! staticData.IsFeatureFunctionIgnored( sfff )) { + m_ffStates[state_idx] + = sfff.EvaluateWhenApplied + (*this, m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL, + &m_currScoreBreakdown); + } +} + +void +Hypothesis:: +EvaluateWhenApplied(const StatelessFeatureFunction& slff) +{ + const StaticData &staticData = StaticData::Instance(); + if (! staticData.IsFeatureFunctionIgnored( slff )) { + slff.EvaluateWhenApplied(*this, &m_currScoreBreakdown); + } +} + +/*** + * calculate the logarithm of our total translation score (sum up components) + */ +void +Hypothesis:: +EvaluateWhenApplied(const SquareMatrix &futureScore) +{ + IFVERBOSE(2) { + m_manager.GetSentenceStats().StartTimeOtherScore(); + } + // some stateless score producers cache their values in the translation + // option: add these here + // language model scores for n-grams completely contained within a target + // phrase are also included here + + // compute values of stateless feature functions that were not + // cached in the translation option + const vector& sfs = + StatelessFeatureFunction::GetStatelessFeatureFunctions(); + for (unsigned i = 0; i < sfs.size(); ++i) { + const StatelessFeatureFunction &ff = *sfs[i]; + EvaluateWhenApplied(ff); + } + + const vector& ffs = + StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for (unsigned i = 0; i < ffs.size(); ++i) { + const StatefulFeatureFunction &ff = *ffs[i]; + const StaticData &staticData = StaticData::Instance(); + if (! staticData.IsFeatureFunctionIgnored(ff)) { + m_ffStates[i] = ff.EvaluateWhenApplied(*this, + m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL, + &m_currScoreBreakdown); + } + } + + IFVERBOSE(2) { + m_manager.GetSentenceStats().StopTimeOtherScore(); + m_manager.GetSentenceStats().StartTimeEstimateScore(); + } + + // FUTURE COST + m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted ); + + // TOTAL + m_totalScore = m_currScoreBreakdown.GetWeightedScore() + m_futureScore; + if (m_prevHypo) m_totalScore += m_prevHypo->GetScore(); + + IFVERBOSE(2) { + m_manager.GetSentenceStats().StopTimeEstimateScore(); + } +} + +const Hypothesis* Hypothesis::GetPrevHypo()const +{ + return m_prevHypo; +} + +/** + * print hypothesis information for pharaoh-style logging + */ +void +Hypothesis:: +PrintHypothesis() const +{ + if (!m_prevHypo) { + TRACE_ERR(endl << "NULL hypo" << endl); + return; + } + TRACE_ERR(endl << "creating hypothesis "<< m_id <<" from "<< m_prevHypo->m_id<<" ( "); + int end = (int)(m_prevHypo->GetCurrTargetPhrase().GetSize()-1); + int start = end-1; + if ( start < 0 ) start = 0; + if ( m_prevHypo->m_currTargetWordsRange.GetStartPos() == NOT_FOUND ) { + TRACE_ERR( " "); + } else { + TRACE_ERR( "... "); + } + if (end>=0) { + WordsRange range(start, end); + TRACE_ERR( m_prevHypo->GetCurrTargetPhrase().GetSubString(range) << " "); + } + TRACE_ERR( ")"<m_totalScore - m_prevHypo->m_futureScore) < translation cost "<GetCurrSourceWordsRange())); // << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<size() > nBestSize * 5) { + // prune arc list only if there too many arcs + NTH_ELEMENT4(m_arcList->begin(), m_arcList->begin() + nBestSize - 1, + m_arcList->end(), CompareHypothesisTotalScore()); + + // delete bad ones + ArcList::iterator iter; + for (iter = m_arcList->begin() + nBestSize; iter != m_arcList->end() ; ++iter) + FREEHYPO(*iter); + m_arcList->erase(m_arcList->begin() + nBestSize, m_arcList->end()); + } + + // set all arc's main hypo variable to this hypo + ArcList::iterator iter = m_arcList->begin(); + for (; iter != m_arcList->end() ; ++iter) { + Hypothesis *arc = *iter; + arc->SetWinningHypo(this); + } +} + +TargetPhrase const& +Hypothesis:: +GetCurrTargetPhrase() const +{ + return m_transOpt.GetTargetPhrase(); +} + +void +Hypothesis:: +GetOutputPhrase(Phrase &out) const +{ + if (m_prevHypo != NULL) + m_prevHypo->GetOutputPhrase(out); + out.Append(GetCurrTargetPhrase()); +} + +TO_STRING_BODY(Hypothesis) + +// friend +ostream& operator<<(ostream& out, const Hypothesis& hypo) +{ + hypo.ToStream(out); + // words bitmap + out << "[" << hypo.m_sourceCompleted << "] "; + + // scores + out << " [total=" << hypo.GetTotalScore() << "]"; + out << " " << hypo.GetScoreBreakdown(); + + // alignment + out << " " << hypo.GetCurrTargetPhrase().GetAlignNonTerm(); + + return out; +} + + +std::string +Hypothesis:: +GetSourcePhraseStringRep(const vector factorsToPrint) const +{ + return m_transOpt.GetInputPath().GetPhrase().GetStringRep(factorsToPrint); +} + +std::string +Hypothesis:: +GetTargetPhraseStringRep(const vector factorsToPrint) const +{ + return (m_prevHypo ? GetCurrTargetPhrase().GetStringRep(factorsToPrint) : ""); +} + +std::string +Hypothesis:: +GetSourcePhraseStringRep() const +{ + vector allFactors(MAX_NUM_FACTORS); + for(size_t i=0; i < MAX_NUM_FACTORS; i++) + allFactors[i] = i; + return GetSourcePhraseStringRep(allFactors); +} + +std::string +Hypothesis:: +GetTargetPhraseStringRep() const +{ + vector allFactors(MAX_NUM_FACTORS); + for(size_t i=0; i < MAX_NUM_FACTORS; i++) + allFactors[i] = i; + return GetTargetPhraseStringRep(allFactors); +} + +void +Hypothesis:: +OutputAlignment(std::ostream &out) const +{ + std::vector edges; + const Hypothesis *currentHypo = this; + while (currentHypo) { + edges.push_back(currentHypo); + currentHypo = currentHypo->GetPrevHypo(); + } + + OutputAlignment(out, edges); + +} + +void +Hypothesis:: +OutputAlignment(ostream &out, const vector &edges) +{ + size_t targetOffset = 0; + + for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { + const Hypothesis &edge = *edges[currEdge]; + const TargetPhrase &tp = edge.GetCurrTargetPhrase(); + size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos(); + + OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset); + + targetOffset += tp.GetSize(); + } + // Used by --print-alignment-info, so no endl +} + +void +Hypothesis:: +OutputAlignment(ostream &out, const AlignmentInfo &ai, + size_t sourceOffset, size_t targetOffset) +{ + typedef std::vector< const std::pair* > AlignVec; + AlignVec alignments = ai.GetSortedAlignments(); + + AlignVec::const_iterator it; + for (it = alignments.begin(); it != alignments.end(); ++it) { + const std::pair &alignment = **it; + out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " "; + } + +} + +void +Hypothesis:: +OutputInput(std::vector& map, const Hypothesis* hypo) +{ + if (!hypo->GetPrevHypo()) return; + OutputInput(map, hypo->GetPrevHypo()); + map[hypo->GetCurrSourceWordsRange().GetStartPos()] + = &hypo->GetTranslationOption().GetInputPath().GetPhrase(); +} + +void +Hypothesis:: +OutputInput(std::ostream& os) const +{ + size_t len = this->GetInput().GetSize(); + std::vector inp_phrases(len, 0); + OutputInput(inp_phrases, this); + for (size_t i=0; i &outputFactorOrder, + char reportSegmentation, bool reportAllFactors) const +{ + if (m_prevHypo) { + // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence + m_prevHypo->OutputBestSurface(out, outputFactorOrder, reportSegmentation, reportAllFactors); + } + OutputSurface(out, *this, outputFactorOrder, reportSegmentation, reportAllFactors); +} + +////////////////////////////////////////////////////////////////////////// +/*** + * print surface factor only for the given phrase + */ +void +Hypothesis:: +OutputSurface(std::ostream &out, const Hypothesis &edge, + const std::vector &outputFactorOrder, + char reportSegmentation, bool reportAllFactors) const +{ + UTIL_THROW_IF2(outputFactorOrder.size() == 0, + "Must specific at least 1 output factor"); + const TargetPhrase& phrase = edge.GetCurrTargetPhrase(); + bool markUnknown = StaticData::Instance().GetMarkUnknown(); + if (reportAllFactors == true) { + out << phrase; + } else { + FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor(); + + std::map placeholders; + if (placeholderFactor != NOT_FOUND) { + // creates map of target position -> factor for placeholders + placeholders = GetPlaceholders(edge, placeholderFactor); + } + + size_t size = phrase.GetSize(); + for (size_t pos = 0 ; pos < size ; pos++) { + const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); + + if (placeholders.size()) { + // do placeholders + std::map::const_iterator iter = placeholders.find(pos); + if (iter != placeholders.end()) { + factor = iter->second; + } + } + + UTIL_THROW_IF2(factor == NULL, + "No factor 0 at position " << pos); + + //preface surface form with UNK if marking unknowns + const Word &word = phrase.GetWord(pos); + if(markUnknown && word.IsOOV()) { + out << "UNK" << *factor; } else { - comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]); - } - if (comp != 0) return comp; - } - - return 0; - } - - void - Hypothesis:: - EvaluateWhenApplied(StatefulFeatureFunction const& sfff, - int state_idx) - { - const StaticData &staticData = StaticData::Instance(); - if (! staticData.IsFeatureFunctionIgnored( sfff )) - { - m_ffStates[state_idx] - = sfff.EvaluateWhenApplied - (*this, m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL, - &m_currScoreBreakdown); - } - } - - void - Hypothesis:: - EvaluateWhenApplied(const StatelessFeatureFunction& slff) - { - const StaticData &staticData = StaticData::Instance(); - if (! staticData.IsFeatureFunctionIgnored( slff )) { - slff.EvaluateWhenApplied(*this, &m_currScoreBreakdown); - } - } - - /*** - * calculate the logarithm of our total translation score (sum up components) - */ - void - Hypothesis:: - EvaluateWhenApplied(const SquareMatrix &futureScore) - { - IFVERBOSE(2) { - m_manager.GetSentenceStats().StartTimeOtherScore(); - } - // some stateless score producers cache their values in the translation - // option: add these here - // language model scores for n-grams completely contained within a target - // phrase are also included here - - // compute values of stateless feature functions that were not - // cached in the translation option - const vector& sfs = - StatelessFeatureFunction::GetStatelessFeatureFunctions(); - for (unsigned i = 0; i < sfs.size(); ++i) { - const StatelessFeatureFunction &ff = *sfs[i]; - EvaluateWhenApplied(ff); - } - - const vector& ffs = - StatefulFeatureFunction::GetStatefulFeatureFunctions(); - for (unsigned i = 0; i < ffs.size(); ++i) { - const StatefulFeatureFunction &ff = *ffs[i]; - const StaticData &staticData = StaticData::Instance(); - if (! staticData.IsFeatureFunctionIgnored(ff)) { - m_ffStates[i] = ff.EvaluateWhenApplied(*this, - m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL, - &m_currScoreBreakdown); - } - } - - IFVERBOSE(2) { - m_manager.GetSentenceStats().StopTimeOtherScore(); - m_manager.GetSentenceStats().StartTimeEstimateScore(); - } - - // FUTURE COST - m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted ); - - // TOTAL - m_totalScore = m_currScoreBreakdown.GetWeightedScore() + m_futureScore; - if (m_prevHypo) m_totalScore += m_prevHypo->GetScore(); - - IFVERBOSE(2) { - m_manager.GetSentenceStats().StopTimeEstimateScore(); - } - } - - const Hypothesis* Hypothesis::GetPrevHypo()const - { - return m_prevHypo; - } - - /** - * print hypothesis information for pharaoh-style logging - */ - void - Hypothesis:: - PrintHypothesis() const - { - if (!m_prevHypo) { - TRACE_ERR(endl << "NULL hypo" << endl); - return; - } - TRACE_ERR(endl << "creating hypothesis "<< m_id <<" from "<< m_prevHypo->m_id<<" ( "); - int end = (int)(m_prevHypo->GetCurrTargetPhrase().GetSize()-1); - int start = end-1; - if ( start < 0 ) start = 0; - if ( m_prevHypo->m_currTargetWordsRange.GetStartPos() == NOT_FOUND ) { - TRACE_ERR( " "); - } else { - TRACE_ERR( "... "); - } - if (end>=0) { - WordsRange range(start, end); - TRACE_ERR( m_prevHypo->GetCurrTargetPhrase().GetSubString(range) << " "); - } - TRACE_ERR( ")"<m_totalScore - m_prevHypo->m_futureScore) < translation cost "<GetCurrSourceWordsRange())); // << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<size() > nBestSize * 5) - { - // prune arc list only if there too many arcs - NTH_ELEMENT4(m_arcList->begin(), m_arcList->begin() + nBestSize - 1, - m_arcList->end(), CompareHypothesisTotalScore()); - - // delete bad ones - ArcList::iterator iter; - for (iter = m_arcList->begin() + nBestSize; iter != m_arcList->end() ; ++iter) - FREEHYPO(*iter); - m_arcList->erase(m_arcList->begin() + nBestSize, m_arcList->end()); + out << *factor; } - // set all arc's main hypo variable to this hypo - ArcList::iterator iter = m_arcList->begin(); - for (; iter != m_arcList->end() ; ++iter) { - Hypothesis *arc = *iter; - arc->SetWinningHypo(this); - } - } + for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { + const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); + UTIL_THROW_IF2(factor == NULL, + "No factor " << i << " at position " << pos); - TargetPhrase const& - Hypothesis:: - GetCurrTargetPhrase() const - { return m_transOpt.GetTargetPhrase(); } - - void - Hypothesis:: - GetOutputPhrase(Phrase &out) const - { - if (m_prevHypo != NULL) - m_prevHypo->GetOutputPhrase(out); - out.Append(GetCurrTargetPhrase()); - } - - TO_STRING_BODY(Hypothesis) - - // friend - ostream& operator<<(ostream& out, const Hypothesis& hypo) - { - hypo.ToStream(out); - // words bitmap - out << "[" << hypo.m_sourceCompleted << "] "; - - // scores - out << " [total=" << hypo.GetTotalScore() << "]"; - out << " " << hypo.GetScoreBreakdown(); - - // alignment - out << " " << hypo.GetCurrTargetPhrase().GetAlignNonTerm(); - - return out; - } - - - std::string - Hypothesis:: - GetSourcePhraseStringRep(const vector factorsToPrint) const - { return m_transOpt.GetInputPath().GetPhrase().GetStringRep(factorsToPrint); } - - std::string - Hypothesis:: - GetTargetPhraseStringRep(const vector factorsToPrint) const - { return (m_prevHypo ? GetCurrTargetPhrase().GetStringRep(factorsToPrint) : ""); } - - std::string - Hypothesis:: - GetSourcePhraseStringRep() const - { - vector allFactors(MAX_NUM_FACTORS); - for(size_t i=0; i < MAX_NUM_FACTORS; i++) - allFactors[i] = i; - return GetSourcePhraseStringRep(allFactors); - } - - std::string - Hypothesis:: - GetTargetPhraseStringRep() const - { - vector allFactors(MAX_NUM_FACTORS); - for(size_t i=0; i < MAX_NUM_FACTORS; i++) - allFactors[i] = i; - return GetTargetPhraseStringRep(allFactors); - } - - void - Hypothesis:: - OutputAlignment(std::ostream &out) const - { - std::vector edges; - const Hypothesis *currentHypo = this; - while (currentHypo) { - edges.push_back(currentHypo); - currentHypo = currentHypo->GetPrevHypo(); - } - - OutputAlignment(out, edges); - - } - - void - Hypothesis:: - OutputAlignment(ostream &out, const vector &edges) - { - size_t targetOffset = 0; - - for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { - const Hypothesis &edge = *edges[currEdge]; - const TargetPhrase &tp = edge.GetCurrTargetPhrase(); - size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos(); - - OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset); - - targetOffset += tp.GetSize(); - } - // Used by --print-alignment-info, so no endl - } - - void - Hypothesis:: - OutputAlignment(ostream &out, const AlignmentInfo &ai, - size_t sourceOffset, size_t targetOffset) - { - typedef std::vector< const std::pair* > AlignVec; - AlignVec alignments = ai.GetSortedAlignments(); - - AlignVec::const_iterator it; - for (it = alignments.begin(); it != alignments.end(); ++it) { - const std::pair &alignment = **it; - out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " "; - } - - } - - void - Hypothesis:: - OutputInput(std::vector& map, const Hypothesis* hypo) - { - if (!hypo->GetPrevHypo()) return; - OutputInput(map, hypo->GetPrevHypo()); - map[hypo->GetCurrSourceWordsRange().GetStartPos()] - = &hypo->GetTranslationOption().GetInputPath().GetPhrase(); - } - - void - Hypothesis:: - OutputInput(std::ostream& os) const - { - size_t len = this->GetInput().GetSize(); - std::vector inp_phrases(len, 0); - OutputInput(inp_phrases, this); - for (size_t i=0; i &outputFactorOrder, - char reportSegmentation, bool reportAllFactors) const - { - if (m_prevHypo) - { // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence - m_prevHypo->OutputBestSurface(out, outputFactorOrder, reportSegmentation, reportAllFactors); + out << "|" << *factor; } - OutputSurface(out, *this, outputFactorOrder, reportSegmentation, reportAllFactors); - } - - ////////////////////////////////////////////////////////////////////////// - /*** - * print surface factor only for the given phrase - */ - void - Hypothesis:: - OutputSurface(std::ostream &out, const Hypothesis &edge, - const std::vector &outputFactorOrder, - char reportSegmentation, bool reportAllFactors) const - { - UTIL_THROW_IF2(outputFactorOrder.size() == 0, - "Must specific at least 1 output factor"); - const TargetPhrase& phrase = edge.GetCurrTargetPhrase(); - bool markUnknown = StaticData::Instance().GetMarkUnknown(); - if (reportAllFactors == true) { - out << phrase; - } else { - FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor(); - - std::map placeholders; - if (placeholderFactor != NOT_FOUND) { - // creates map of target position -> factor for placeholders - placeholders = GetPlaceholders(edge, placeholderFactor); - } - - size_t size = phrase.GetSize(); - for (size_t pos = 0 ; pos < size ; pos++) { - const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); - - if (placeholders.size()) { - // do placeholders - std::map::const_iterator iter = placeholders.find(pos); - if (iter != placeholders.end()) { - factor = iter->second; - } - } - - UTIL_THROW_IF2(factor == NULL, - "No factor 0 at position " << pos); - - //preface surface form with UNK if marking unknowns - const Word &word = phrase.GetWord(pos); - if(markUnknown && word.IsOOV()) { - out << "UNK" << *factor; - } else { - out << *factor; - } - - for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { - const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); - UTIL_THROW_IF2(factor == NULL, - "No factor " << i << " at position " << pos); - - out << "|" << *factor; - } - out << " "; - } - } - - // trace ("report segmentation") option "-t" / "-tt" - if (reportSegmentation > 0 && phrase.GetSize() > 0) { - const WordsRange &sourceRange = edge.GetCurrSourceWordsRange(); - const int sourceStart = sourceRange.GetStartPos(); - const int sourceEnd = sourceRange.GetEndPos(); - out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt" - if (reportSegmentation == 2) { - out << ",wa="; - const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm(); - Hypothesis::OutputAlignment(out, ai, 0, 0); - out << ",total="; - out << edge.GetScore() - edge.GetPrevHypo()->GetScore(); - out << ","; - ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown()); - scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown()); - scoreBreakdown.OutputAllFeatureScores(out); - } - out << "| "; + out << " "; } } - std::map - Hypothesis:: - GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const - { - const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath(); - const Phrase &inputPhrase = inputPath.GetPhrase(); - - std::map ret; - - for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) { - const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor); - if (factor) { - std::set targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos); - UTIL_THROW_IF2(targetPos.size() != 1, - "Placeholder should be aligned to 1, and only 1, word"); - ret[*targetPos.begin()] = factor; - } + // trace ("report segmentation") option "-t" / "-tt" + if (reportSegmentation > 0 && phrase.GetSize() > 0) { + const WordsRange &sourceRange = edge.GetCurrSourceWordsRange(); + const int sourceStart = sourceRange.GetStartPos(); + const int sourceEnd = sourceRange.GetEndPos(); + out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt" + if (reportSegmentation == 2) { + out << ",wa="; + const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm(); + Hypothesis::OutputAlignment(out, ai, 0, 0); + out << ",total="; + out << edge.GetScore() - edge.GetPrevHypo()->GetScore(); + out << ","; + ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown()); + scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown()); + scoreBreakdown.OutputAllFeatureScores(out); } - - return ret; + out << "| "; } +} + +std::map +Hypothesis:: +GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const +{ + const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath(); + const Phrase &inputPhrase = inputPath.GetPhrase(); + + std::map ret; + + for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) { + const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor); + if (factor) { + std::set targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos); + UTIL_THROW_IF2(targetPos.size() != 1, + "Placeholder should be aligned to 1, and only 1, word"); + ret[*targetPos.begin()] = factor; + } + } + + return ret; +} #ifdef HAVE_XMLRPC_C - void - Hypothesis:: - OutputLocalWordAlignment(vector& dest) const - { - using namespace std; - WordsRange const& src = this->GetCurrSourceWordsRange(); - WordsRange const& trg = this->GetCurrTargetWordsRange(); +void +Hypothesis:: +OutputLocalWordAlignment(vector& dest) const +{ + using namespace std; + WordsRange const& src = this->GetCurrSourceWordsRange(); + WordsRange const& trg = this->GetCurrTargetWordsRange(); - vector const* > a - = this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments(); - typedef pair item; - map M; - BOOST_FOREACH(item const* p, a) - { - M["source-word"] = xmlrpc_c::value_int(src.GetStartPos() + p->first); - M["target-word"] = xmlrpc_c::value_int(trg.GetStartPos() + p->second); - dest.push_back(xmlrpc_c::value_struct(M)); - } + vector const* > a + = this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments(); + typedef pair item; + map M; + BOOST_FOREACH(item const* p, a) { + M["source-word"] = xmlrpc_c::value_int(src.GetStartPos() + p->first); + M["target-word"] = xmlrpc_c::value_int(trg.GetStartPos() + p->second); + dest.push_back(xmlrpc_c::value_struct(M)); } +} - void - Hypothesis:: - OutputWordAlignment(vector& out) const - { - vector tmp; - for (Hypothesis const* h = this; h; h = h->GetPrevHypo()) - tmp.push_back(h); - for (size_t i = tmp.size(); i-- > 0;) - tmp[i]->OutputLocalWordAlignment(out); - } +void +Hypothesis:: +OutputWordAlignment(vector& out) const +{ + vector tmp; + for (Hypothesis const* h = this; h; h = h->GetPrevHypo()) + tmp.push_back(h); + for (size_t i = tmp.size(); i-- > 0;) + tmp[i]->OutputLocalWordAlignment(out); +} #endif diff --git a/moses/Hypothesis.h b/moses/Hypothesis.h index 0ce75b83c..e1e95fbf3 100644 --- a/moses/Hypothesis.h +++ b/moses/Hypothesis.h @@ -313,7 +313,7 @@ struct CompareHypothesisTotalScore { ObjectPool &pool = Hypothesis::GetObjectPool(); \ pool.freeObject(hypo); \ } \ - + #else #define FREEHYPO(hypo) delete hypo #endif diff --git a/moses/IOWrapper.h b/moses/IOWrapper.h index 8ed9a02e5..c58c82dfa 100644 --- a/moses/IOWrapper.h +++ b/moses/IOWrapper.h @@ -161,7 +161,7 @@ public: return m_detailTreeFragmentsOutputCollector.get(); } - void SetInputStreamFromString(std::istringstream &input){ + void SetInputStreamFromString(std::istringstream &input) { m_inputStream = &input; } diff --git a/moses/LM/RDLM.cpp b/moses/LM/RDLM.cpp index 179b67095..70fabbc6e 100644 --- a/moses/LM/RDLM.cpp +++ b/moses/LM/RDLM.cpp @@ -13,12 +13,14 @@ namespace Moses typedef Eigen::Map > EigenMap; -RDLM::~RDLM() { +RDLM::~RDLM() +{ delete lm_head_base_instance_; delete lm_label_base_instance_; } -void RDLM::Load() { +void RDLM::Load() +{ lm_head_base_instance_ = new nplm::neuralTM(); lm_head_base_instance_->read(m_path_head_lm); @@ -87,8 +89,8 @@ void RDLM::Load() { // just score provided file, then exit. if (!m_debugPath.empty()) { - ScoreFile(m_debugPath); - exit(1); + ScoreFile(m_debugPath); + exit(1); } // { @@ -202,8 +204,7 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost // ignore glue rules if (root->GetLabel() == m_glueSymbol) { // recursion - for (std::vector::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) - { + for (std::vector::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) { Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels); } return; @@ -213,11 +214,11 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost if (m_binarized && root->GetLabel()[0] == '^' && !ancestor_heads.empty()) { // recursion if (root->IsLeafNT() && m_context_up > 1 && ancestor_heads.size()) { - root = back_pointers.find(root)->second.get(); - rescoring_levels = m_context_up-1; + root = back_pointers.find(root)->second.get(); + rescoring_levels = m_context_up-1; } for (std::vector::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) { - Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels); + Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels); } return; } @@ -239,35 +240,34 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost if (root->GetLength() == 1 && root->GetChildren()[0]->IsTerminal()) { // root of tree: score without context if (ancestor_heads.empty() || (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head)) { - std::vector ngram_head_null (static_head_null); - ngram_head_null.back() = lm_head->lookup_output_word(root->GetChildren()[0]->GetLabel()); - if (m_isPretermBackoff && ngram_head_null.back() == 0) { - ngram_head_null.back() = lm_head->lookup_output_word(root->GetLabel()); - } - if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head) { - std::vector::iterator it = ngram_head_null.begin(); - std::fill_n(it, m_context_left, static_start_head); - it += m_context_left; - std::fill_n(it, m_context_left, static_start_label); - it += m_context_left; - std::fill_n(it, m_context_right, static_stop_head); - it += m_context_right; - std::fill_n(it, m_context_right, static_stop_label); - it += m_context_right; - size_t context_up_nonempty = std::min(m_context_up, ancestor_heads.size()); - it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it); - it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it); - } - if (ancestor_labels.size() >= m_context_up && !num_virtual) { - score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size()))); - } - else { - boost::hash_combine(boundary_hash, ngram_head_null.back()); - score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size()))); - } + std::vector ngram_head_null (static_head_null); + ngram_head_null.back() = lm_head->lookup_output_word(root->GetChildren()[0]->GetLabel()); + if (m_isPretermBackoff && ngram_head_null.back() == 0) { + ngram_head_null.back() = lm_head->lookup_output_word(root->GetLabel()); + } + if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head) { + std::vector::iterator it = ngram_head_null.begin(); + std::fill_n(it, m_context_left, static_start_head); + it += m_context_left; + std::fill_n(it, m_context_left, static_start_label); + it += m_context_left; + std::fill_n(it, m_context_right, static_stop_head); + it += m_context_right; + std::fill_n(it, m_context_right, static_stop_label); + it += m_context_right; + size_t context_up_nonempty = std::min(m_context_up, ancestor_heads.size()); + it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it); + it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it); + } + if (ancestor_labels.size() >= m_context_up && !num_virtual) { + score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size()))); + } else { + boost::hash_combine(boundary_hash, ngram_head_null.back()); + score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size()))); + } } return; - // we only need to re-visit previous hypotheses if we have more context available. + // we only need to re-visit previous hypotheses if we have more context available. } else if (root->IsLeafNT()) { if (m_context_up > 1 && ancestor_heads.size()) { root = back_pointers.find(root)->second.get(); @@ -276,8 +276,7 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost return; } rescoring_levels = m_context_up-1; - } - else { + } else { return; } } @@ -302,19 +301,17 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost int reached_end = 0; int label_idx, label_idx_out; if (m_binarized && head_label[0] == '^') { - virtual_head = true; - if (m_binarized == 1 || (m_binarized == 3 && head_label[2] == 'l')) { - reached_end = 1; //indicate that we've seen the first symbol of the RHS - } - else if (m_binarized == 2 || (m_binarized == 3 && head_label[2] == 'r')) { - reached_end = 2; // indicate that we've seen the last symbol of the RHS - } - // with 'full' binarization, direction is encoded in 2nd char - std::string clipped_label = (m_binarized == 3) ? head_label.substr(2,head_label.size()-2) : head_label.substr(1,head_label.size()-1); - label_idx = lm_label->lookup_input_word(clipped_label); - label_idx_out = lm_label->lookup_output_word(clipped_label); - } - else { + virtual_head = true; + if (m_binarized == 1 || (m_binarized == 3 && head_label[2] == 'l')) { + reached_end = 1; //indicate that we've seen the first symbol of the RHS + } else if (m_binarized == 2 || (m_binarized == 3 && head_label[2] == 'r')) { + reached_end = 2; // indicate that we've seen the last symbol of the RHS + } + // with 'full' binarization, direction is encoded in 2nd char + std::string clipped_label = (m_binarized == 3) ? head_label.substr(2,head_label.size()-2) : head_label.substr(1,head_label.size()-1); + label_idx = lm_label->lookup_input_word(clipped_label); + label_idx_out = lm_label->lookup_output_word(clipped_label); + } else { reached_end = 3; // indicate that we've seen first and last symbol of the RHS label_idx = lm_label->lookup_input_word(head_label); label_idx_out = lm_label->lookup_output_word(head_label); @@ -324,49 +321,47 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost // root of tree: score without context if (ancestor_heads.empty() || (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head)) { - if (head_idx != static_dummy_head && head_idx != static_head_head) { - std::vector ngram_head_null (static_head_null); - *(ngram_head_null.end()-2) = label_idx; - ngram_head_null.back() = head_ids.second; - if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) { - std::vector::iterator it = ngram_head_null.begin(); - std::fill_n(it, m_context_left, static_start_head); - it += m_context_left; - std::fill_n(it, m_context_left, static_start_label); - it += m_context_left; - std::fill_n(it, m_context_right, static_stop_head); - it += m_context_right; - std::fill_n(it, m_context_right, static_stop_label); - it += m_context_right; - it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it); - it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it); - score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size()))); - } - else { - boost::hash_combine(boundary_hash, ngram_head_null.back()); - score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size()))); - } - } - std::vector ngram_label_null (static_label_null); - ngram_label_null.back() = label_idx_out; + if (head_idx != static_dummy_head && head_idx != static_head_head) { + std::vector ngram_head_null (static_head_null); + *(ngram_head_null.end()-2) = label_idx; + ngram_head_null.back() = head_ids.second; if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) { - std::vector::iterator it = ngram_label_null.begin(); - std::fill_n(it, m_context_left, static_start_head); - it += m_context_left; - std::fill_n(it, m_context_left, static_start_label); - it += m_context_left; - std::fill_n(it, m_context_right, static_stop_head); - it += m_context_right; - std::fill_n(it, m_context_right, static_stop_label); - it += m_context_right; - it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it); - it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it); - score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size()))); - } - else { - boost::hash_combine(boundary_hash, ngram_label_null.back()); - score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size()))); + std::vector::iterator it = ngram_head_null.begin(); + std::fill_n(it, m_context_left, static_start_head); + it += m_context_left; + std::fill_n(it, m_context_left, static_start_label); + it += m_context_left; + std::fill_n(it, m_context_right, static_stop_head); + it += m_context_right; + std::fill_n(it, m_context_right, static_stop_label); + it += m_context_right; + it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it); + it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it); + score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size()))); + } else { + boost::hash_combine(boundary_hash, ngram_head_null.back()); + score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size()))); } + } + std::vector ngram_label_null (static_label_null); + ngram_label_null.back() = label_idx_out; + if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) { + std::vector::iterator it = ngram_label_null.begin(); + std::fill_n(it, m_context_left, static_start_head); + it += m_context_left; + std::fill_n(it, m_context_left, static_start_label); + it += m_context_left; + std::fill_n(it, m_context_right, static_stop_head); + it += m_context_right; + std::fill_n(it, m_context_right, static_stop_label); + it += m_context_right; + it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it); + it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it); + score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size()))); + } else { + boost::hash_combine(boundary_hash, ngram_label_null.back()); + score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size()))); + } } ancestor_heads.push_back(head_idx); @@ -374,15 +369,14 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost if (virtual_head) { num_virtual = m_context_up; - } - else if (num_virtual) { - --num_virtual; + } else if (num_virtual) { + --num_virtual; } // fill ancestor context (same for all children) if (context_up_nonempty < m_context_up) { - ++context_up_nonempty; + ++context_up_nonempty; } size_t up_padding = m_context_up - context_up_nonempty; @@ -439,13 +433,13 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost std::vector::iterator it = ngram.begin(); if (left_padding > 0) { - it += left_padding; + it += left_padding; } it = std::copy(heads.begin()+left_offset, heads.begin()+i, it); if (left_padding > 0) { - it += left_padding; + it += left_padding; } it = std::copy(labels.begin()+left_offset, labels.begin()+i, it); @@ -453,33 +447,30 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost it = std::copy(heads.begin()+i+1, heads.begin()+right_offset, it); if (right_padding > 0) { - if (reached_end == 2 || reached_end == 3) { - std::fill_n(it, right_padding, static_stop_head); - it += right_padding; - } - else { - std::copy(static_label_null.begin()+offset_up_head-m_context_right-right_padding, static_label_null.begin()-m_context_right+offset_up_head, it); - } + if (reached_end == 2 || reached_end == 3) { + std::fill_n(it, right_padding, static_stop_head); + it += right_padding; + } else { + std::copy(static_label_null.begin()+offset_up_head-m_context_right-right_padding, static_label_null.begin()-m_context_right+offset_up_head, it); + } } it = std::copy(labels.begin()+i+1, labels.begin()+right_offset, it); if (right_padding > 0) { - if (reached_end == 2 || reached_end == 3) { - std::fill_n(it, right_padding, static_stop_label); - it += right_padding; - } - else { - std::copy(static_label_null.begin()+offset_up_head-right_padding, static_label_null.begin()+offset_up_head, it); - } + if (reached_end == 2 || reached_end == 3) { + std::fill_n(it, right_padding, static_stop_label); + it += right_padding; + } else { + std::copy(static_label_null.begin()+offset_up_head-right_padding, static_label_null.begin()+offset_up_head, it); + } } ngram.back() = labels_output[i]; if (ancestor_labels.size() >= m_context_up && !num_virtual) { score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size()))); - } - else { + } else { boost::hash_combine(boundary_hash, ngram.back()); score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size()))); } @@ -492,8 +483,7 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost if (ancestor_labels.size() >= m_context_up && !num_virtual) { score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size()))); - } - else { + } else { boost::hash_combine(boundary_hash, ngram.back()); score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size()))); } @@ -502,25 +492,24 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost // next time, we need to add less start symbol padding if (left_padding) - left_padding--; + left_padding--; else - left_offset++; + left_offset++; if (right_offset < heads.size()) - right_offset++; + right_offset++; else - right_padding++; + right_padding++; } if (rescoring_levels == 1) { - ancestor_heads.pop_back(); - ancestor_labels.pop_back(); - return; + ancestor_heads.pop_back(); + ancestor_labels.pop_back(); + return; } // recursion - for (std::vector::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) - { + for (std::vector::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) { Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels - 1); } ancestor_heads.pop_back(); @@ -531,19 +520,17 @@ InternalTree* RDLM::GetHead(InternalTree* root, const TreePointerMap & back_poin { InternalTree *tree; - for (std::vector::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) - { + for (std::vector::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) { if ((*it)->IsLeafNT()) { tree = back_pointers.find(it->get())->second.get(); - } - else { + } else { tree = it->get(); } if (m_binarized && tree->GetLabel()[0] == '^') { - head_ptr = GetHead(tree, back_pointers, IDs, head_ptr); - if (head_ptr != NULL && !m_isPTKVZ) { - return head_ptr; + head_ptr = GetHead(tree, back_pointers, IDs, head_ptr); + if (head_ptr != NULL && !m_isPTKVZ) { + return head_ptr; } } @@ -563,8 +550,7 @@ InternalTree* RDLM::GetHead(InternalTree* root, const TreePointerMap & back_poin for (std::vector::const_iterator it2 = tree->GetChildren().begin(); it2 != tree->GetChildren().end(); ++it2) { if ((*it2)->IsLeafNT()) { tree2 = back_pointers.find(it2->get())->second.get(); - } - else { + } else { tree2 = it2->get(); } if (tree2->GetLabel() == "PTKVZ" && tree2->GetLength() == 1 && tree2->GetChildren()[0]->IsTerminal()) { @@ -602,18 +588,18 @@ void RDLM::GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & bac // extract head words / labels for (std::vector::const_iterator itx = real_children.begin(); itx != real_children.end(); itx = ++real_children) { if ((*itx)->IsTerminal()) { - std::cerr << "non-terminal node " << root->GetLabel() << " has a mix of terminal and non-terminal children. This shouldn't happen..." << std::endl; - std::cerr << "children: "; - for (std::vector::const_iterator itx2 = root->GetChildren().begin(); itx2 != root->GetChildren().end(); ++itx2) { - std::cerr << (*itx2)->GetLabel() << " "; - } - std::cerr << std::endl; - // resize vectors (should we throw exception instead?) - heads.pop_back(); - labels.pop_back(); - heads_output.pop_back(); - labels_output.pop_back(); - continue; + std::cerr << "non-terminal node " << root->GetLabel() << " has a mix of terminal and non-terminal children. This shouldn't happen..." << std::endl; + std::cerr << "children: "; + for (std::vector::const_iterator itx2 = root->GetChildren().begin(); itx2 != root->GetChildren().end(); ++itx2) { + std::cerr << (*itx2)->GetLabel() << " "; + } + std::cerr << std::endl; + // resize vectors (should we throw exception instead?) + heads.pop_back(); + labels.pop_back(); + heads_output.pop_back(); + labels_output.pop_back(); + continue; } InternalTree* child = itx->get(); // also go through trees or previous hypotheses to rescore nodes for which more context has become available @@ -659,8 +645,7 @@ void RDLM::GetIDs(const std::string & head, const std::string & preterminal, std } if (m_sharedVocab) { IDs.second = IDs.first; - } - else { + } else { IDs.second = lm_head_base_instance_->lookup_output_word(head); if (m_isPretermBackoff && IDs.second == 0) { IDs.second = lm_head_base_instance_->lookup_output_word(preterminal); @@ -672,12 +657,12 @@ void RDLM::GetIDs(const std::string & head, const std::string & preterminal, std void RDLM::PrintInfo(std::vector &ngram, nplm::neuralTM* lm) const { for (size_t i = 0; i < ngram.size()-1; i++) { - std::cerr << lm->get_input_vocabulary().words()[ngram[i]] << " "; + std::cerr << lm->get_input_vocabulary().words()[ngram[i]] << " "; } std::cerr << lm->get_output_vocabulary().words()[ngram.back()] << " "; for (size_t i = 0; i < ngram.size(); i++) { - std::cerr << ngram[i] << " "; + std::cerr << ngram[i] << " "; } std::cerr << "score: " << lm->lookup_ngram(ngram) << std::endl; } @@ -691,32 +676,31 @@ RDLM::TreePointerMap RDLM::AssociateLeafNTs(InternalTree* root, const std::vecto bool found = false; InternalTree::leafNT next_leafNT(root); for (std::vector::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) { - found = next_leafNT(it); - if (found) { - ret[it->get()] = *it_prev; - } - else { - std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n"; - } + found = next_leafNT(it); + if (found) { + ret[it->get()] = *it_prev; + } else { + std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n"; + } } return ret; } void RDLM::ScoreFile(std::string &path) { - InputFileStream inStream(path); - std::string line, null; - std::vector ancestor_heads(m_context_up, static_root_head); - std::vector ancestor_labels(m_context_up, static_root_label); - while(getline(inStream, line)) { - TreePointerMap back_pointers; - boost::array score; - score.fill(0); - InternalTree* mytree (new InternalTree(line)); - size_t boundary_hash = 0; - Score(mytree, back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash); - std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << std::endl; - } + InputFileStream inStream(path); + std::string line, null; + std::vector ancestor_heads(m_context_up, static_root_head); + std::vector ancestor_labels(m_context_up, static_root_label); + while(getline(inStream, line)) { + TreePointerMap back_pointers; + boost::array score; + score.fill(0); + InternalTree* mytree (new InternalTree(line)); + size_t boundary_hash = 0; + Score(mytree, back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash); + std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << std::endl; + } } @@ -727,42 +711,42 @@ void RDLM::SetParameter(const std::string& key, const std::string& value) m_tuneable = Scan(value); } else if (key == "filterable") { //ignore } else if (key == "path_head_lm") { - m_path_head_lm = value; + m_path_head_lm = value; } else if (key == "path_label_lm") { - m_path_label_lm = value; + m_path_label_lm = value; } else if (key == "ptkvz") { - m_isPTKVZ = Scan(value); + m_isPTKVZ = Scan(value); } else if (key == "backoff") { - m_isPretermBackoff = Scan(value); + m_isPretermBackoff = Scan(value); } else if (key == "context_up") { - m_context_up = Scan(value); + m_context_up = Scan(value); } else if (key == "context_left") { - m_context_left = Scan(value); + m_context_left = Scan(value); } else if (key == "context_right") { - m_context_right = Scan(value); + m_context_right = Scan(value); } else if (key == "debug_path") { - m_debugPath = value; + m_debugPath = value; } else if (key == "premultiply") { - m_premultiply = Scan(value); + m_premultiply = Scan(value); } else if (key == "rerank") { - m_rerank = Scan(value); + m_rerank = Scan(value); } else if (key == "normalize_head_lm") { - m_normalizeHeadLM = Scan(value); + m_normalizeHeadLM = Scan(value); } else if (key == "normalize_label_lm") { - m_normalizeLabelLM = Scan(value); + m_normalizeLabelLM = Scan(value); } else if (key == "binarized") { - if (value == "left") - m_binarized = 1; - else if (value == "right") - m_binarized = 2; - else if (value == "full") - m_binarized = 3; - else - UTIL_THROW(util::Exception, "Unknown value for argument " << key << "=" << value); + if (value == "left") + m_binarized = 1; + else if (value == "right") + m_binarized = 2; + else if (value == "full") + m_binarized = 3; + else + UTIL_THROW(util::Exception, "Unknown value for argument " << key << "=" << value); } else if (key == "glue_symbol") { - m_glueSymbol = value; + m_glueSymbol = value; } else if (key == "cache_size") { - m_cacheSize = Scan(value); + m_cacheSize = Scan(value); } else { UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value); } @@ -808,8 +792,8 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo size_t boundary_hash = 0; if (!m_rerank) { Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash); - accumulator->PlusEquals(ff_idx, score[0] + score[1]); - accumulator->PlusEquals(ff_idx+1, score[2] + score[3]); + accumulator->PlusEquals(ff_idx, score[0] + score[1]); + accumulator->PlusEquals(ff_idx+1, score[2] + score[3]); } mytree->Combine(previous_trees); if (m_rerank && full_sentence) { @@ -818,12 +802,11 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo accumulator->PlusEquals(ff_idx+1, score[2] + score[3]); } if (m_binarized && full_sentence) { - mytree->Unbinarize(); + mytree->Unbinarize(); } return new RDLMState(mytree, score[1], score[3], boundary_hash); - } - else { + } else { UTIL_THROW2("Error: RDLM active, but no internal tree structure found"); } diff --git a/moses/LM/RDLM.h b/moses/LM/RDLM.h index 8ae49ce76..1b92ed7c9 100644 --- a/moses/LM/RDLM.h +++ b/moses/LM/RDLM.h @@ -11,8 +11,9 @@ // Sennrich, Rico (2015). Modelling and Optimizing on Syntactic N-Grams for Statistical Machine Translation. Transactions of the Association for Computational Linguistics. // see 'scripts/training/rdlm' for training scripts -namespace nplm { - class neuralTM; +namespace nplm +{ +class neuralTM; } namespace Moses @@ -32,21 +33,21 @@ public: {} float GetApproximateScoreHead() const { - return m_approx_head; + return m_approx_head; } float GetApproximateScoreLabel() const { - return m_approx_label; + return m_approx_label; } size_t GetHash() const { - return m_hash; + return m_hash; } int Compare(const FFState& other) const { - if (m_hash == static_cast(&other)->GetHash()) return 0; - else if (m_hash > static_cast(&other)->GetHash()) return 1; - else return -1; + if (m_hash == static_cast(&other)->GetHash()) return 0; + else if (m_hash > static_cast(&other)->GetHash()) return 1; + else return -1; } }; @@ -121,10 +122,9 @@ public: , m_normalizeLabelLM(false) , m_sharedVocab(false) , m_binarized(0) - , m_cacheSize(1000000) - { - ReadParameters(); - } + , m_cacheSize(1000000) { + ReadParameters(); + } ~RDLM(); @@ -147,21 +147,23 @@ public: void SetParameter(const std::string& key, const std::string& value); void EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedFutureScore) const {}; + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedFutureScore) const {}; void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedFutureScore = NULL) const {}; + , const InputPath &inputPath + , const TargetPhrase &targetPhrase + , const StackVec *stackVec + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection *estimatedFutureScore = NULL) const {}; void EvaluateTranslationOptionListWithSourceContext(const InputType &input , const TranslationOptionList &translationOptionList) const {}; FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, - ScoreComponentCollection* accumulator) const {UTIL_THROW(util::Exception, "Not implemented");}; + ScoreComponentCollection* accumulator) const { + UTIL_THROW(util::Exception, "Not implemented"); + }; FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, @@ -173,71 +175,72 @@ public: class UnbinarizedChildren { private: - std::vector::const_iterator iter; - std::vector::const_iterator _begin; - std::vector::const_iterator _end; - InternalTree* current; - const TreePointerMap & back_pointers; - bool binarized; - std::vector::const_iterator> > stack; + std::vector::const_iterator iter; + std::vector::const_iterator _begin; + std::vector::const_iterator _end; + InternalTree* current; + const TreePointerMap & back_pointers; + bool binarized; + std::vector::const_iterator> > stack; public: - UnbinarizedChildren(InternalTree* root, const TreePointerMap & pointers, bool binary): - current(root), - back_pointers(pointers), - binarized(binary) - { - stack.reserve(10); - _end = current->GetChildren().end(); - iter = current->GetChildren().begin(); - // expand virtual node - while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') { - stack.push_back(std::make_pair(current, iter)); - // also go through trees or previous hypotheses to rescore nodes for which more context has become available - if ((*iter)->IsLeafNT()) { - current = back_pointers.find(iter->get())->second.get(); - } - else { - current = iter->get(); - } - iter = current->GetChildren().begin(); - } - _begin = iter; + UnbinarizedChildren(InternalTree* root, const TreePointerMap & pointers, bool binary): + current(root), + back_pointers(pointers), + binarized(binary) { + stack.reserve(10); + _end = current->GetChildren().end(); + iter = current->GetChildren().begin(); + // expand virtual node + while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') { + stack.push_back(std::make_pair(current, iter)); + // also go through trees or previous hypotheses to rescore nodes for which more context has become available + if ((*iter)->IsLeafNT()) { + current = back_pointers.find(iter->get())->second.get(); + } else { + current = iter->get(); } - - std::vector::const_iterator begin() const { return _begin; } - std::vector::const_iterator end() const { return _end; } - - std::vector::const_iterator operator++() { - iter++; - if (iter == current->GetChildren().end()) { - while (!stack.empty()) { - std::pair::const_iterator> & active = stack.back(); - current = active.first; - iter = ++active.second; - stack.pop_back(); - if (iter != current->GetChildren().end()) { - break; - } - } - if (iter == _end) { - return iter; - } - } - // expand virtual node - while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') { - stack.push_back(std::make_pair(current, iter)); - // also go through trees or previous hypotheses to rescore nodes for which more context has become available - if ((*iter)->IsLeafNT()) { - current = back_pointers.find(iter->get())->second.get(); - } - else { - current = iter->get(); - } - iter = current->GetChildren().begin(); - } - return iter; + iter = current->GetChildren().begin(); } + _begin = iter; + } + + std::vector::const_iterator begin() const { + return _begin; + } + std::vector::const_iterator end() const { + return _end; + } + + std::vector::const_iterator operator++() { + iter++; + if (iter == current->GetChildren().end()) { + while (!stack.empty()) { + std::pair::const_iterator> & active = stack.back(); + current = active.first; + iter = ++active.second; + stack.pop_back(); + if (iter != current->GetChildren().end()) { + break; + } + } + if (iter == _end) { + return iter; + } + } + // expand virtual node + while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') { + stack.push_back(std::make_pair(current, iter)); + // also go through trees or previous hypotheses to rescore nodes for which more context has become available + if ((*iter)->IsLeafNT()) { + current = back_pointers.find(iter->get())->second.get(); + } else { + current = iter->get(); + } + iter = current->GetChildren().begin(); + } + return iter; + } }; }; diff --git a/moses/Manager.cpp b/moses/Manager.cpp index 8daaa6c8e..bb27e368b 100644 --- a/moses/Manager.cpp +++ b/moses/Manager.cpp @@ -73,7 +73,7 @@ Manager::Manager(ttasksptr const& ttask) const StaticData &staticData = StaticData::Instance(); SearchAlgorithm searchAlgorithm = staticData.GetSearchAlgorithm(); m_search = Search::CreateSearch(*this, *source, searchAlgorithm, - *m_transOptColl); + *m_transOptColl); StaticData::Instance().InitializeForInput(ttask); } @@ -87,7 +87,9 @@ Manager::~Manager() const InputType& Manager::GetSource() const -{ return m_source ; } +{ + return m_source ; +} /** * Main decoder loop that translates a sentence by expanding @@ -130,7 +132,7 @@ void Manager::Decode() searchTime.start(); m_search->Decode(); VERBOSE(1, "Line " << m_source.GetTranslationId() - << ": Search took " << searchTime << " seconds" << endl); + << ": Search took " << searchTime << " seconds" << endl); IFVERBOSE(2) { GetSentenceStats().StopTimeTotal(); TRACE_ERR(GetSentenceStats()); diff --git a/moses/OutputCollector.h b/moses/OutputCollector.h index 647b81c3e..4ca0f5ac1 100644 --- a/moses/OutputCollector.h +++ b/moses/OutputCollector.h @@ -110,7 +110,7 @@ private: #endif public: - void SetOutputStream(std::ostream* outStream){ + void SetOutputStream(std::ostream* outStream) { m_outStream = outStream; } diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp index 5b5d76828..d47aca040 100644 --- a/moses/Parameter.cpp +++ b/moses/Parameter.cpp @@ -203,7 +203,7 @@ Parameter::Parameter() AddParam(nbest_opts,"lattice-samples", "generate samples from lattice, in same format as nbest list. Uses the file and size arguments, as in n-best-list"); AddParam(nbest_opts,"include-segmentation-in-n-best", "include phrasal segmentation in the n-best list. default is false"); AddParam(nbest_opts,"print-alignment-info-in-n-best", - "Include word-to-word alignment in the n-best list. Word-to-word alignments are taken from the phrase table if any. Default is false"); + "Include word-to-word alignment in the n-best list. Word-to-word alignments are taken from the phrase table if any. Default is false"); /////////////////////////////////////////////////////////////////////////////////////// // server options @@ -215,7 +215,7 @@ Parameter::Parameter() po::options_description irstlm_opts("IRSTLM Options"); AddParam(irstlm_opts,"clean-lm-cache", - "clean language model caches after N translations (default N=1)"); + "clean language model caches after N translations (default N=1)"); po::options_description chart_opts("Chart Decoding Options"); AddParam(chart_opts,"max-chart-span", "maximum num. of source word chart rules can consume (default 10)"); @@ -346,8 +346,8 @@ const PARAM_VEC *Parameter::GetParam(const std::string ¶mName) const void Parameter:: AddParam(po::options_description& optgroup, - string const& paramName, - string const& description) + string const& paramName, + string const& description) { m_valid[paramName] = true; m_description[paramName] = description; @@ -358,9 +358,9 @@ AddParam(po::options_description& optgroup, void Parameter:: AddParam(po::options_description& optgroup, - string const& paramName, - string const& abbrevName, - string const& description) + string const& paramName, + string const& abbrevName, + string const& description) { m_valid[paramName] = true; m_valid[abbrevName] = true; @@ -368,11 +368,10 @@ AddParam(po::options_description& optgroup, m_fullname[abbrevName] = paramName; m_description[paramName] = description; string optname = paramName; - if (abbrevName.size() == 1) - { - optname += string(",")+abbrevName; - // m_confusable[abbrevName[0]].insert(paramName); - } + if (abbrevName.size() == 1) { + optname += string(",")+abbrevName; + // m_confusable[abbrevName[0]].insert(paramName); + } optgroup.add_options()(optname.c_str(),description.c_str()); } @@ -429,12 +428,11 @@ LoadParam(int argc, char* xargv[]) // legacy parameter handling: all parameters are expected // to start with a single dash char* argv[argc+1]; - for (int i = 0; i < argc; ++i) - { - argv[i] = xargv[i]; - if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == '-') - ++argv[i]; - } + for (int i = 0; i < argc; ++i) { + argv[i] = xargv[i]; + if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == '-') + ++argv[i]; + } // config file (-f) arg mandatory string configPath; @@ -1260,7 +1258,7 @@ Validate() bool Parameter:: FilesExist(const string ¶mName, int fieldNo, - std::vector const& extensions) + std::vector const& extensions) { typedef std::vector StringVec; StringVec::const_iterator iter; @@ -1589,7 +1587,7 @@ template<> void Parameter:: SetParameter(bool ¶meter, std::string const& parameterName, - bool const& defaultValue) const + bool const& defaultValue) const { const PARAM_VEC *params = GetParam(parameterName); diff --git a/moses/Parameter.h b/moses/Parameter.h index 90b18c427..f6e20efc2 100644 --- a/moses/Parameter.h +++ b/moses/Parameter.h @@ -66,27 +66,27 @@ protected: void AddParam(options_description& optgroup, - value_semantic const* optvalue, - std::string const& paramName, - std::string const& description); + value_semantic const* optvalue, + std::string const& paramName, + std::string const& description); void AddParam(options_description& optgroup, - std::string const ¶mName, - std::string const &description); + std::string const ¶mName, + std::string const &description); void AddParam(options_description& optgroup, - value_semantic const* optvalue, - std::string const& paramName, - std::string const& abbrevName, - std::string const& description); + value_semantic const* optvalue, + std::string const& paramName, + std::string const& abbrevName, + std::string const& description); void AddParam(options_description& optgroup, - std::string const& paramName, - std::string const& abbrevName, - std::string const& description); + std::string const& paramName, + std::string const& abbrevName, + std::string const& description); void PrintCredit(); void PrintFF() const; diff --git a/moses/ScoreComponentCollection.cpp b/moses/ScoreComponentCollection.cpp index d07fb5f00..31de139ea 100644 --- a/moses/ScoreComponentCollection.cpp +++ b/moses/ScoreComponentCollection.cpp @@ -67,7 +67,7 @@ RegisterScoreProducer(FeatureFunction* scoreProducer) VERBOSE(1, "FeatureFunction: " << scoreProducer->GetScoreProducerDescription() << " start: " << start - << " end: " << (s_denseVectorSize-1) << endl); + << " end: " << (s_denseVectorSize-1) << endl); } @@ -194,21 +194,19 @@ void ScoreComponentCollection::Save(ostream& out, bool multiline) const } std::vector const& all_ff - = FeatureFunction::GetFeatureFunctions(); - BOOST_FOREACH(FeatureFunction const* ff, all_ff) - { - string name = ff->GetScoreProducerDescription(); - size_t i = ff->GetIndex(); - if (ff->GetNumScoreComponents() == 1) - out << name << sep << m_scores[i] << linesep; - else - { - size_t stop = i + ff->GetNumScoreComponents(); - boost::format fmt("%s_%d"); - for (size_t k = 1; i < stop; ++i, ++k) - out << fmt % name % k << sep << m_scores[i] << linesep; - } + = FeatureFunction::GetFeatureFunctions(); + BOOST_FOREACH(FeatureFunction const* ff, all_ff) { + string name = ff->GetScoreProducerDescription(); + size_t i = ff->GetIndex(); + if (ff->GetNumScoreComponents() == 1) + out << name << sep << m_scores[i] << linesep; + else { + size_t stop = i + ff->GetNumScoreComponents(); + boost::format fmt("%s_%d"); + for (size_t k = 1; i < stop; ++i, ++k) + out << fmt % name % k << sep << m_scores[i] << linesep; } + } // write sparse features m_scores.write(out,sep,linesep); } diff --git a/moses/ScoreComponentCollection.h b/moses/ScoreComponentCollection.h index 0dbdb366c..696658c80 100644 --- a/moses/ScoreComponentCollection.h +++ b/moses/ScoreComponentCollection.h @@ -231,10 +231,10 @@ public: //! produced by sp void PlusEquals(const FeatureFunction* sp, - const ScoreComponentCollection& scores) { + const ScoreComponentCollection& scores) { size_t i = sp->GetIndex(); size_t stop = i + sp->GetNumScoreComponents(); - for (;i < stop; ++i) m_scores[i] += scores.m_scores[i]; + for (; i < stop; ++i) m_scores[i] += scores.m_scores[i]; } //! Add scores from a single FeatureFunction only diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp index cf866f933..e4dab8547 100644 --- a/moses/Sentence.cpp +++ b/moses/Sentence.cpp @@ -60,28 +60,23 @@ aux_init_partial_translation(string& line) string sourceCompletedStr; int loc1 = line.find( "|||", 0 ); int loc2 = line.find( "|||", loc1 + 3 ); - if (loc1 > -1 && loc2 > -1) - { - m_initialTargetPhrase = Trim(line.substr(0, loc1)); - string scov = Trim(line.substr(loc1 + 3, loc2 - loc1 - 3)); - line = line.substr(loc2 + 3); + if (loc1 > -1 && loc2 > -1) { + m_initialTargetPhrase = Trim(line.substr(0, loc1)); + string scov = Trim(line.substr(loc1 + 3, loc2 - loc1 - 3)); + line = line.substr(loc2 + 3); - m_sourceCompleted.resize(scov.size()); - int contiguous = 1; - for (size_t i = 0; i < scov.size(); ++i) - { - if (sourceCompletedStr.at(i) == '1') - { - m_sourceCompleted[i] = true; - if (contiguous) m_frontSpanCoveredLength++; - } - else - { - m_sourceCompleted[i] = false; - contiguous = 0; - } - } + m_sourceCompleted.resize(scov.size()); + int contiguous = 1; + for (size_t i = 0; i < scov.size(); ++i) { + if (sourceCompletedStr.at(i) == '1') { + m_sourceCompleted[i] = true; + if (contiguous) m_frontSpanCoveredLength++; + } else { + m_sourceCompleted[i] = false; + contiguous = 0; + } } + } } void @@ -94,38 +89,31 @@ aux_interpret_sgml_markup(string& line) metamap::const_iterator i; if ((i = meta.find("id")) != meta.end()) this->SetTranslationId(atol(i->second.c_str())); - if ((i = meta.find("docid")) != meta.end()) - { - this->SetDocumentId(atol(i->second.c_str())); - this->SetUseTopicId(false); + if ((i = meta.find("docid")) != meta.end()) { + this->SetDocumentId(atol(i->second.c_str())); + this->SetUseTopicId(false); + this->SetUseTopicIdAndProb(false); + } + if ((i = meta.find("topic")) != meta.end()) { + vector topic_params; + boost::split(topic_params, i->second, boost::is_any_of("\t ")); + if (topic_params.size() == 1) { + this->SetTopicId(atol(topic_params[0].c_str())); + this->SetUseTopicId(true); this->SetUseTopicIdAndProb(false); + } else { + this->SetTopicIdAndProb(topic_params); + this->SetUseTopicId(false); + this->SetUseTopicIdAndProb(true); } - if ((i = meta.find("topic")) != meta.end()) - { - vector topic_params; - boost::split(topic_params, i->second, boost::is_any_of("\t ")); - if (topic_params.size() == 1) - { - this->SetTopicId(atol(topic_params[0].c_str())); - this->SetUseTopicId(true); - this->SetUseTopicIdAndProb(false); - } - else - { - this->SetTopicIdAndProb(topic_params); - this->SetUseTopicId(false); - this->SetUseTopicIdAndProb(true); - } - } - if ((i = meta.find("weight-setting")) != meta.end()) - { - this->SetWeightSetting(i->second); - this->SetSpecifiesWeightSetting(true); - StaticData::Instance().SetWeightSetting(i->second); - // oh this is so horrible! Why does this have to be propagated globally? - // --- UG - } - else this->SetSpecifiesWeightSetting(false); + } + if ((i = meta.find("weight-setting")) != meta.end()) { + this->SetWeightSetting(i->second); + this->SetSpecifiesWeightSetting(true); + StaticData::Instance().SetWeightSetting(i->second); + // oh this is so horrible! Why does this have to be propagated globally? + // --- UG + } else this->SetSpecifiesWeightSetting(false); } void @@ -135,48 +123,44 @@ aux_interpret_dlt(string& line) // whatever DLT means ... --- UG using namespace std; typedef map str2str_map; vector meta = ProcessAndStripDLT(line); - BOOST_FOREACH(str2str_map const& M, meta) - { - str2str_map::const_iterator i,j; - if ((i = M.find("type")) != M.end()) - { - j = M.find("id"); - string id = j == M.end() ? "default" : j->second; - if (i->second == "cbtm") - { - PhraseDictionaryDynamicCacheBased* cbtm; - cbtm = PhraseDictionaryDynamicCacheBased::InstanceNonConst(id); - if (cbtm) cbtm->ExecuteDlt(M); - } - if (i->second == "cblm") - { - DynamicCacheBasedLanguageModel* cblm; - cblm = DynamicCacheBasedLanguageModel::InstanceNonConst(id); - if (cblm) cblm->ExecuteDlt(M); - } - } + BOOST_FOREACH(str2str_map const& M, meta) { + str2str_map::const_iterator i,j; + if ((i = M.find("type")) != M.end()) { + j = M.find("id"); + string id = j == M.end() ? "default" : j->second; + if (i->second == "cbtm") { + PhraseDictionaryDynamicCacheBased* cbtm; + cbtm = PhraseDictionaryDynamicCacheBased::InstanceNonConst(id); + if (cbtm) cbtm->ExecuteDlt(M); + } + if (i->second == "cblm") { + DynamicCacheBasedLanguageModel* cblm; + cblm = DynamicCacheBasedLanguageModel::InstanceNonConst(id); + if (cblm) cblm->ExecuteDlt(M); + } } + } } void Sentence:: aux_interpret_xml(std::string& line, std::vector & xmlWalls, - std::vector >& placeholders) -{ // parse XML markup in translation line + std::vector >& placeholders) +{ + // parse XML markup in translation line const StaticData &SD = StaticData::Instance(); using namespace std; - if (SD.GetXmlInputType() != XmlPassThrough) - { - int offset = SD.IsSyntax() ? 1 : 0; - bool OK = ProcessAndStripXMLTags(line, m_xmlOptions, - m_reorderingConstraint, - xmlWalls, placeholders, offset, - SD.GetXmlBrackets().first, - SD.GetXmlBrackets().second); - UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line); - } + if (SD.GetXmlInputType() != XmlPassThrough) { + int offset = SD.IsSyntax() ? 1 : 0; + bool OK = ProcessAndStripXMLTags(line, m_xmlOptions, + m_reorderingConstraint, + xmlWalls, placeholders, offset, + SD.GetXmlBrackets().first, + SD.GetXmlBrackets().second); + UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line); + } } void @@ -197,11 +181,10 @@ init(string line, std::vector const& factorOrder) aux_interpret_dlt(line); // some poorly documented cache-based stuff // if sentences is specified as "" - if (SD.IsPassthroughEnabled() || SD.IsPassthroughInNBestEnabled()) - { - string pthru = PassthroughSGML(line,"passthrough"); - this->SetPassthroughInformation(pthru); - } + if (SD.IsPassthroughEnabled() || SD.IsPassthroughInNBestEnabled()) { + string pthru = PassthroughSGML(line,"passthrough"); + this->SetPassthroughInformation(pthru); + } vector xmlWalls; vector >placeholders; @@ -218,26 +201,23 @@ init(string line, std::vector const& factorOrder) // our XmlOptions and create TranslationOptions // only fill the vector if we are parsing XML - if (SD.GetXmlInputType() != XmlPassThrough) - { - m_xmlCoverageMap.assign(GetSize(), false); - BOOST_FOREACH(XmlOption* o, m_xmlOptions) - { - WordsRange const& r = o->range; - for(size_t j = r.GetStartPos(); j <= r.GetEndPos(); ++j) - m_xmlCoverageMap[j]=true; - } + if (SD.GetXmlInputType() != XmlPassThrough) { + m_xmlCoverageMap.assign(GetSize(), false); + BOOST_FOREACH(XmlOption* o, m_xmlOptions) { + WordsRange const& r = o->range; + for(size_t j = r.GetStartPos(); j <= r.GetEndPos(); ++j) + m_xmlCoverageMap[j]=true; } + } // reordering walls and zones m_reorderingConstraint.InitializeWalls(GetSize()); // set reordering walls, if "-monotone-at-punction" is set - if (SD.UseReorderingConstraint() && GetSize()) - { - WordsRange r(0, GetSize()-1); - m_reorderingConstraint.SetMonotoneAtPunctuation(GetSubString(r)); - } + if (SD.UseReorderingConstraint() && GetSize()) { + WordsRange r(0, GetSize()-1); + m_reorderingConstraint.SetMonotoneAtPunctuation(GetSubString(r)); + } // set walls obtained from xml for(size_t i=0; i const& FOrder, string const& phraseString) Sentence:: Sentence(size_t const transId, string const& stext, - vector const* IFO) + vector const* IFO) : InputType(transId) { if (IFO) init(stext, *IFO); diff --git a/moses/Sentence.h b/moses/Sentence.h index 8a870f76b..661280711 100644 --- a/moses/Sentence.h +++ b/moses/Sentence.h @@ -32,109 +32,110 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA namespace Moses { - class WordsRange; - class PhraseDictionary; - class TranslationOption; - class TranslationOptionCollection; - class ChartTranslationOptions; - class TranslationTask; - struct XmlOption; +class WordsRange; +class PhraseDictionary; +class TranslationOption; +class TranslationOptionCollection; +class ChartTranslationOptions; +class TranslationTask; +struct XmlOption; +/** + * A Phrase class with an ID. Used specifically as source input so contains functionality to read + * from IODevice and create trans opt + */ +class Sentence : public Phrase, public InputType +{ +protected: + /** - * A Phrase class with an ID. Used specifically as source input so contains functionality to read - * from IODevice and create trans opt + * Utility method that takes in a string representing an XML tag and the name of the attribute, + * and returns the value of that tag if present, empty string otherwise */ - class Sentence : public Phrase, public InputType - { - protected: + std::vector m_xmlOptions; + std::vector m_xmlCoverageMap; - /** - * Utility method that takes in a string representing an XML tag and the name of the attribute, - * and returns the value of that tag if present, empty string otherwise - */ - std::vector m_xmlOptions; - std::vector m_xmlCoverageMap; + NonTerminalSet m_defaultLabelSet; - NonTerminalSet m_defaultLabelSet; - - void ProcessPlaceholders(const std::vector< std::pair > &placeholders); + void ProcessPlaceholders(const std::vector< std::pair > &placeholders); - public: - Sentence(); - Sentence(size_t const transId, std::string const& stext, - std::vector const* IFO = NULL); - // Sentence(size_t const transId, std::string const& stext); - ~Sentence(); +public: + Sentence(); + Sentence(size_t const transId, std::string const& stext, + std::vector const* IFO = NULL); + // Sentence(size_t const transId, std::string const& stext); + ~Sentence(); - InputTypeEnum GetType() const { - return SentenceInput; - } + InputTypeEnum GetType() const { + return SentenceInput; + } - //! Calls Phrase::GetSubString(). Implements abstract InputType::GetSubString() - Phrase GetSubString(const WordsRange& r) const { - return Phrase::GetSubString(r); - } + //! Calls Phrase::GetSubString(). Implements abstract InputType::GetSubString() + Phrase GetSubString(const WordsRange& r) const { + return Phrase::GetSubString(r); + } - //! Calls Phrase::GetWord(). Implements abstract InputType::GetWord() - const Word& GetWord(size_t pos) const { - return Phrase::GetWord(pos); - } + //! Calls Phrase::GetWord(). Implements abstract InputType::GetWord() + const Word& GetWord(size_t pos) const { + return Phrase::GetWord(pos); + } - //! Calls Phrase::GetSize(). Implements abstract InputType::GetSize() - size_t GetSize() const { - return Phrase::GetSize(); - } + //! Calls Phrase::GetSize(). Implements abstract InputType::GetSize() + size_t GetSize() const { + return Phrase::GetSize(); + } - //! Returns true if there were any XML tags parsed that at least partially covered the range passed - bool XmlOverlap(size_t startPos, size_t endPos) const; + //! Returns true if there were any XML tags parsed that at least partially covered the range passed + bool XmlOverlap(size_t startPos, size_t endPos) const; - //! populates vector argument with XML force translation options for the specific range passed - void GetXmlTranslationOptions(std::vector &list) const; - void GetXmlTranslationOptions(std::vector &list, size_t startPos, size_t endPos) const; - std::vector GetXmlChartTranslationOptions() const; + //! populates vector argument with XML force translation options for the specific range passed + void GetXmlTranslationOptions(std::vector &list) const; + void GetXmlTranslationOptions(std::vector &list, size_t startPos, size_t endPos) const; + std::vector GetXmlChartTranslationOptions() const; - virtual int Read(std::istream& in,const std::vector& factorOrder); - void Print(std::ostream& out) const; + virtual int Read(std::istream& in,const std::vector& factorOrder); + void Print(std::ostream& out) const; - TranslationOptionCollection* - CreateTranslationOptionCollection(ttasksptr const& ttask) const; + TranslationOptionCollection* + CreateTranslationOptionCollection(ttasksptr const& ttask) const; - virtual void - CreateFromString(std::vector const &factorOrder, - std::string const& phraseString); + virtual void + CreateFromString(std::vector const &factorOrder, + std::string const& phraseString); - const NonTerminalSet& - GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const - { return m_defaultLabelSet; } + const NonTerminalSet& + GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const { + return m_defaultLabelSet; + } - void - init(std::string line, std::vector const& factorOrder); + void + init(std::string line, std::vector const& factorOrder); - private: - // auxliliary functions for Sentence initialization - // void aux_interpret_sgml_markup(std::string& line); - // void aux_interpret_dlt(std::string& line); - // void aux_interpret_xml (std::string& line, std::vector & xmlWalls, - // std::vector >& placeholders); +private: + // auxliliary functions for Sentence initialization + // void aux_interpret_sgml_markup(std::string& line); + // void aux_interpret_dlt(std::string& line); + // void aux_interpret_xml (std::string& line, std::vector & xmlWalls, + // std::vector >& placeholders); - void - aux_interpret_sgml_markup(std::string& line); + void + aux_interpret_sgml_markup(std::string& line); - void - aux_interpret_dlt(std::string& line); + void + aux_interpret_dlt(std::string& line); - void - aux_interpret_xml - (std::string& line, std::vector & xmlWalls, - std::vector >& placeholders); + void + aux_interpret_xml + (std::string& line, std::vector & xmlWalls, + std::vector >& placeholders); - void - aux_init_partial_translation(std::string& line); + void + aux_init_partial_translation(std::string& line); - }; +}; } diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp index 420ad7a20..c3d55f5c7 100644 --- a/moses/StaticData.cpp +++ b/moses/StaticData.cpp @@ -118,7 +118,7 @@ StaticData string &feature = toks[0]; std::map::const_iterator iter - = featureNameOverride.find(feature); + = featureNameOverride.find(feature); if (iter == featureNameOverride.end()) { // feature name not override m_registry.Construct(feature, line); @@ -146,7 +146,7 @@ StaticData m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput); m_parameter->SetParameter(m_continuePartialTranslation, - "continue-partial-translation", false ); + "continue-partial-translation", false ); std::string s_it = "text input"; if (m_inputType == 1) { @@ -160,7 +160,7 @@ StaticData } VERBOSE(2,"input type is: "<SetParameter(m_xmlInputType, "xml-input", XmlPassThrough); // specify XML tags opening and closing brackets for XML option @@ -178,7 +178,7 @@ StaticData } m_parameter->SetParameter(m_defaultNonTermOnlyForEmptyRange, - "default-non-term-for-empty-range-only", false ); + "default-non-term-for-empty-range-only", false ); } @@ -347,18 +347,18 @@ StaticData m_parameter->SetParameter(m_PrintAlignmentInfoNbest, - "print-alignment-info-in-n-best", false ); + "print-alignment-info-in-n-best", false ); // include feature names in the n-best list m_parameter->SetParameter(m_labeledNBestList, "labeled-n-best-list", true ); // include word alignment in the n-best list m_parameter->SetParameter(m_nBestIncludesSegmentation, - "include-segmentation-in-n-best", false ); + "include-segmentation-in-n-best", false ); // print all factors of output translations m_parameter->SetParameter(m_reportAllFactorsNBest, - "report-all-factors-in-n-best", false ); + "report-all-factors-in-n-best", false ); m_parameter->SetParameter(m_printNBestTrees, "n-best-trees", false ); return true; @@ -412,7 +412,7 @@ StaticData #ifndef WITH_THREADS if (m_threadCount > 1) { std::cerr << "Error: Thread count of " << params->at(0) - << " but moses not built with thread support"; + << " but moses not built with thread support"; return false; } #endif @@ -426,11 +426,11 @@ StaticData ::ini_cube_pruning_options() { m_parameter->SetParameter(m_cubePruningPopLimit, "cube-pruning-pop-limit", - DEFAULT_CUBE_PRUNING_POP_LIMIT); + DEFAULT_CUBE_PRUNING_POP_LIMIT); m_parameter->SetParameter(m_cubePruningDiversity, "cube-pruning-diversity", - DEFAULT_CUBE_PRUNING_DIVERSITY); + DEFAULT_CUBE_PRUNING_DIVERSITY); m_parameter->SetParameter(m_cubePruningLazyScoring, "cube-pruning-lazy-scoring", - false); + false); } void @@ -468,7 +468,7 @@ void StaticData ::ini_oov_options() { - // unknown word processing + // unknown word processing m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false ); m_parameter->SetParameter(m_markUnknown, "mark-unknown", false ); @@ -647,7 +647,7 @@ bool StaticData::LoadData(Parameter *parameter) // S2T decoder m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm", - RecursiveCYKPlus); + RecursiveCYKPlus); ini_zombie_options(); // probably dead, or maybe not @@ -1016,7 +1016,7 @@ StaticData ::InitializeForInput(ttasksptr const& ttask) const { const std::vector &producers - = FeatureFunction::GetFeatureFunctions(); + = FeatureFunction::GetFeatureFunctions(); for(size_t i=0; i &producers - = FeatureFunction::GetFeatureFunctions(); + = FeatureFunction::GetFeatureFunctions(); for(size_t i=0; i::iterator iter; for (iter = weightNames.begin(); iter != weightNames.end(); ++iter) { cerr << *iter << ","; diff --git a/moses/StaticData.h b/moses/StaticData.h index 438ac0633..2b46d1ef0 100644 --- a/moses/StaticData.h +++ b/moses/StaticData.h @@ -476,18 +476,18 @@ public: // m_searchAlgorithm == SyntaxF2S; // } - bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const - { + bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const { if (algo == DefaultSearchAlgorithm) algo = m_searchAlgorithm; return (algo == CYKPlus || algo == ChartIncremental || - algo == SyntaxS2T || algo == SyntaxT2S || - algo == SyntaxF2S || algo == SyntaxT2S_SCFG); + algo == SyntaxS2T || algo == SyntaxT2S || + algo == SyntaxF2S || algo == SyntaxT2S_SCFG); } const ScoreComponentCollection& - GetAllWeights() const - { return m_allWeights; } + GetAllWeights() const { + return m_allWeights; + } void SetAllWeights(const ScoreComponentCollection& weights) { m_allWeights = weights; diff --git a/moses/Syntax/F2S/HyperTreeLoader.cpp b/moses/Syntax/F2S/HyperTreeLoader.cpp index bd19cbace..21d5b0447 100644 --- a/moses/Syntax/F2S/HyperTreeLoader.cpp +++ b/moses/Syntax/F2S/HyperTreeLoader.cpp @@ -146,7 +146,7 @@ bool HyperTreeLoader::Load(const std::vector &input, } void HyperTreeLoader::ExtractSourceTerminalSetFromHyperPath( - const HyperPath &hp, boost::unordered_set &sourceTerminalSet) + const HyperPath &hp, boost::unordered_set &sourceTerminalSet) { for (std::vector::const_iterator p = hp.nodeSeqs.begin(); p != hp.nodeSeqs.end(); ++p) { diff --git a/moses/Syntax/F2S/HyperTreeLoader.h b/moses/Syntax/F2S/HyperTreeLoader.h index 088c7eaf5..eebf1185a 100644 --- a/moses/Syntax/F2S/HyperTreeLoader.h +++ b/moses/Syntax/F2S/HyperTreeLoader.h @@ -31,7 +31,7 @@ public: private: void ExtractSourceTerminalSetFromHyperPath( - const HyperPath &, boost::unordered_set &); + const HyperPath &, boost::unordered_set &); }; } // namespace F2S diff --git a/moses/Syntax/F2S/Manager-inl.h b/moses/Syntax/F2S/Manager-inl.h index 3aedc640e..55f85e888 100644 --- a/moses/Syntax/F2S/Manager-inl.h +++ b/moses/Syntax/F2S/Manager-inl.h @@ -39,7 +39,7 @@ Manager::Manager(ttasksptr const& ttask) if (const ForestInput *p = dynamic_cast(&m_source)) { m_forest = p->GetForest(); m_rootVertex = p->GetRootVertex(); - m_sentenceLength = p->GetSize(); + m_sentenceLength = p->GetSize(); } else if (const TreeInput *p = dynamic_cast(&m_source)) { T2S::InputTreeBuilder builder; T2S::InputTree tmpTree; diff --git a/moses/Syntax/F2S/Manager.h b/moses/Syntax/F2S/Manager.h index 1dcab4f5e..bcf1ff2bd 100644 --- a/moses/Syntax/F2S/Manager.h +++ b/moses/Syntax/F2S/Manager.h @@ -39,7 +39,7 @@ public: typedef std::vector > kBestList_t; void ExtractKBest(std::size_t k, kBestList_t& kBestList, - bool onlyDistinct=false) const; + bool onlyDistinct=false) const; void OutputDetailedTranslationReport(OutputCollector *collector) const; diff --git a/moses/Syntax/InputWeightFF.cpp b/moses/Syntax/InputWeightFF.cpp index af44e31ec..8bb88c6ac 100644 --- a/moses/Syntax/InputWeightFF.cpp +++ b/moses/Syntax/InputWeightFF.cpp @@ -11,34 +11,34 @@ namespace Syntax { InputWeightFF::InputWeightFF(const std::string &line) - : StatelessFeatureFunction(1, line) + : StatelessFeatureFunction(1, line) { ReadParameters(); } void InputWeightFF::EvaluateWhenApplied(const Hypothesis& hypo, - ScoreComponentCollection* accumulator) const + ScoreComponentCollection* accumulator) const { // TODO Throw exception. assert(false); } void InputWeightFF::EvaluateWhenApplied(const ChartHypothesis &hypo, - ScoreComponentCollection* accumulator) const + ScoreComponentCollection* accumulator) const { // TODO Throw exception. assert(false); } void InputWeightFF::EvaluateWhenApplied( - const Syntax::SHyperedge &hyperedge, - ScoreComponentCollection* accumulator) const + const Syntax::SHyperedge &hyperedge, + ScoreComponentCollection* accumulator) const { accumulator->PlusEquals(this, hyperedge.label.inputWeight); } void InputWeightFF::SetParameter(const std::string& key, - const std::string& value) + const std::string& value) { StatelessFeatureFunction::SetParameter(key, value); } diff --git a/moses/Syntax/InputWeightFF.h b/moses/Syntax/InputWeightFF.h index bdda1d922..127834e64 100644 --- a/moses/Syntax/InputWeightFF.h +++ b/moses/Syntax/InputWeightFF.h @@ -42,7 +42,7 @@ public: ScoreComponentCollection *) const {} void EvaluateTranslationOptionListWithSourceContext( - const InputType &, const TranslationOptionList &) const {} + const InputType &, const TranslationOptionList &) const {} }; } // Syntax diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp index 5a26e44cc..4976375e9 100644 --- a/moses/TargetPhrase.cpp +++ b/moses/TargetPhrase.cpp @@ -225,21 +225,19 @@ void TargetPhrase::SetSparseScore(const FeatureFunction* translationScoreProduce boost::shared_ptr mergescores(boost::shared_ptr const& a, - boost::shared_ptr const& b) + boost::shared_ptr const& b) { boost::shared_ptr ret; if (!a) return b ? b : ret; if (!b) return a; if (a->size() != b->size()) return ret; ret.reset(new Scores(*a)); - for (size_t i = 0; i < a->size(); ++i) - { - if ((*a)[i] == 0) (*a)[i] = (*b)[i]; - else if ((*b)[i]) - { - UTIL_THROW_IF2((*a)[i] != (*b)[i], "can't merge feature vectors"); - } + for (size_t i = 0; i < a->size(); ++i) { + if ((*a)[i] == 0) (*a)[i] = (*b)[i]; + else if ((*b)[i]) { + UTIL_THROW_IF2((*a)[i] != (*b)[i], "can't merge feature vectors"); } + } return ret; } @@ -253,12 +251,11 @@ Merge(const TargetPhrase ©, const std::vector& factorVec) m_fullScore += copy.m_fullScore; typedef ScoreCache_t::iterator iter; typedef ScoreCache_t::value_type item; - BOOST_FOREACH(item const& s, copy.m_cached_scores) - { - pair foo = m_cached_scores.insert(s); - if (foo.second == false) - foo.first->second = mergescores(foo.first->second, s.second); - } + BOOST_FOREACH(item const& s, copy.m_cached_scores) { + pair foo = m_cached_scores.insert(s); + if (foo.second == false) + foo.first->second = mergescores(foo.first->second, s.second); + } } TargetPhrase::ScoreCache_t const& @@ -279,8 +276,10 @@ GetExtraScores(FeatureFunction const* ff) const void TargetPhrase:: SetExtraScores(FeatureFunction const* ff, - boost::shared_ptr const& s) -{ m_cached_scores[ff] = s; } + boost::shared_ptr const& s) +{ + m_cached_scores[ff] = s; +} void TargetPhrase::SetProperties(const StringPiece &str) diff --git a/moses/TargetPhrase.h b/moses/TargetPhrase.h index 1f5960121..35b06c1c7 100644 --- a/moses/TargetPhrase.h +++ b/moses/TargetPhrase.h @@ -51,15 +51,15 @@ class PhraseDictionary; */ class TargetPhrase: public Phrase { - public: +public: typedef std::map > - ScoreCache_t; + ScoreCache_t; ScoreCache_t const& GetExtraScores() const; Scores const* GetExtraScores(FeatureFunction const* ff) const; void SetExtraScores(FeatureFunction const* ff, - boost::shared_ptr const& scores); + boost::shared_ptr const& scores); - private: +private: ScoreCache_t m_cached_scores; private: diff --git a/moses/TrainingTask.h b/moses/TrainingTask.h index 6166b4d42..4d2152920 100644 --- a/moses/TrainingTask.h +++ b/moses/TrainingTask.h @@ -18,7 +18,7 @@ class TrainingTask : public Moses::TranslationTask protected: TrainingTask(boost::shared_ptr const source, - boost::shared_ptr const ioWrapper) + boost::shared_ptr const ioWrapper) : TranslationTask(source, ioWrapper) { } @@ -26,8 +26,7 @@ public: // factory function static boost::shared_ptr - create(boost::shared_ptr const& source) - { + create(boost::shared_ptr const& source) { boost::shared_ptr nix; boost::shared_ptr ret(new TrainingTask(source, nix)); ret->m_self = ret; @@ -37,8 +36,7 @@ public: // factory function static boost::shared_ptr create(boost::shared_ptr const& source, - boost::shared_ptr const& ioWrapper) - { + boost::shared_ptr const& ioWrapper) { boost::shared_ptr ret(new TrainingTask(source, ioWrapper)); ret->m_self = ret; return ret; @@ -53,7 +51,7 @@ public: std::cerr << *m_source << std::endl; TranslationOptionCollection *transOptColl - = m_source->CreateTranslationOptionCollection(this->self()); + = m_source->CreateTranslationOptionCollection(this->self()); transOptColl->CreateTranslationOptions(); delete transOptColl; diff --git a/moses/TranslationModel/CompactPT/BlockHashIndex.h b/moses/TranslationModel/CompactPT/BlockHashIndex.h index 130dd89fc..0f20fa1b2 100644 --- a/moses/TranslationModel/CompactPT/BlockHashIndex.h +++ b/moses/TranslationModel/CompactPT/BlockHashIndex.h @@ -163,7 +163,7 @@ public: #ifdef WITH_THREADS boost::shared_ptr > - ht(new HashTask(current, *this, keys)); + ht(new HashTask(current, *this, keys)); m_threadPool.Submit(ht); #else CalcHash(current, keys); diff --git a/moses/TranslationModel/CompactPT/MmapAllocator.h b/moses/TranslationModel/CompactPT/MmapAllocator.h index 389b60359..5c1d5b58e 100644 --- a/moses/TranslationModel/CompactPT/MmapAllocator.h +++ b/moses/TranslationModel/CompactPT/MmapAllocator.h @@ -133,7 +133,7 @@ public: size_t read = 0; read += ftruncate(m_file_desc, m_map_size); m_data_ptr = (char *)util::MapOrThrow( - m_map_size, true, map_shared, false, m_file_desc, 0); + m_map_size, true, map_shared, false, m_file_desc, 0); return (pointer)m_data_ptr; } else { size_t map_offset = (m_data_offset / m_page_size) * m_page_size; @@ -142,7 +142,7 @@ public: size_t map_size = m_map_size + relative_offset; m_data_ptr = (char *)util::MapOrThrow( - m_map_size, false, map_shared, false, m_file_desc, map_offset); + m_map_size, false, map_shared, false, m_file_desc, map_offset); return (pointer)(m_data_ptr + relative_offset); } diff --git a/moses/TranslationModel/PhraseDictionary.h b/moses/TranslationModel/PhraseDictionary.h index 2c1f1f39e..6c30d5acd 100644 --- a/moses/TranslationModel/PhraseDictionary.h +++ b/moses/TranslationModel/PhraseDictionary.h @@ -117,8 +117,7 @@ public: virtual TargetPhraseCollection const * - GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) - { + GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) { return GetTargetPhraseCollectionLEGACY(src); } @@ -129,8 +128,7 @@ public: virtual void GetTargetPhraseCollectionBatch(ttasksptr const& ttask, - const InputPathList &inputPathQueue) const - { + const InputPathList &inputPathQueue) const { GetTargetPhraseCollectionBatch(inputPathQueue); } diff --git a/moses/TranslationOption.cpp b/moses/TranslationOption.cpp index 52bf49fb2..484692ad9 100644 --- a/moses/TranslationOption.cpp +++ b/moses/TranslationOption.cpp @@ -109,7 +109,7 @@ ostream& operator<<(ostream& out, const TranslationOption& possibleTranslation) return out; } - /** returns cached scores */ +/** returns cached scores */ const Scores* TranslationOption:: GetLexReorderingScores(LexicalReordering const* scoreProducer) const diff --git a/moses/TranslationOption.h b/moses/TranslationOption.h index 4bf545f7d..87a3c9c5d 100644 --- a/moses/TranslationOption.h +++ b/moses/TranslationOption.h @@ -164,7 +164,7 @@ public: // } void CacheLexReorderingScores(const LexicalReordering &scoreProducer, - const Scores &score); + const Scores &score); TO_STRING(); diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp index 1e3ef9045..07544b88d 100644 --- a/moses/TranslationOptionCollection.cpp +++ b/moses/TranslationOptionCollection.cpp @@ -57,7 +57,7 @@ namespace Moses * called by inherited classe */ TranslationOptionCollection:: TranslationOptionCollection(ttasksptr const& ttask, - InputType const& src, + InputType const& src, size_t maxNoTransOptPerCoverage, float translationOptionThreshold) : m_ttask(ttask) @@ -626,14 +626,13 @@ CacheLexReordering() { size_t const stop = m_source.GetSize(); typedef StatefulFeatureFunction sfFF; - BOOST_FOREACH(sfFF const* ff, sfFF::GetStatefulFeatureFunctions()) - { - if (typeid(*ff) != typeid(LexicalReordering)) continue; - LexicalReordering const& lr = static_cast(*ff); - for (size_t s = 0 ; s < stop ; s++) - BOOST_FOREACH(TranslationOptionList& tol, m_collection[s]) - lr.SetCache(tol); - } + BOOST_FOREACH(sfFF const* ff, sfFF::GetStatefulFeatureFunctions()) { + if (typeid(*ff) != typeid(LexicalReordering)) continue; + LexicalReordering const& lr = static_cast(*ff); + for (size_t s = 0 ; s < stop ; s++) + BOOST_FOREACH(TranslationOptionList& tol, m_collection[s]) + lr.SetCache(tol); + } } //! list of trans opt for a particular span diff --git a/moses/TranslationOptionCollection.h b/moses/TranslationOptionCollection.h index 4c0a6bdc6..2712441ed 100644 --- a/moses/TranslationOptionCollection.h +++ b/moses/TranslationOptionCollection.h @@ -75,7 +75,7 @@ protected: InputPathList m_inputPathQueue; TranslationOptionCollection(ttasksptr const& ttask, - InputType const& src, size_t maxNoTransOptPerCoverage, + InputType const& src, size_t maxNoTransOptPerCoverage, float translationOptionThreshold); void CalcFutureScore(); @@ -177,8 +177,7 @@ public: return m_inputPathQueue; } - ttasksptr GetTranslationTask() const - { + ttasksptr GetTranslationTask() const { return m_ttask.lock(); } TO_STRING(); diff --git a/moses/TranslationOptionCollectionConfusionNet.cpp b/moses/TranslationOptionCollectionConfusionNet.cpp index 387821102..6ee83d969 100644 --- a/moses/TranslationOptionCollectionConfusionNet.cpp +++ b/moses/TranslationOptionCollectionConfusionNet.cpp @@ -21,7 +21,7 @@ namespace Moses /** constructor; just initialize the base class */ TranslationOptionCollectionConfusionNet:: TranslationOptionCollectionConfusionNet(ttasksptr const& ttask, - const ConfusionNet &input, + const ConfusionNet &input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold) : TranslationOptionCollection(ttask,input, maxNoTransOptPerCoverage, diff --git a/moses/TranslationOptionCollectionLattice.cpp b/moses/TranslationOptionCollectionLattice.cpp index e2d9e996a..fde40e538 100644 --- a/moses/TranslationOptionCollectionLattice.cpp +++ b/moses/TranslationOptionCollectionLattice.cpp @@ -23,7 +23,7 @@ TranslationOptionCollectionLattice ( ttasksptr const& ttask, const WordLattice &input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold) : TranslationOptionCollection(ttask, input, maxNoTransOptPerCoverage, - translationOptionThreshold) + translationOptionThreshold) { UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(), "Not for models using the legqacy binary phrase table"); diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp index 764ca998a..3794d35e7 100644 --- a/moses/TranslationTask.cpp +++ b/moses/TranslationTask.cpp @@ -53,7 +53,7 @@ TranslationTask boost::shared_ptr TranslationTask ::create(boost::shared_ptr const& source, - boost::shared_ptr const& ioWrapper) + boost::shared_ptr const& ioWrapper) { boost::shared_ptr ret(new TranslationTask(source, ioWrapper)); ret->m_self = ret; @@ -63,7 +63,7 @@ TranslationTask TranslationTask ::TranslationTask(boost::shared_ptr const& source, - boost::shared_ptr const& ioWrapper) + boost::shared_ptr const& ioWrapper) : m_source(source) , m_ioWrapper(ioWrapper) { } @@ -82,37 +82,33 @@ TranslationTask if (!staticData.IsSyntax(algo)) manager.reset(new Manager(this->self())); // phrase-based - else if (algo == SyntaxF2S || algo == SyntaxT2S) - { // STSG-based tree-to-string / forest-to-string decoding (ask Phil Williams) - typedef Syntax::F2S::RuleMatcherCallback Callback; - typedef Syntax::F2S::RuleMatcherHyperTree RuleMatcher; - manager.reset(new Syntax::F2S::Manager(this->self())); - } + else if (algo == SyntaxF2S || algo == SyntaxT2S) { + // STSG-based tree-to-string / forest-to-string decoding (ask Phil Williams) + typedef Syntax::F2S::RuleMatcherCallback Callback; + typedef Syntax::F2S::RuleMatcherHyperTree RuleMatcher; + manager.reset(new Syntax::F2S::Manager(this->self())); + } - else if (algo == SyntaxS2T) - { // new-style string-to-tree decoding (ask Phil Williams) - S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm(); - if (algorithm == RecursiveCYKPlus) - { - typedef Syntax::S2T::EagerParserCallback Callback; - typedef Syntax::S2T::RecursiveCYKPlusParser Parser; - manager.reset(new Syntax::S2T::Manager(this->self())); - } - else if (algorithm == Scope3) - { - typedef Syntax::S2T::StandardParserCallback Callback; - typedef Syntax::S2T::Scope3Parser Parser; - manager.reset(new Syntax::S2T::Manager(this->self())); - } - else UTIL_THROW2("ERROR: unhandled S2T parsing algorithm"); - } + else if (algo == SyntaxS2T) { + // new-style string-to-tree decoding (ask Phil Williams) + S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm(); + if (algorithm == RecursiveCYKPlus) { + typedef Syntax::S2T::EagerParserCallback Callback; + typedef Syntax::S2T::RecursiveCYKPlusParser Parser; + manager.reset(new Syntax::S2T::Manager(this->self())); + } else if (algorithm == Scope3) { + typedef Syntax::S2T::StandardParserCallback Callback; + typedef Syntax::S2T::Scope3Parser Parser; + manager.reset(new Syntax::S2T::Manager(this->self())); + } else UTIL_THROW2("ERROR: unhandled S2T parsing algorithm"); + } - else if (algo == SyntaxT2S_SCFG) - { // SCFG-based tree-to-string decoding (ask Phil Williams) - typedef Syntax::F2S::RuleMatcherCallback Callback; - typedef Syntax::T2S::RuleMatcherSCFG RuleMatcher; - manager.reset(new Syntax::T2S::Manager(this->self())); - } + else if (algo == SyntaxT2S_SCFG) { + // SCFG-based tree-to-string decoding (ask Phil Williams) + typedef Syntax::F2S::RuleMatcherCallback Callback; + typedef Syntax::T2S::RuleMatcherSCFG RuleMatcher; + manager.reset(new Syntax::T2S::Manager(this->self())); + } else if (algo == ChartIncremental) // Ken's incremental decoding manager.reset(new Incremental::Manager(this->self())); @@ -126,8 +122,8 @@ TranslationTask void TranslationTask::Run() { UTIL_THROW_IF2(!m_source || !m_ioWrapper, - "Base Instances of TranslationTask must be initialized with" - << " input and iowrapper."); + "Base Instances of TranslationTask must be initialized with" + << " input and iowrapper."); // shorthand for "global data" @@ -152,7 +148,7 @@ void TranslationTask::Run() boost::shared_ptr manager = SetupManager(); VERBOSE(1, "Line " << translationId << ": Initialize search took " - << initTime << " seconds total" << endl); + << initTime << " seconds total" << endl); manager->Decode(); @@ -209,9 +205,9 @@ void TranslationTask::Run() // report additional statistics manager->CalcDecoderStatistics(); VERBOSE(1, "Line " << translationId << ": Additional reporting took " - << additionalReportingTime << " seconds total" << endl); + << additionalReportingTime << " seconds total" << endl); VERBOSE(1, "Line " << translationId << ": Translation took " - << translationTime << " seconds total" << endl); + << translationTime << " seconds total" << endl); IFVERBOSE(2) { PrintUserTime("Sentence Decoding Time:"); } diff --git a/moses/TranslationTask.h b/moses/TranslationTask.h index df1cf9f48..2b75c47d5 100644 --- a/moses/TranslationTask.h +++ b/moses/TranslationTask.h @@ -40,7 +40,9 @@ class TranslationTask : public Moses::Task TranslationTask(TranslationTask const& other) { } TranslationTask const& - operator=(TranslationTask const& other) { return *this; } + operator=(TranslationTask const& other) { + return *this; + } protected: boost::weak_ptr m_self; // weak ptr to myself @@ -48,7 +50,7 @@ protected: // pointer to ContextScope, which stores context-specific information TranslationTask() { } ; TranslationTask(boost::shared_ptr const& source, - boost::shared_ptr const& ioWrapper); + boost::shared_ptr const& ioWrapper); // Yes, the constructor is protected. // // TranslationTasks can only be created through the creator @@ -68,11 +70,15 @@ protected: public: boost::shared_ptr - self() { return m_self.lock(); } + self() { + return m_self.lock(); + } virtual boost::shared_ptr - self() const { return m_self.lock(); } + self() const { + return m_self.lock(); + } // creator functions static boost::shared_ptr create(); @@ -84,7 +90,7 @@ public: static boost::shared_ptr create(boost::shared_ptr const& source, - boost::shared_ptr const& ioWrapper); + boost::shared_ptr const& ioWrapper); ~TranslationTask(); /** Translate one sentence @@ -92,15 +98,16 @@ public: virtual void Run(); boost::shared_ptr - GetSource() const { return m_source; } + GetSource() const { + return m_source; + } boost::shared_ptr SetupManager(SearchAlgorithm algo = DefaultSearchAlgorithm); boost::shared_ptr const& - GetScope() const - { + GetScope() const { UTIL_THROW_IF2(m_scope == NULL, "No context scope!"); return m_scope; } diff --git a/moses/TreeInput.h b/moses/TreeInput.h index 7b76ce303..fc7387b0d 100644 --- a/moses/TreeInput.h +++ b/moses/TreeInput.h @@ -8,7 +8,7 @@ namespace Moses { - class TranslationTask; +class TranslationTask; //! @todo what is this? class XMLParseOutput { diff --git a/moses/Util.h b/moses/Util.h index b6d4ef613..43443eb02 100644 --- a/moses/Util.h +++ b/moses/Util.h @@ -427,7 +427,7 @@ inline float CalcTranslationScore(const std::vector &probVector, out << *this; \ return out.str(); \ } \ - + //! delete and remove every element of a collection object such as set, list etc template void RemoveAllInColl(COLL &coll) diff --git a/moses/server/Optimizer.cpp b/moses/server/Optimizer.cpp index d28d7f085..8e5babfc7 100644 --- a/moses/server/Optimizer.cpp +++ b/moses/server/Optimizer.cpp @@ -3,70 +3,67 @@ namespace MosesServer { - using namespace std; +using namespace std; - Optimizer:: - Optimizer() - { - // signature and help strings are documentation -- the client - // can query this information with a system.methodSignature and - // system.methodHelp RPC. - this->_signature = "S:S"; - this->_help = "Optimizes multi-model translation model"; - } - - void - Optimizer:: - execute(xmlrpc_c::paramList const& paramList, - xmlrpc_c::value * const retvalP) - { -#ifdef WITH_DLIB - const params_t params = paramList.getStruct(0); - params_t::const_iterator si; - if ((si = params.find("model_name")) == params.end()) - { - string msg = "Missing name of model to be optimized"; - msg += " (e.g. PhraseDictionaryMultiModelCounts0)"; - throw xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE); - } - const string model_name = xmlrpc_c::value_string(si->second); - - if ((si = params.find("phrase_pairs")) == params.end()) - { - throw xmlrpc_c::fault("Missing list of phrase pairs", - xmlrpc_c::fault::CODE_PARSE); - } - - - vector > phrase_pairs; - - xmlrpc_c::value_array pp_array = xmlrpc_c::value_array(si->second); - vector ppValVec(pp_array.vectorValueValue()); - for (size_t i = 0; i < ppValVec.size(); ++i) - { - xmlrpc_c::value_array pp_array - = xmlrpc_c::value_array(ppValVec[i]); - vector pp(pp_array.vectorValueValue()); - string L1 = xmlrpc_c::value_string(pp[0]); - string L2 = xmlrpc_c::value_string(pp[1]); - phrase_pairs.push_back(make_pair(L1,L2)); - } - - // PhraseDictionaryMultiModel* pdmm - // = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name); - PhraseDictionaryMultiModel* pdmm = FindPhraseDictionary(model_name); - vector weight_vector = pdmm->MinimizePerplexity(phrase_pairs); - - vector weight_vector_ret; - for (size_t i=0;i < weight_vector.size();i++) - weight_vector_ret.push_back(xmlrpc_c::value_double(weight_vector[i])); - - *retvalP = xmlrpc_c::value_array(weight_vector_ret); -#else - string errmsg = "Error: Perplexity minimization requires dlib "; - errmsg += "(compilation option --with-dlib)"; - std::cerr << errmsg << std::endl; - *retvalP = xmlrpc_c::value_string(errmsg); -#endif - } +Optimizer:: +Optimizer() +{ + // signature and help strings are documentation -- the client + // can query this information with a system.methodSignature and + // system.methodHelp RPC. + this->_signature = "S:S"; + this->_help = "Optimizes multi-model translation model"; +} + +void +Optimizer:: +execute(xmlrpc_c::paramList const& paramList, + xmlrpc_c::value * const retvalP) +{ +#ifdef WITH_DLIB + const params_t params = paramList.getStruct(0); + params_t::const_iterator si; + if ((si = params.find("model_name")) == params.end()) { + string msg = "Missing name of model to be optimized"; + msg += " (e.g. PhraseDictionaryMultiModelCounts0)"; + throw xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE); + } + const string model_name = xmlrpc_c::value_string(si->second); + + if ((si = params.find("phrase_pairs")) == params.end()) { + throw xmlrpc_c::fault("Missing list of phrase pairs", + xmlrpc_c::fault::CODE_PARSE); + } + + + vector > phrase_pairs; + + xmlrpc_c::value_array pp_array = xmlrpc_c::value_array(si->second); + vector ppValVec(pp_array.vectorValueValue()); + for (size_t i = 0; i < ppValVec.size(); ++i) { + xmlrpc_c::value_array pp_array + = xmlrpc_c::value_array(ppValVec[i]); + vector pp(pp_array.vectorValueValue()); + string L1 = xmlrpc_c::value_string(pp[0]); + string L2 = xmlrpc_c::value_string(pp[1]); + phrase_pairs.push_back(make_pair(L1,L2)); + } + + // PhraseDictionaryMultiModel* pdmm + // = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name); + PhraseDictionaryMultiModel* pdmm = FindPhraseDictionary(model_name); + vector weight_vector = pdmm->MinimizePerplexity(phrase_pairs); + + vector weight_vector_ret; + for (size_t i=0; i < weight_vector.size(); i++) + weight_vector_ret.push_back(xmlrpc_c::value_double(weight_vector[i])); + + *retvalP = xmlrpc_c::value_array(weight_vector_ret); +#else + string errmsg = "Error: Perplexity minimization requires dlib "; + errmsg += "(compilation option --with-dlib)"; + std::cerr << errmsg << std::endl; + *retvalP = xmlrpc_c::value_string(errmsg); +#endif +} } diff --git a/moses/server/Optimizer.h b/moses/server/Optimizer.h index 8911b089f..da84df023 100644 --- a/moses/server/Optimizer.h +++ b/moses/server/Optimizer.h @@ -6,12 +6,12 @@ namespace MosesServer { - class +class Optimizer : public xmlrpc_c::method - { - public: - Optimizer(); - void execute(xmlrpc_c::paramList const& paramList, - xmlrpc_c::value * const retvalP); - }; +{ +public: + Optimizer(); + void execute(xmlrpc_c::paramList const& paramList, + xmlrpc_c::value * const retvalP); +}; } diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index 62e3031fa..5c87eb1a7 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -3,372 +3,363 @@ namespace MosesServer { - using namespace std; - using Moses::Hypothesis; - using Moses::StaticData; - using Moses::WordsRange; - using Moses::ChartHypothesis; - using Moses::Phrase; - using Moses::Manager; - using Moses::SearchGraphNode; - using Moses::TrellisPathList; - using Moses::TranslationOptionCollection; - using Moses::TranslationOptionList; - using Moses::TranslationOption; - using Moses::TargetPhrase; - using Moses::FValue; - using Moses::PhraseDictionaryMultiModel; - using Moses::FindPhraseDictionary; - using Moses::Sentence; +using namespace std; +using Moses::Hypothesis; +using Moses::StaticData; +using Moses::WordsRange; +using Moses::ChartHypothesis; +using Moses::Phrase; +using Moses::Manager; +using Moses::SearchGraphNode; +using Moses::TrellisPathList; +using Moses::TranslationOptionCollection; +using Moses::TranslationOptionList; +using Moses::TranslationOption; +using Moses::TargetPhrase; +using Moses::FValue; +using Moses::PhraseDictionaryMultiModel; +using Moses::FindPhraseDictionary; +using Moses::Sentence; - boost::shared_ptr - TranslationRequest:: - create(xmlrpc_c::paramList const& paramList, - boost::condition_variable& cond, - boost::mutex& mut) +boost::shared_ptr +TranslationRequest:: +create(xmlrpc_c::paramList const& paramList, + boost::condition_variable& cond, + boost::mutex& mut) +{ + boost::shared_ptr ret; + ret.reset(new TranslationRequest(paramList,cond, mut)); + ret->m_self = ret; + return ret; +} + +void +TranslationRequest:: +Run() +{ + parse_request(m_paramList.getStruct(0)); + + Moses::StaticData const& SD = Moses::StaticData::Instance(); + + //Make sure alternative paths are retained, if necessary + if (m_withGraphInfo || m_nbestSize>0) + // why on earth is this a global variable? Is this even thread-safe???? UG + (const_cast(SD)).SetOutputSearchGraph(true); + + std::stringstream out, graphInfo, transCollOpts; + + if (SD.IsSyntax()) + run_chart_decoder(); + else + run_phrase_decoder(); + + XVERBOSE(1,"Output: " << out.str() << endl); { - boost::shared_ptr ret; - ret.reset(new TranslationRequest(paramList,cond, mut)); - ret->m_self = ret; - return ret; + boost::lock_guard lock(m_mutex); + m_done = true; } + m_cond.notify_one(); - void - TranslationRequest:: - Run() - { - parse_request(m_paramList.getStruct(0)); +} - Moses::StaticData const& SD = Moses::StaticData::Instance(); +/// add phrase alignment information from a Hypothesis +void +TranslationRequest:: +add_phrase_aln_info(Hypothesis const& h, vector& aInfo) const +{ + if (!m_withAlignInfo) return; + WordsRange const& trg = h.GetCurrTargetWordsRange(); + WordsRange const& src = h.GetCurrSourceWordsRange(); - //Make sure alternative paths are retained, if necessary - if (m_withGraphInfo || m_nbestSize>0) - // why on earth is this a global variable? Is this even thread-safe???? UG - (const_cast(SD)).SetOutputSearchGraph(true); + std::map pAlnInfo; + pAlnInfo["tgt-start"] = xmlrpc_c::value_int(trg.GetStartPos()); + pAlnInfo["src-start"] = xmlrpc_c::value_int(src.GetStartPos()); + pAlnInfo["src-end"] = xmlrpc_c::value_int(src.GetEndPos()); + aInfo.push_back(xmlrpc_c::value_struct(pAlnInfo)); +} - std::stringstream out, graphInfo, transCollOpts; +void +TranslationRequest:: +outputChartHypo(ostream& out, const ChartHypothesis* hypo) +{ + Phrase outPhrase(20); + hypo->GetOutputPhrase(outPhrase); - if (SD.IsSyntax()) - run_chart_decoder(); - else - run_phrase_decoder(); + // delete 1st & last + assert(outPhrase.GetSize() >= 2); + outPhrase.RemoveWord(0); + outPhrase.RemoveWord(outPhrase.GetSize() - 1); + for (size_t pos = 0 ; pos < outPhrase.GetSize() ; pos++) + out << *outPhrase.GetFactor(pos, 0) << " "; +} - XVERBOSE(1,"Output: " << out.str() << endl); - { - boost::lock_guard lock(m_mutex); - m_done = true; +bool +TranslationRequest:: +compareSearchGraphNode(const Moses::SearchGraphNode& a, + const Moses::SearchGraphNode& b) +{ + return a.hypo->GetId() < b.hypo->GetId(); +} + +void +TranslationRequest:: +insertGraphInfo(Manager& manager, map& retData) +{ + using xmlrpc_c::value_int; + using xmlrpc_c::value_double; + using xmlrpc_c::value_struct; + using xmlrpc_c::value_string; + vector searchGraphXml; + vector searchGraph; + manager.GetSearchGraph(searchGraph); + std::sort(searchGraph.begin(), searchGraph.end()); + BOOST_FOREACH(Moses::SearchGraphNode const& n, searchGraph) { + map x; // search graph xml node + x["forward"] = value_double(n.forward); + x["fscore"] = value_double(n.fscore); + const Hypothesis* hypo = n.hypo; + x["hyp"] = value_int(hypo->GetId()); + x["stack"] = value_int(hypo->GetWordsBitmap().GetNumWordsCovered()); + if (hypo->GetId() != 0) { + const Hypothesis *prevHypo = hypo->GetPrevHypo(); + x["back"] = value_int(prevHypo->GetId()); + x["score"] = value_double(hypo->GetScore()); + x["transition"] = value_double(hypo->GetScore() - prevHypo->GetScore()); + if (n.recombinationHypo) + x["recombined"] = value_int(n.recombinationHypo->GetId()); + x["cover-start"] = value_int(hypo->GetCurrSourceWordsRange().GetStartPos()); + x["cover-end"] = value_int(hypo->GetCurrSourceWordsRange().GetEndPos()); + x["out"] = value_string(hypo->GetCurrTargetPhrase().GetStringRep(StaticData::Instance().GetOutputFactorOrder())); } - m_cond.notify_one(); - + searchGraphXml.push_back(value_struct(x)); } + retData["sg"] = xmlrpc_c::value_array(searchGraphXml); +} - /// add phrase alignment information from a Hypothesis - void - TranslationRequest:: - add_phrase_aln_info(Hypothesis const& h, vector& aInfo) const - { - if (!m_withAlignInfo) return; - WordsRange const& trg = h.GetCurrTargetWordsRange(); - WordsRange const& src = h.GetCurrSourceWordsRange(); +void +TranslationRequest:: +output_phrase(ostream& out, Phrase const& phrase) const +{ + if (!m_reportAllFactors) { + for (size_t i = 0 ; i < phrase.GetSize(); ++i) + out << *phrase.GetFactor(i, 0) << " "; + } else out << phrase; +} - std::map pAlnInfo; - pAlnInfo["tgt-start"] = xmlrpc_c::value_int(trg.GetStartPos()); - pAlnInfo["src-start"] = xmlrpc_c::value_int(src.GetStartPos()); - pAlnInfo["src-end"] = xmlrpc_c::value_int(src.GetEndPos()); - aInfo.push_back(xmlrpc_c::value_struct(pAlnInfo)); +void +TranslationRequest:: +outputNBest(const Manager& manager, map& retData) +{ + TrellisPathList nBestList; + vector nBestXml; + manager.CalcNBest(m_nbestSize, nBestList, m_nbestDistinct); + + BOOST_FOREACH(Moses::TrellisPath const* path, nBestList) { + vector const& E = path->GetEdges(); + if (!E.size()) continue; + std::map nBestXmlItem; + pack_hypothesis(E, "hyp", nBestXmlItem); + if (m_withScoreBreakdown) { + // should the score breakdown be reported in a more structured manner? + ostringstream buf; + path->GetScoreBreakdown()->OutputAllFeatureScores(buf); + nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str()); + } + + // weighted score + nBestXmlItem["totalScore"] = xmlrpc_c::value_double(path->GetTotalScore()); + nBestXml.push_back(xmlrpc_c::value_struct(nBestXmlItem)); } + retData["nbest"] = xmlrpc_c::value_array(nBestXml); +} - void - TranslationRequest:: - outputChartHypo(ostream& out, const ChartHypothesis* hypo) - { - Phrase outPhrase(20); - hypo->GetOutputPhrase(outPhrase); +void +TranslationRequest:: +insertTranslationOptions(Moses::Manager& manager, + std::map& retData) +{ + const TranslationOptionCollection* toptsColl + = manager.getSntTranslationOptions(); + vector toptsXml; + size_t const stop = toptsColl->GetSource().GetSize(); + TranslationOptionList const* tol; + for (size_t s = 0 ; s < stop ; ++s) { + for (size_t e = s; + (tol = toptsColl->GetTranslationOptionList(s,e)) != NULL; + ++e) { + BOOST_FOREACH(TranslationOption const* topt, *tol) { + std::map toptXml; + TargetPhrase const& tp = topt->GetTargetPhrase(); + StaticData const& GLOBAL = StaticData::Instance(); + std::string tphrase = tp.GetStringRep(GLOBAL.GetOutputFactorOrder()); + toptXml["phrase"] = xmlrpc_c::value_string(tphrase); + toptXml["fscore"] = xmlrpc_c::value_double(topt->GetFutureScore()); + toptXml["start"] = xmlrpc_c::value_int(s); + toptXml["end"] = xmlrpc_c::value_int(e); + vector scoresXml; + const std::valarray &scores + = topt->GetScoreBreakdown().getCoreFeatures(); + for (size_t j = 0; j < scores.size(); ++j) + scoresXml.push_back(xmlrpc_c::value_double(scores[j])); - // delete 1st & last - assert(outPhrase.GetSize() >= 2); - outPhrase.RemoveWord(0); - outPhrase.RemoveWord(outPhrase.GetSize() - 1); - for (size_t pos = 0 ; pos < outPhrase.GetSize() ; pos++) - out << *outPhrase.GetFactor(pos, 0) << " "; - } - - bool - TranslationRequest:: - compareSearchGraphNode(const Moses::SearchGraphNode& a, - const Moses::SearchGraphNode& b) - { return a.hypo->GetId() < b.hypo->GetId(); } - - void - TranslationRequest:: - insertGraphInfo(Manager& manager, map& retData) - { - using xmlrpc_c::value_int; - using xmlrpc_c::value_double; - using xmlrpc_c::value_struct; - using xmlrpc_c::value_string; - vector searchGraphXml; - vector searchGraph; - manager.GetSearchGraph(searchGraph); - std::sort(searchGraph.begin(), searchGraph.end()); - BOOST_FOREACH(Moses::SearchGraphNode const& n, searchGraph) - { - map x; // search graph xml node - x["forward"] = value_double(n.forward); - x["fscore"] = value_double(n.fscore); - const Hypothesis* hypo = n.hypo; - x["hyp"] = value_int(hypo->GetId()); - x["stack"] = value_int(hypo->GetWordsBitmap().GetNumWordsCovered()); - if (hypo->GetId() != 0) - { - const Hypothesis *prevHypo = hypo->GetPrevHypo(); - x["back"] = value_int(prevHypo->GetId()); - x["score"] = value_double(hypo->GetScore()); - x["transition"] = value_double(hypo->GetScore() - prevHypo->GetScore()); - if (n.recombinationHypo) - x["recombined"] = value_int(n.recombinationHypo->GetId()); - x["cover-start"] = value_int(hypo->GetCurrSourceWordsRange().GetStartPos()); - x["cover-end"] = value_int(hypo->GetCurrSourceWordsRange().GetEndPos()); - x["out"] = value_string(hypo->GetCurrTargetPhrase().GetStringRep(StaticData::Instance().GetOutputFactorOrder())); - } - searchGraphXml.push_back(value_struct(x)); + toptXml["scores"] = xmlrpc_c::value_array(scoresXml); + toptsXml.push_back(xmlrpc_c::value_struct(toptXml)); } - retData["sg"] = xmlrpc_c::value_array(searchGraphXml); + } + } + retData["topt"] = xmlrpc_c::value_array(toptsXml); +} + +bool +check(std::map const& params, std::string const key) +{ + std::map::const_iterator m; + return (params.find(key) != params.end()); +} + +TranslationRequest:: +TranslationRequest(xmlrpc_c::paramList const& paramList, + boost::condition_variable& cond, boost::mutex& mut) + : m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList) +{ } + +void +TranslationRequest:: +parse_request(std::map const& params) +{ + // parse XMLRPC request + // params_t const params = m_paramList.getStruct(0); + m_paramList.verifyEnd(1); // ??? UG + + // source text must be given, or we don't know what to translate + typedef std::map params_t; + params_t::const_iterator si = params.find("text"); + if (si == params.end()) + throw xmlrpc_c::fault("Missing source text", xmlrpc_c::fault::CODE_PARSE); + m_source_string = xmlrpc_c::value_string(si->second); + XVERBOSE(1,"Input: " << m_source_string << endl); + + m_withAlignInfo = check(params, "align"); + m_withWordAlignInfo = check(params, "word-align"); + m_withGraphInfo = check(params, "sg"); + m_withTopts = check(params, "topt"); + m_reportAllFactors = check(params, "report-all-factors"); + m_nbestDistinct = check(params, "nbest-distinct"); + m_withScoreBreakdown = check(params, "add-score-breakdown"); + m_source.reset(new Sentence(0,m_source_string)); + si = params.find("lambda"); + if (si != params.end()) { + // muMo = multiModel + xmlrpc_c::value_array muMoArray = xmlrpc_c::value_array(si->second); + vector muMoValVec(muMoArray.vectorValueValue()); + vector w(muMoValVec.size()); + for (size_t i = 0; i < muMoValVec.size(); ++i) + w[i] = xmlrpc_c::value_double(muMoValVec[i]); + if (w.size() && (si = params.find("model_name")) != params.end()) { + string const model_name = xmlrpc_c::value_string(si->second); + PhraseDictionaryMultiModel* pdmm + = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name); + // Moses::PhraseDictionaryMultiModel* pdmm + // = FindPhraseDictionary(model_name); + pdmm->SetTemporaryMultiModelWeightsVector(w); + } } - void - TranslationRequest:: - output_phrase(ostream& out, Phrase const& phrase) const - { - if (!m_reportAllFactors) - { - for (size_t i = 0 ; i < phrase.GetSize(); ++i) - out << *phrase.GetFactor(i, 0) << " "; - } - else out << phrase; + // // biased sampling for suffix-array-based sampling phrase table? + // if ((si = params.find("bias")) != params.end()) + // { + // std::vector tmp + // = xmlrpc_c::value_array(si->second).cvalue(); + // for (size_t i = 1; i < tmp.size(); i += 2) + // m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]); + // } +} // end of Translationtask::parse_request() + + +void +TranslationRequest:: +run_chart_decoder() +{ + Moses::TreeInput tinput; + istringstream buf(m_source_string + "\n"); + tinput.Read(buf, StaticData::Instance().GetInputFactorOrder()); + + Moses::ChartManager manager(this->self()); + manager.Decode(); + + const Moses::ChartHypothesis *hypo = manager.GetBestHypothesis(); + ostringstream out; + outputChartHypo(out,hypo); + + m_target_string = out.str(); + m_retData["text"] = xmlrpc_c::value_string(m_target_string); + + if (m_withGraphInfo) { + std::ostringstream sgstream; + manager.OutputSearchGraphMoses(sgstream); + m_retData["sg"] = xmlrpc_c::value_string(sgstream.str()); } +} // end of TranslationRequest::run_chart_decoder() - void - TranslationRequest:: - outputNBest(const Manager& manager, map& retData) - { - TrellisPathList nBestList; - vector nBestXml; - manager.CalcNBest(m_nbestSize, nBestList, m_nbestDistinct); +void +TranslationRequest:: +pack_hypothesis(vector const& edges, string const& key, + map & dest) const +{ + // target string + ostringstream target; + BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) + output_phrase(target, e->GetCurrTargetPhrase()); + dest[key] = xmlrpc_c::value_string(target.str()); - BOOST_FOREACH(Moses::TrellisPath const* path, nBestList) - { - vector const& E = path->GetEdges(); - if (!E.size()) continue; - std::map nBestXmlItem; - pack_hypothesis(E, "hyp", nBestXmlItem); - if (m_withScoreBreakdown) - { - // should the score breakdown be reported in a more structured manner? - ostringstream buf; - path->GetScoreBreakdown()->OutputAllFeatureScores(buf); - nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str()); - } + if (m_withAlignInfo) { + // phrase alignment, if requested - // weighted score - nBestXmlItem["totalScore"] = xmlrpc_c::value_double(path->GetTotalScore()); - nBestXml.push_back(xmlrpc_c::value_struct(nBestXmlItem)); - } - retData["nbest"] = xmlrpc_c::value_array(nBestXml); - } - - void - TranslationRequest:: - insertTranslationOptions(Moses::Manager& manager, - std::map& retData) - { - const TranslationOptionCollection* toptsColl - = manager.getSntTranslationOptions(); - vector toptsXml; - size_t const stop = toptsColl->GetSource().GetSize(); - TranslationOptionList const* tol; - for (size_t s = 0 ; s < stop ; ++s) - { - for (size_t e = s; - (tol = toptsColl->GetTranslationOptionList(s,e)) != NULL; - ++e) - { - BOOST_FOREACH(TranslationOption const* topt, *tol) - { - std::map toptXml; - TargetPhrase const& tp = topt->GetTargetPhrase(); - StaticData const& GLOBAL = StaticData::Instance(); - std::string tphrase = tp.GetStringRep(GLOBAL.GetOutputFactorOrder()); - toptXml["phrase"] = xmlrpc_c::value_string(tphrase); - toptXml["fscore"] = xmlrpc_c::value_double(topt->GetFutureScore()); - toptXml["start"] = xmlrpc_c::value_int(s); - toptXml["end"] = xmlrpc_c::value_int(e); - vector scoresXml; - const std::valarray &scores - = topt->GetScoreBreakdown().getCoreFeatures(); - for (size_t j = 0; j < scores.size(); ++j) - scoresXml.push_back(xmlrpc_c::value_double(scores[j])); - - toptXml["scores"] = xmlrpc_c::value_array(scoresXml); - toptsXml.push_back(xmlrpc_c::value_struct(toptXml)); - } - } - } - retData["topt"] = xmlrpc_c::value_array(toptsXml); - } - - bool - check(std::map const& params, std::string const key) - { - std::map::const_iterator m; - return (params.find(key) != params.end()); - } - - TranslationRequest:: - TranslationRequest(xmlrpc_c::paramList const& paramList, - boost::condition_variable& cond, boost::mutex& mut) - : m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList) - { } - - void - TranslationRequest:: - parse_request(std::map const& params) - { // parse XMLRPC request - // params_t const params = m_paramList.getStruct(0); - m_paramList.verifyEnd(1); // ??? UG - - // source text must be given, or we don't know what to translate - typedef std::map params_t; - params_t::const_iterator si = params.find("text"); - if (si == params.end()) - throw xmlrpc_c::fault("Missing source text", xmlrpc_c::fault::CODE_PARSE); - m_source_string = xmlrpc_c::value_string(si->second); - XVERBOSE(1,"Input: " << m_source_string << endl); - - m_withAlignInfo = check(params, "align"); - m_withWordAlignInfo = check(params, "word-align"); - m_withGraphInfo = check(params, "sg"); - m_withTopts = check(params, "topt"); - m_reportAllFactors = check(params, "report-all-factors"); - m_nbestDistinct = check(params, "nbest-distinct"); - m_withScoreBreakdown = check(params, "add-score-breakdown"); - m_source.reset(new Sentence(0,m_source_string)); - si = params.find("lambda"); - if (si != params.end()) - { - // muMo = multiModel - xmlrpc_c::value_array muMoArray = xmlrpc_c::value_array(si->second); - vector muMoValVec(muMoArray.vectorValueValue()); - vector w(muMoValVec.size()); - for (size_t i = 0; i < muMoValVec.size(); ++i) - w[i] = xmlrpc_c::value_double(muMoValVec[i]); - if (w.size() && (si = params.find("model_name")) != params.end()) - { - string const model_name = xmlrpc_c::value_string(si->second); - PhraseDictionaryMultiModel* pdmm - = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name); - // Moses::PhraseDictionaryMultiModel* pdmm - // = FindPhraseDictionary(model_name); - pdmm->SetTemporaryMultiModelWeightsVector(w); - } - } - - // // biased sampling for suffix-array-based sampling phrase table? - // if ((si = params.find("bias")) != params.end()) - // { - // std::vector tmp - // = xmlrpc_c::value_array(si->second).cvalue(); - // for (size_t i = 1; i < tmp.size(); i += 2) - // m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]); - // } - } // end of Translationtask::parse_request() - - - void - TranslationRequest:: - run_chart_decoder() - { - Moses::TreeInput tinput; - istringstream buf(m_source_string + "\n"); - tinput.Read(buf, StaticData::Instance().GetInputFactorOrder()); - - Moses::ChartManager manager(this->self()); - manager.Decode(); - - const Moses::ChartHypothesis *hypo = manager.GetBestHypothesis(); - ostringstream out; - outputChartHypo(out,hypo); - - m_target_string = out.str(); - m_retData["text"] = xmlrpc_c::value_string(m_target_string); - - if (m_withGraphInfo) - { - std::ostringstream sgstream; - manager.OutputSearchGraphMoses(sgstream); - m_retData["sg"] = xmlrpc_c::value_string(sgstream.str()); - } - } // end of TranslationRequest::run_chart_decoder() - - void - TranslationRequest:: - pack_hypothesis(vector const& edges, string const& key, - map & dest) const - { - // target string - ostringstream target; + vector p_aln; BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) - output_phrase(target, e->GetCurrTargetPhrase()); - dest[key] = xmlrpc_c::value_string(target.str()); - - if (m_withAlignInfo) - { // phrase alignment, if requested - - vector p_aln; - BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) - add_phrase_aln_info(*e, p_aln); - dest["align"] = xmlrpc_c::value_array(p_aln); - } - - if (m_withWordAlignInfo) - { // word alignment, if requested - vector w_aln; - BOOST_FOREACH(Hypothesis const* e, edges) - e->OutputLocalWordAlignment(w_aln); - dest["word-align"] = xmlrpc_c::value_array(w_aln); - } + add_phrase_aln_info(*e, p_aln); + dest["align"] = xmlrpc_c::value_array(p_aln); } - void - TranslationRequest:: - pack_hypothesis(Hypothesis const* h, string const& key, - map& dest) const - { - using namespace std; - vector edges; - for (;h; h = h->GetPrevHypo()) - edges.push_back(h); - pack_hypothesis(edges, key, dest); - } - - - void - TranslationRequest:: - run_phrase_decoder() - { - Manager manager(this->self()); - // if (m_bias.size()) manager.SetBias(&m_bias); - manager.Decode(); - - pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData); - - if (m_withGraphInfo) insertGraphInfo(manager,m_retData); - if (m_withTopts) insertTranslationOptions(manager,m_retData); - if (m_nbestSize) outputNBest(manager, m_retData); - - (const_cast(Moses::StaticData::Instance())) - .SetOutputSearchGraph(false); - // WTF? one more reason not to have this as global variable! --- UG - + if (m_withWordAlignInfo) { + // word alignment, if requested + vector w_aln; + BOOST_FOREACH(Hypothesis const* e, edges) + e->OutputLocalWordAlignment(w_aln); + dest["word-align"] = xmlrpc_c::value_array(w_aln); } } + +void +TranslationRequest:: +pack_hypothesis(Hypothesis const* h, string const& key, + map& dest) const +{ + using namespace std; + vector edges; + for (; h; h = h->GetPrevHypo()) + edges.push_back(h); + pack_hypothesis(edges, key, dest); +} + + +void +TranslationRequest:: +run_phrase_decoder() +{ + Manager manager(this->self()); + // if (m_bias.size()) manager.SetBias(&m_bias); + manager.Decode(); + + pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData); + + if (m_withGraphInfo) insertGraphInfo(manager,m_retData); + if (m_withTopts) insertTranslationOptions(manager,m_retData); + if (m_nbestSize) outputNBest(manager, m_retData); + + (const_cast(Moses::StaticData::Instance())) + .SetOutputSearchGraph(false); + // WTF? one more reason not to have this as global variable! --- UG + +} +} diff --git a/moses/server/TranslationRequest.h b/moses/server/TranslationRequest.h index 6c7cd7275..d67e55e03 100644 --- a/moses/server/TranslationRequest.h +++ b/moses/server/TranslationRequest.h @@ -23,100 +23,106 @@ #include namespace MosesServer { - class +class TranslationRequest : public virtual Moses::TranslationTask - { - boost::condition_variable& m_cond; - boost::mutex& m_mutex; - bool m_done; +{ + boost::condition_variable& m_cond; + boost::mutex& m_mutex; + bool m_done; - xmlrpc_c::paramList const& m_paramList; - std::map m_retData; - std::map m_bias; // for biased sampling + xmlrpc_c::paramList const& m_paramList; + std::map m_retData; + std::map m_bias; // for biased sampling - std::string m_source_string, m_target_string; - bool m_withAlignInfo; - bool m_withWordAlignInfo; - bool m_withGraphInfo; - bool m_withTopts; - bool m_reportAllFactors; - bool m_nbestDistinct; - bool m_withScoreBreakdown; - size_t m_nbestSize; + std::string m_source_string, m_target_string; + bool m_withAlignInfo; + bool m_withWordAlignInfo; + bool m_withGraphInfo; + bool m_withTopts; + bool m_reportAllFactors; + bool m_nbestDistinct; + bool m_withScoreBreakdown; + size_t m_nbestSize; - void - parse_request(); + void + parse_request(); - void - parse_request(std::map const& req); + void + parse_request(std::map const& req); - virtual void - run_chart_decoder(); + virtual void + run_chart_decoder(); - virtual void - run_phrase_decoder(); + virtual void + run_phrase_decoder(); - void - pack_hypothesis(std::vector const& edges, - std::string const& key, - std::map & dest) const; + void + pack_hypothesis(std::vector const& edges, + std::string const& key, + std::map & dest) const; - void - pack_hypothesis(Moses::Hypothesis const* h, std::string const& key, - std::map & dest) const; + void + pack_hypothesis(Moses::Hypothesis const* h, std::string const& key, + std::map & dest) const; - void - output_phrase(std::ostream& out, Moses::Phrase const& phrase) const; + void + output_phrase(std::ostream& out, Moses::Phrase const& phrase) const; - void - add_phrase_aln_info(Moses::Hypothesis const& h, - std::vector& aInfo) const; + void + add_phrase_aln_info(Moses::Hypothesis const& h, + std::vector& aInfo) const; - void - outputChartHypo(std::ostream& out, const Moses::ChartHypothesis* hypo); + void + outputChartHypo(std::ostream& out, const Moses::ChartHypothesis* hypo); - bool - compareSearchGraphNode(const Moses::SearchGraphNode& a, - const Moses::SearchGraphNode& b); + bool + compareSearchGraphNode(const Moses::SearchGraphNode& a, + const Moses::SearchGraphNode& b); - void - insertGraphInfo(Moses::Manager& manager, - std::map& retData); - void - outputNBest(Moses::Manager const& manager, - std::map& retData); + void + insertGraphInfo(Moses::Manager& manager, + std::map& retData); + void + outputNBest(Moses::Manager const& manager, + std::map& retData); - void - insertTranslationOptions(Moses::Manager& manager, - std::map& retData); - protected: - TranslationRequest(xmlrpc_c::paramList const& paramList, - boost::condition_variable& cond, - boost::mutex& mut); + void + insertTranslationOptions(Moses::Manager& manager, + std::map& retData); +protected: + TranslationRequest(xmlrpc_c::paramList const& paramList, + boost::condition_variable& cond, + boost::mutex& mut); - public: +public: - static - boost::shared_ptr - create(xmlrpc_c::paramList const& paramList, - boost::condition_variable& cond, - boost::mutex& mut); + static + boost::shared_ptr + create(xmlrpc_c::paramList const& paramList, + boost::condition_variable& cond, + boost::mutex& mut); - virtual bool - DeleteAfterExecution() { return false; } + virtual bool + DeleteAfterExecution() { + return false; + } - bool - IsDone() const { return m_done; } + bool + IsDone() const { + return m_done; + } - std::map const& - GetRetData() { return m_retData; } + std::map const& + GetRetData() { + return m_retData; + } - void - Run(); + void + Run(); - }; +}; } diff --git a/moses/server/Translator.cpp b/moses/server/Translator.cpp index d4cff99df..be8920abd 100644 --- a/moses/server/Translator.cpp +++ b/moses/server/Translator.cpp @@ -4,34 +4,34 @@ namespace MosesServer { - using namespace std; - using namespace Moses; +using namespace std; +using namespace Moses; - Translator:: - Translator(size_t numThreads) - : m_threadPool(numThreads) - { - // signature and help strings are documentation -- the client - // can query this information with a system.methodSignature and - // system.methodHelp RPC. - this->_signature = "S:S"; - this->_help = "Does translation"; - } +Translator:: +Translator(size_t numThreads) + : m_threadPool(numThreads) +{ + // signature and help strings are documentation -- the client + // can query this information with a system.methodSignature and + // system.methodHelp RPC. + this->_signature = "S:S"; + this->_help = "Does translation"; +} - void - Translator:: - execute(xmlrpc_c::paramList const& paramList, - xmlrpc_c::value * const retvalP) - { - boost::condition_variable cond; - boost::mutex mut; - boost::shared_ptr task - = TranslationRequest::create(paramList,cond,mut); - m_threadPool.Submit(task); - boost::unique_lock lock(mut); - while (!task->IsDone()) - cond.wait(lock); - *retvalP = xmlrpc_c::value_struct(task->GetRetData()); - } +void +Translator:: +execute(xmlrpc_c::paramList const& paramList, + xmlrpc_c::value * const retvalP) +{ + boost::condition_variable cond; + boost::mutex mut; + boost::shared_ptr task + = TranslationRequest::create(paramList,cond,mut); + m_threadPool.Submit(task); + boost::unique_lock lock(mut); + while (!task->IsDone()) + cond.wait(lock); + *retvalP = xmlrpc_c::value_struct(task->GetRetData()); +} } diff --git a/moses/server/Translator.h b/moses/server/Translator.h index e3117c290..4a6f889e8 100644 --- a/moses/server/Translator.h +++ b/moses/server/Translator.h @@ -10,17 +10,17 @@ #endif namespace MosesServer { - class +class // MosesServer:: Translator : public xmlrpc_c::method - { - public: - Translator(size_t numThreads = 10); +{ +public: + Translator(size_t numThreads = 10); - void execute(xmlrpc_c::paramList const& paramList, - xmlrpc_c::value * const retvalP); - private: - Moses::ThreadPool m_threadPool; - }; + void execute(xmlrpc_c::paramList const& paramList, + xmlrpc_c::value * const retvalP); +private: + Moses::ThreadPool m_threadPool; +}; } diff --git a/moses/server/Updater.cpp b/moses/server/Updater.cpp index 818f374a5..bf129bf49 100644 --- a/moses/server/Updater.cpp +++ b/moses/server/Updater.cpp @@ -2,56 +2,56 @@ namespace MosesServer { - using namespace Moses; - using namespace std; +using namespace Moses; +using namespace std; - Updater:: - Updater() - { - // signature and help strings are documentation -- the client - // can query this information with a system.methodSignature and - // system.methodHelp RPC. - this->_signature = "S:S"; - this->_help = "Updates stuff"; - } +Updater:: +Updater() +{ + // signature and help strings are documentation -- the client + // can query this information with a system.methodSignature and + // system.methodHelp RPC. + this->_signature = "S:S"; + this->_help = "Updates stuff"; +} - void - Updater:: - execute(xmlrpc_c::paramList const& paramList, - xmlrpc_c::value * const retvalP) - { +void +Updater:: +execute(xmlrpc_c::paramList const& paramList, + xmlrpc_c::value * const retvalP) +{ #if PT_UG - const params_t params = paramList.getStruct(0); - breakOutParams(params); - Mmsapt* pdsa = reinterpret_cast(PhraseDictionary::GetColl()[0]); - pdsa->add(m_src, m_trg, m_aln); - XVERBOSE(1,"Done inserting\n"); - *retvalP = xmlrpc_c::value_string("Phrase table updated"); + const params_t params = paramList.getStruct(0); + breakOutParams(params); + Mmsapt* pdsa = reinterpret_cast(PhraseDictionary::GetColl()[0]); + pdsa->add(m_src, m_trg, m_aln); + XVERBOSE(1,"Done inserting\n"); + *retvalP = xmlrpc_c::value_string("Phrase table updated"); #endif - }; +}; - void - Updater:: - breakOutParams(const params_t& params) - { - params_t::const_iterator si = params.find("source"); - if(si == params.end()) - throw xmlrpc_c::fault("Missing source sentence", - xmlrpc_c::fault::CODE_PARSE); - m_src = xmlrpc_c::value_string(si->second); - XVERBOSE(1,"source = " << m_src << endl); - si = params.find("target"); - if(si == params.end()) - throw xmlrpc_c::fault("Missing target sentence", - xmlrpc_c::fault::CODE_PARSE); - m_trg = xmlrpc_c::value_string(si->second); - XVERBOSE(1,"target = " << m_trg << endl); - if((si = params.find("alignment")) == params.end()) - throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE); - m_aln = xmlrpc_c::value_string(si->second); - XVERBOSE(1,"alignment = " << m_aln << endl); - m_bounded = ((si = params.find("bounded")) != params.end()); - m_add2ORLM = ((si = params.find("updateORLM")) != params.end()); - }; +void +Updater:: +breakOutParams(const params_t& params) +{ + params_t::const_iterator si = params.find("source"); + if(si == params.end()) + throw xmlrpc_c::fault("Missing source sentence", + xmlrpc_c::fault::CODE_PARSE); + m_src = xmlrpc_c::value_string(si->second); + XVERBOSE(1,"source = " << m_src << endl); + si = params.find("target"); + if(si == params.end()) + throw xmlrpc_c::fault("Missing target sentence", + xmlrpc_c::fault::CODE_PARSE); + m_trg = xmlrpc_c::value_string(si->second); + XVERBOSE(1,"target = " << m_trg << endl); + if((si = params.find("alignment")) == params.end()) + throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE); + m_aln = xmlrpc_c::value_string(si->second); + XVERBOSE(1,"alignment = " << m_aln << endl); + m_bounded = ((si = params.find("bounded")) != params.end()); + m_add2ORLM = ((si = params.find("updateORLM")) != params.end()); +}; } diff --git a/moses/server/Updater.h b/moses/server/Updater.h index 9bb20b775..e3eba52ef 100644 --- a/moses/server/Updater.h +++ b/moses/server/Updater.h @@ -19,26 +19,26 @@ namespace MosesServer { - class +class Updater: public xmlrpc_c::method - { +{ - typedef std::map params_t; + typedef std::map params_t; - std::string m_src, m_trg, m_aln; - bool m_bounded, m_add2ORLM; + std::string m_src, m_trg, m_aln; + bool m_bounded, m_add2ORLM; - public: - Updater(); +public: + Updater(); - void - execute(xmlrpc_c::paramList const& paramList, - xmlrpc_c::value * const retvalP); + void + execute(xmlrpc_c::paramList const& paramList, + xmlrpc_c::value * const retvalP); - void - breakOutParams(const params_t& params); + void + breakOutParams(const params_t& params); - }; +}; } diff --git a/moses/thread_safe_container.h b/moses/thread_safe_container.h index 1983d7234..6a977185b 100644 --- a/moses/thread_safe_container.h +++ b/moses/thread_safe_container.h @@ -18,108 +18,104 @@ namespace Moses { - // todo: replace this with thread lock-free containers, if a stable library can - // be found somewhere +// todo: replace this with thread lock-free containers, if a stable library can +// be found somewhere - template > - class +template > +class ThreadSafeContainer +{ +protected: + mutable boost::shared_mutex m_lock; + CONTAINER m_container; + typedef typename CONTAINER::iterator iter_t; + typedef typename CONTAINER::const_iterator const_iter_t; + typedef typename CONTAINER::value_type entry_t; +public: + + class locking_iterator { - protected: - mutable boost::shared_mutex m_lock; - CONTAINER m_container; - typedef typename CONTAINER::iterator iter_t; - typedef typename CONTAINER::const_iterator const_iter_t; - typedef typename CONTAINER::value_type entry_t; + boost::unique_lock m_lock; + CONTAINER const* m_container; + const_iter_t m_iter; + + locking_iterator(locking_iterator const& other); // no copies! public: + locking_iterator() : m_container(NULL) { } - class locking_iterator - { - boost::unique_lock m_lock; - CONTAINER const* m_container; - const_iter_t m_iter; + locking_iterator(boost::shared_mutex& lock, + CONTAINER const* container, + const_iter_t const& iter) + : m_lock(lock), m_container(container), m_iter(iter) + { } - locking_iterator(locking_iterator const& other); // no copies! - public: - locking_iterator() : m_container(NULL) { } - - locking_iterator(boost::shared_mutex& lock, - CONTAINER const* container, - const_iter_t const& iter) - : m_lock(lock), m_container(container), m_iter(iter) - { } - - entry_t const& operator->() - { - UTIL_THROW_IF2(m_container == NULL, "This locking iterator is invalid " - << "or has not been assigned."); - return m_iter.operator->(); - } - - // locking operators transfer the lock upon assignment and become invalid - locking_iterator const& - operator=(locking_iterator& other) - { - m_lock.swap(other.m_lock); - m_iter = other.m_iter; - other.m_iter = other.m_container.end(); - } - - bool - operator==(const_iter_t const& other) - { - return m_iter == other; - } - - locking_iterator const& - operator++() { ++m_iter; return *this; } - - // DO NOT DEFINE THE POST-INCREMENT OPERATOR! - // locking_operators are non-copyable, - // so we can't simply make a copy before incrementing and return - // the copy after incrementing - locking_iterator const& - operator++(int); - }; - - const_iter_t const& end() const - { return m_container.end(); } - - locking_iterator begin() const - { - return locking_iterator(m_lock, this, m_container.begin()); + entry_t const& operator->() { + UTIL_THROW_IF2(m_container == NULL, "This locking iterator is invalid " + << "or has not been assigned."); + return m_iter.operator->(); } - VAL const& set(KEY const& key, VAL const& val) - { - boost::unique_lock< boost::shared_mutex > lock(m_lock); - entry_t entry(key,val); - iter_t foo = m_container.insert(entry).first; - foo->second = val; - return foo->second; + // locking operators transfer the lock upon assignment and become invalid + locking_iterator const& + operator=(locking_iterator& other) { + m_lock.swap(other.m_lock); + m_iter = other.m_iter; + other.m_iter = other.m_container.end(); } - VAL const* get(KEY const& key, VAL const& default_val) - { - boost::shared_lock< boost::shared_mutex > lock(m_lock); - entry_t entry(key, default_val); - iter_t foo = m_container.insert(entry).first; - return &(foo->second); + bool + operator==(const_iter_t const& other) { + return m_iter == other; } - VAL const* get(KEY const& key) const - { - boost::shared_lock< boost::shared_mutex > lock(m_lock); - const_iter_t m = m_container.find(key); - if (m == m_container.end()) return NULL; - return &m->second; + locking_iterator const& + operator++() { + ++m_iter; + return *this; } - size_t erase(KEY const& key) - { - boost::unique_lock< boost::shared_mutex > lock(m_lock); - return m_container.erase(key); - } + // DO NOT DEFINE THE POST-INCREMENT OPERATOR! + // locking_operators are non-copyable, + // so we can't simply make a copy before incrementing and return + // the copy after incrementing + locking_iterator const& + operator++(int); }; + + const_iter_t const& end() const { + return m_container.end(); + } + + locking_iterator begin() const { + return locking_iterator(m_lock, this, m_container.begin()); + } + + VAL const& set(KEY const& key, VAL const& val) { + boost::unique_lock< boost::shared_mutex > lock(m_lock); + entry_t entry(key,val); + iter_t foo = m_container.insert(entry).first; + foo->second = val; + return foo->second; + } + + VAL const* get(KEY const& key, VAL const& default_val) { + boost::shared_lock< boost::shared_mutex > lock(m_lock); + entry_t entry(key, default_val); + iter_t foo = m_container.insert(entry).first; + return &(foo->second); + } + + VAL const* get(KEY const& key) const { + boost::shared_lock< boost::shared_mutex > lock(m_lock); + const_iter_t m = m_container.find(key); + if (m == m_container.end()) return NULL; + return &m->second; + } + + size_t erase(KEY const& key) { + boost::unique_lock< boost::shared_mutex > lock(m_lock); + return m_container.erase(key); + } +}; } #endif diff --git a/phrase-extract/ExtractionPhrasePair.h b/phrase-extract/ExtractionPhrasePair.h index 3fa380d4f..58935a727 100644 --- a/phrase-extract/ExtractionPhrasePair.h +++ b/phrase-extract/ExtractionPhrasePair.h @@ -146,7 +146,7 @@ public: void AddProperty(const std::string &key, const std::string &value, float count) { std::map >::iterator iter = m_properties.find(key); + std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key); if ( iter == m_properties.end() ) { // key not found: insert property key and value PROPERTY_VALUES *propertyValues = new PROPERTY_VALUES(); diff --git a/phrase-extract/PropertiesConsolidator.cpp b/phrase-extract/PropertiesConsolidator.cpp index 59c56b54b..94b6ea13a 100644 --- a/phrase-extract/PropertiesConsolidator.cpp +++ b/phrase-extract/PropertiesConsolidator.cpp @@ -116,18 +116,18 @@ void PropertiesConsolidator::ProcessPropertiesString(const std::string &properti } else if ( !keyValue[0].compare("POS") ) { -/* DO NOTHING (property is not registered in the decoder at the moment) - if ( m_partsOfSpeechFlag ) { + /* DO NOTHING (property is not registered in the decoder at the moment) + if ( m_partsOfSpeechFlag ) { - // POS property: replace strings with vocabulary indices - out << " {{" << keyValue[0]; - ProcessPOSPropertyValue(keyValue[1], out); - out << "}}"; + // POS property: replace strings with vocabulary indices + out << " {{" << keyValue[0]; + ProcessPOSPropertyValue(keyValue[1], out); + out << "}}"; - } else { // don't process POS property - out << " {{" << keyValue[0] << " " << keyValue[1] << "}}"; - } -*/ + } else { // don't process POS property + out << " {{" << keyValue[0] << " " << keyValue[1] << "}}"; + } + */ } else { diff --git a/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/phrase-extract/extract-ghkm/ExtractGHKM.cpp index 7e9a3ec0a..6468b7473 100644 --- a/phrase-extract/extract-ghkm/ExtractGHKM.cpp +++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp @@ -699,12 +699,12 @@ void ExtractGHKM::WriteGlueGrammar( // const size_t partOfSpeechSentenceStart = 0; // const size_t partOfSpeechSentenceEnd = 1; - #ifndef BOS_ - #define BOS_ "" //Beginning of sentence symbol - #endif - #ifndef EOS_ - #define EOS_ "" //End of sentence symbol - #endif +#ifndef BOS_ +#define BOS_ "" //Beginning of sentence symbol +#endif +#ifndef EOS_ +#define EOS_ "" //End of sentence symbol +#endif std::string sentenceStartSource = BOS_; std::string sentenceEndSource = EOS_; diff --git a/phrase-extract/filter-rule-table/TreeCfgFilter.cpp b/phrase-extract/filter-rule-table/TreeCfgFilter.cpp index 153c706f3..cb04dc94e 100644 --- a/phrase-extract/filter-rule-table/TreeCfgFilter.cpp +++ b/phrase-extract/filter-rule-table/TreeCfgFilter.cpp @@ -12,7 +12,7 @@ namespace FilterRuleTable { TreeCfgFilter::TreeCfgFilter( - const std::vector > &sentences) + const std::vector > &sentences) { } diff --git a/phrase-extract/filter-rule-table/TreeCfgFilter.h b/phrase-extract/filter-rule-table/TreeCfgFilter.h index 5812a6dcc..7dd0fa072 100644 --- a/phrase-extract/filter-rule-table/TreeCfgFilter.h +++ b/phrase-extract/filter-rule-table/TreeCfgFilter.h @@ -25,8 +25,9 @@ namespace FilterRuleTable // Filters a rule table, discarding rules that cannot be applied to a given // test set. The rule table must have a TSG source-side and the test sentences // must be parse trees. -class TreeCfgFilter : public CfgFilter { - public: +class TreeCfgFilter : public CfgFilter +{ +public: // Initialize the filter for a given set of test sentences. TreeCfgFilter(const std::vector > &); diff --git a/phrase-extract/postprocess-egret-forests/Forest.h b/phrase-extract/postprocess-egret-forests/Forest.h index 88344e0d7..7f00ecb88 100644 --- a/phrase-extract/postprocess-egret-forests/Forest.h +++ b/phrase-extract/postprocess-egret-forests/Forest.h @@ -15,7 +15,7 @@ namespace PostprocessEgretForests class Forest { - public: +public: struct Vertex; struct Hyperedge { @@ -35,7 +35,7 @@ class Forest std::vector > vertices; - private: +private: // Copying is not allowed. Forest(const Forest &); Forest &operator=(const Forest &); diff --git a/phrase-extract/postprocess-egret-forests/ForestParser.cpp b/phrase-extract/postprocess-egret-forests/ForestParser.cpp index 565117ace..21e479ca6 100644 --- a/phrase-extract/postprocess-egret-forests/ForestParser.cpp +++ b/phrase-extract/postprocess-egret-forests/ForestParser.cpp @@ -17,15 +17,18 @@ namespace PostprocessEgretForests { ForestParser::ForestParser() - : m_input(0) { + : m_input(0) +{ } ForestParser::ForestParser(std::istream &input) - : m_input(&input) { + : m_input(&input) +{ ++(*this); } -ForestParser &ForestParser::operator++() { +ForestParser &ForestParser::operator++() +{ if (!m_input) { return *this; } @@ -106,7 +109,7 @@ void ForestParser::ParseHyperedgeLine(const std::string &line, Forest &forest) } boost::shared_ptr ForestParser::ParseVertex( - const StringPiece &s) + const StringPiece &s) { VertexSP v = boost::make_shared(); std::size_t pos = s.rfind('['); @@ -132,12 +135,14 @@ boost::shared_ptr ForestParser::ParseVertex( return v; } -bool operator==(const ForestParser &lhs, const ForestParser &rhs) { +bool operator==(const ForestParser &lhs, const ForestParser &rhs) +{ // TODO Is this right? Compare values of istreams if non-zero? return lhs.m_input == rhs.m_input; } -bool operator!=(const ForestParser &lhs, const ForestParser &rhs) { +bool operator!=(const ForestParser &lhs, const ForestParser &rhs) +{ return !(lhs == rhs); } diff --git a/phrase-extract/postprocess-egret-forests/ForestParser.h b/phrase-extract/postprocess-egret-forests/ForestParser.h index ee9ced56a..7f0b6f297 100644 --- a/phrase-extract/postprocess-egret-forests/ForestParser.h +++ b/phrase-extract/postprocess-egret-forests/ForestParser.h @@ -20,8 +20,9 @@ namespace Syntax namespace PostprocessEgretForests { -class ForestParser { - public: +class ForestParser +{ +public: struct Entry { std::size_t sentNum; std::string sentence; @@ -31,15 +32,19 @@ class ForestParser { ForestParser(); ForestParser(std::istream &); - Entry &operator*() { return m_entry; } - Entry *operator->() { return &m_entry; } + Entry &operator*() { + return m_entry; + } + Entry *operator->() { + return &m_entry; + } ForestParser &operator++(); friend bool operator==(const ForestParser &, const ForestParser &); friend bool operator!=(const ForestParser &, const ForestParser &); - private: +private: typedef boost::shared_ptr VertexSP; typedef boost::shared_ptr HyperedgeSP; @@ -60,7 +65,7 @@ class ForestParser { }; typedef boost::unordered_set VertexSet; + VertexSetPred> VertexSet; // Copying is not allowed ForestParser(const ForestParser &); diff --git a/phrase-extract/postprocess-egret-forests/ForestWriter.cpp b/phrase-extract/postprocess-egret-forests/ForestWriter.cpp index 7d8360098..54a2cbed9 100644 --- a/phrase-extract/postprocess-egret-forests/ForestWriter.cpp +++ b/phrase-extract/postprocess-egret-forests/ForestWriter.cpp @@ -61,7 +61,8 @@ void ForestWriter::WriteVertex(const Forest::Vertex &v) } } -std::string ForestWriter::PossiblyEscape(const std::string &s) const { +std::string ForestWriter::PossiblyEscape(const std::string &s) const +{ if (m_options.escape) { return Escape(s); } else { @@ -70,7 +71,8 @@ std::string ForestWriter::PossiblyEscape(const std::string &s) const { } // Escapes XML special characters. -std::string ForestWriter::Escape(const std::string &s) const { +std::string ForestWriter::Escape(const std::string &s) const +{ std::string t; std::size_t len = s.size(); t.reserve(len); diff --git a/phrase-extract/postprocess-egret-forests/ForestWriter.h b/phrase-extract/postprocess-egret-forests/ForestWriter.h index ae3cf028d..10c1fe05c 100644 --- a/phrase-extract/postprocess-egret-forests/ForestWriter.h +++ b/phrase-extract/postprocess-egret-forests/ForestWriter.h @@ -15,13 +15,13 @@ namespace PostprocessEgretForests class ForestWriter { - public: +public: ForestWriter(const Options &options, std::ostream &out) : m_options(options), m_out(out) {} void Write(const std::string &, const Forest &, std::size_t); - private: +private: std::string Escape(const std::string &) const; std::string PossiblyEscape(const std::string &) const; void WriteHyperedgeLine(const Forest::Hyperedge &); diff --git a/phrase-extract/postprocess-egret-forests/PostprocessEgretForests.cpp b/phrase-extract/postprocess-egret-forests/PostprocessEgretForests.cpp index 92a9d195f..d87e082dc 100644 --- a/phrase-extract/postprocess-egret-forests/PostprocessEgretForests.cpp +++ b/phrase-extract/postprocess-egret-forests/PostprocessEgretForests.cpp @@ -47,8 +47,8 @@ int PostprocessEgretForests::Main(int argc, char *argv[]) } void PostprocessEgretForests::ProcessForest( - std::istream &in, std::ostream &out, SplitPointFileParser *splitPointParser, - const Options &options) + std::istream &in, std::ostream &out, SplitPointFileParser *splitPointParser, + const Options &options) { std::size_t sentNum = 0; ForestWriter writer(options, out); @@ -77,7 +77,7 @@ void PostprocessEgretForests::ProcessForest( } void PostprocessEgretForests::OpenInputFileOrDie(const std::string &filename, - std::ifstream &stream) + std::ifstream &stream) { stream.open(filename.c_str()); if (!stream) { @@ -88,7 +88,7 @@ void PostprocessEgretForests::OpenInputFileOrDie(const std::string &filename, } void PostprocessEgretForests::ProcessOptions(int argc, char *argv[], - Options &options) const + Options &options) const { namespace po = boost::program_options; namespace cls = boost::program_options::command_line_style; @@ -119,7 +119,7 @@ void PostprocessEgretForests::ProcessOptions(int argc, char *argv[], // (these are used as positional options). po::options_description hidden("Hidden options"); hidden.add_options() - // None + // None ; // Compose the full set of command-line options. diff --git a/phrase-extract/postprocess-egret-forests/SplitPointFileParser.cpp b/phrase-extract/postprocess-egret-forests/SplitPointFileParser.cpp index 5b3ce7a54..4bf3c4792 100644 --- a/phrase-extract/postprocess-egret-forests/SplitPointFileParser.cpp +++ b/phrase-extract/postprocess-egret-forests/SplitPointFileParser.cpp @@ -16,15 +16,18 @@ namespace PostprocessEgretForests { SplitPointFileParser::SplitPointFileParser() - : m_input(0) { + : m_input(0) +{ } SplitPointFileParser::SplitPointFileParser(std::istream &input) - : m_input(&input) { + : m_input(&input) +{ ++(*this); } -SplitPointFileParser &SplitPointFileParser::operator++() { +SplitPointFileParser &SplitPointFileParser::operator++() +{ if (!m_input) { return *this; } @@ -66,13 +69,15 @@ void SplitPointFileParser::ParseLine(const std::string &line, } bool operator==(const SplitPointFileParser &lhs, - const SplitPointFileParser &rhs) { + const SplitPointFileParser &rhs) +{ // TODO Is this right? Compare values of istreams if non-zero? return lhs.m_input == rhs.m_input; } bool operator!=(const SplitPointFileParser &lhs, - const SplitPointFileParser &rhs) { + const SplitPointFileParser &rhs) +{ return !(lhs == rhs); } diff --git a/phrase-extract/postprocess-egret-forests/SplitPointFileParser.h b/phrase-extract/postprocess-egret-forests/SplitPointFileParser.h index a2d700971..35fdb3ad2 100644 --- a/phrase-extract/postprocess-egret-forests/SplitPointFileParser.h +++ b/phrase-extract/postprocess-egret-forests/SplitPointFileParser.h @@ -13,8 +13,9 @@ namespace Syntax namespace PostprocessEgretForests { -class SplitPointFileParser { - public: +class SplitPointFileParser +{ +public: struct Entry { std::vector splitPoints; }; @@ -22,8 +23,12 @@ class SplitPointFileParser { SplitPointFileParser(); SplitPointFileParser(std::istream &); - const Entry &operator*() const { return m_entry; } - const Entry *operator->() const { return &m_entry; } + const Entry &operator*() const { + return m_entry; + } + const Entry *operator->() const { + return &m_entry; + } SplitPointFileParser &operator++(); @@ -33,7 +38,7 @@ class SplitPointFileParser { friend bool operator!=(const SplitPointFileParser &, const SplitPointFileParser &); - private: +private: void ParseLine(const std::string &, std::vector &); Entry m_entry; diff --git a/phrase-extract/postprocess-egret-forests/Symbol.h b/phrase-extract/postprocess-egret-forests/Symbol.h index f00f642d3..1b8929f49 100644 --- a/phrase-extract/postprocess-egret-forests/Symbol.h +++ b/phrase-extract/postprocess-egret-forests/Symbol.h @@ -30,14 +30,14 @@ inline bool operator==(const Symbol &s, const Symbol &t) } struct SymbolHasher { - public: +public: std::size_t operator()(const Symbol &s) const { return hash_value(s); } }; struct SymbolEqualityPred { - public: +public: bool operator()(const Symbol &s, const Symbol &t) const { return s.value == t.value && s.isNonTerminal == t.isNonTerminal; } diff --git a/phrase-extract/postprocess-egret-forests/TopologicalSorter.h b/phrase-extract/postprocess-egret-forests/TopologicalSorter.h index 96f19a8e9..7ed667369 100644 --- a/phrase-extract/postprocess-egret-forests/TopologicalSorter.h +++ b/phrase-extract/postprocess-egret-forests/TopologicalSorter.h @@ -16,10 +16,10 @@ namespace PostprocessEgretForests class TopologicalSorter { - public: +public: void Sort(const Forest &, std::vector &); - private: +private: typedef boost::unordered_set VertexSet; void BuildPredSets(const Forest &); diff --git a/phrase-extract/score-main.cpp b/phrase-extract/score-main.cpp index 53e4ab501..b65dce4ba 100644 --- a/phrase-extract/score-main.cpp +++ b/phrase-extract/score-main.cpp @@ -900,7 +900,7 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair, } if (nonTermContext && !inverseFlag) { - std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContext"); + std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContext"); if (!propValue.empty() && propValue.size() < 50000) { size_t nNTs = NumNonTerminal(phraseSource); phraseTableFile << " {{NonTermContext " << nNTs << " " << propValue << "}}"; @@ -908,7 +908,7 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair, } if (nonTermContextTarget && !inverseFlag) { - std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContextTarget"); + std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContextTarget"); if (!propValue.empty() && propValue.size() < 50000) { size_t nNTs = NumNonTerminal(phraseSource); phraseTableFile << " {{NonTermContextTarget " << nNTs << " " << propValue << "}}"; diff --git a/symal/symal.cpp b/symal/symal.cpp index 7f3e22866..927676393 100644 --- a/symal/symal.cpp +++ b/symal/symal.cpp @@ -427,7 +427,7 @@ int main(int argc, char** argv) ostream *out = &std::cout; if (input) { - fstream *fin = new fstream(input,ios::in); + fstream *fin = new fstream(input,ios::in); if (!fin->is_open()) { cerr << "cannot open " << input << "\n"; exit(1); @@ -436,7 +436,7 @@ int main(int argc, char** argv) } if (output) { - fstream *fout = new fstream(output,ios::out); + fstream *fout = new fstream(output,ios::out); if (!fout->is_open()) { cerr << "cannot open " << output << "\n"; exit(1); @@ -506,12 +506,12 @@ int main(int argc, char** argv) for (int i=1; i<=MAX_N; i++) delete [] A[i]; delete [] A; - if (inp != &std::cin) { - delete inp; - } - if (out != &std::cout) { - delete inp; - } + if (inp != &std::cin) { + delete inp; + } + if (out != &std::cout) { + delete inp; + } exit(0); }