Merge ../mosesdecoder.merge-cmd

This commit is contained in:
Hieu Hoang 2014-12-11 14:14:11 +00:00
commit 411bd41fba
15 changed files with 179 additions and 153 deletions

View File

@ -5,12 +5,12 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2091728208" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -48,6 +48,7 @@
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
@ -86,12 +87,12 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.185559773" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -156,4 +157,5 @@
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>

View File

@ -300,6 +300,16 @@ void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream)
WriteSearchGraph(writer);
}
/**
 * Write the single-best translation of the current sentence to the collector.
 *
 * \param collector destination for the 1-best line; nothing is done when NULL.
 *
 * A line is emitted even when the search produced no hypothesis:
 * OutputBestHypo() handles a NULL hypothesis by writing an empty line
 * (prefixed with "0 " when hypothesis scores are reported), so every
 * translation id gets exactly one entry in the collector.
 */
void ChartManager::OutputBest(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }
  // Deliberately NOT guarded on the hypothesis being non-NULL; see above.
  const long translationId = m_source.GetTranslationId();
  OutputBestHypo(collector, GetBestHypothesis(), translationId);
}
void ChartManager::OutputNBest(OutputCollector *collector) const
{
const StaticData &staticData = StaticData::Instance();
@ -807,4 +817,61 @@ void ChartManager::OutputSearchGraphHypergraph() const
}
}
/**
 * Format the 1-best line for a chart hypothesis and hand it to the collector.
 *
 * \param collector     destination of the formatted line; no-op when NULL
 * \param hypo          best hypothesis, or NULL when no translation was found
 * \param translationId id under which the line is written to the collector
 *
 * Throws (via UTIL_THROW_IF2) if the output phrase holds fewer than two words.
 */
void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const
{
  if (collector == NULL) {
    return;
  }

  const StaticData &staticData = StaticData::Instance();
  std::ostringstream line;
  FixPrecision(line);

  if (hypo == NULL) {
    // No translation: still emit a (possibly score-prefixed) empty line.
    VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
    if (staticData.GetOutputHypoScore()) {
      line << "0 ";
    }
    line << std::endl;
  } else {
    VERBOSE(1,"BEST TRANSLATION: " << *hypo << std::endl);
    VERBOSE(3,"Best path: ");
    Backtrack(hypo);
    VERBOSE(3,"0" << std::endl);

    if (staticData.GetOutputHypoScore()) {
      line << hypo->GetTotalScore() << " ";
    }
    if (staticData.IsPathRecoveryEnabled()) {
      line << "||| ";
    }

    Phrase translation(ARRAY_SIZE_INCR);
    hypo->GetOutputPhrase(translation);

    // Strip the first and last word of the output phrase.
    UTIL_THROW_IF2(translation.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    translation.RemoveWord(0);
    translation.RemoveWord(translation.GetSize() - 1);

    line << translation.GetStringRep(staticData.GetOutputFactorOrder()) << std::endl;
  }

  collector->Write(translationId, line.str());
}
void ChartManager::Backtrack(const ChartHypothesis *hypo) const
{
const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
vector<const ChartHypothesis*>::const_iterator iter;
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
const ChartHypothesis *prevHypo = *iter;
VERBOSE(3,prevHypo->GetId() << " <= ");
Backtrack(prevHypo);
}
}
} // namespace Moses

View File

@ -99,6 +99,8 @@ private:
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
const Sentence &sentence,
long translationId) const;
// Formats the 1-best line for hypo (or an empty / "0 "-prefixed line when
// hypo is NULL) and writes it to collector under translationId.
// Does nothing when collector is NULL.
void OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const;
// Recursively prints the ids of hypo's predecessor hypotheses at verbosity level 3.
void Backtrack(const ChartHypothesis *hypo) const;
public:
ChartManager(InputType const& source);
@ -143,6 +145,7 @@ public:
const ChartParser &GetParser() const { return m_parser; }
// outputs
void OutputBest(OutputCollector *collector) const;
void OutputNBest(OutputCollector *collector) const;
void OutputLatticeSamples(OutputCollector *collector) const
{}

View File

@ -278,97 +278,6 @@ std::map<size_t, const Factor*> IOWrapper::GetPlaceholders(const Hypothesis &hyp
return ret;
}
void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId)
{
if (!m_singleBestOutputCollector)
return;
std::ostringstream out;
FixPrecision(out);
if (hypo != NULL) {
VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
VERBOSE(3,"Best path: ");
Backtrack(hypo);
VERBOSE(3,"0" << std::endl);
if (StaticData::Instance().GetOutputHypoScore()) {
out << hypo->GetTotalScore() << " ";
}
if (StaticData::Instance().IsPathRecoveryEnabled()) {
out << "||| ";
}
Phrase outPhrase(ARRAY_SIZE_INCR);
hypo->GetOutputPhrase(outPhrase);
// delete 1st & last
UTIL_THROW_IF2(outPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outPhrase.RemoveWord(0);
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
const std::vector<FactorType> outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
string output = outPhrase.GetStringRep(outputFactorOrder);
out << output << endl;
} else {
VERBOSE(1, "NO BEST TRANSLATION" << endl);
if (StaticData::Instance().GetOutputHypoScore()) {
out << "0 ";
}
out << endl;
}
m_singleBestOutputCollector->Write(translationId, out.str());
}
/**
 * Format the 1-best line for an incremental-search result and write it to
 * the single-best output collector.
 *
 * \param applied       best derivation returned by the incremental search
 * \param translationId id under which the line is written to the collector
 *
 * Does nothing when no single-best collector is configured. Throws (via
 * UTIL_THROW_IF2) if the output phrase holds fewer than two words.
 */
void IOWrapper::OutputBestHypo(search::Applied applied, long translationId)
{
  if (!m_singleBestOutputCollector) {
    return;
  }

  const StaticData &staticData = StaticData::Instance();
  std::ostringstream line;
  FixPrecision(line);

  if (staticData.GetOutputHypoScore()) {
    line << applied.GetScore() << ' ';
  }

  Phrase translation;
  Incremental::ToPhrase(applied, translation);

  // Strip the first and last word of the output phrase.
  UTIL_THROW_IF2(translation.GetSize() < 2,
                 "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
  translation.RemoveWord(0);
  translation.RemoveWord(translation.GetSize() - 1);

  line << translation.GetStringRep(staticData.GetOutputFactorOrder()) << '\n';
  m_singleBestOutputCollector->Write(translationId, line.str());

  VERBOSE(1,"BEST TRANSLATION: " << translation << "[total=" << applied.GetScore() << "]" << std::endl);
}
/**
 * Write the placeholder line for a sentence without any translation:
 * "0 \n" when hypothesis scores are reported, otherwise just "\n".
 * Does nothing when no single-best collector is configured.
 */
void IOWrapper::OutputBestNone(long translationId)
{
  if (m_singleBestOutputCollector == NULL) {
    return;
  }

  const bool withScore = StaticData::Instance().GetOutputHypoScore();
  const std::string emptyLine(withScore ? "0 \n" : "\n");
  m_singleBestOutputCollector->Write(translationId, emptyLine);
}
void IOWrapper::Backtrack(const ChartHypothesis *hypo)
{
const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
vector<const ChartHypothesis*>::const_iterator iter;
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
const ChartHypothesis *prevHypo = *iter;
VERBOSE(3,prevHypo->GetId() << " <= ");
Backtrack(prevHypo);
}
}
void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
{
if (hypo != NULL) {
@ -872,38 +781,5 @@ void IOWrapper::OutputLatticeMBRNBestList(const vector<LatticeMBRSolution>& solu
OutputLatticeMBRNBest(*m_nBestStream, solutions,translationId);
}
////////////////////////////
#include "moses/Syntax/PVertex.h"
#include "moses/Syntax/S2T/DerivationWriter.h"
/**
 * Format the 1-best line for a syntax-based search result and write it to
 * the single-best output collector.
 *
 * \param best          best hyperedge, or NULL when no translation was found
 * \param translationId id under which the line is written to the collector
 *
 * Does nothing when no single-best collector is configured. Throws (via
 * UTIL_THROW_IF2) if the target yield holds fewer than two words.
 */
void IOWrapper::OutputBestHypo(const Syntax::SHyperedge *best,
                               long translationId)
{
  if (!m_singleBestOutputCollector) {
    return;
  }
  std::ostringstream out;
  FixPrecision(out);
  const bool reportScore = StaticData::Instance().GetOutputHypoScore();
  if (best == NULL) {
    VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
    if (reportScore) {
      out << "0 ";
    }
    // Terminate the line here as well: without it a failed sentence would
    // not get its own output line, unlike OutputBestNone() which writes
    // "0 \n" / "\n" for the same situation.
    out << '\n';
  } else {
    if (reportScore) {
      out << best->score << " ";
    }
    Phrase yield = Syntax::GetOneBestTargetYield(*best);
    // delete 1st & last
    UTIL_THROW_IF2(yield.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    yield.RemoveWord(0);
    yield.RemoveWord(yield.GetSize()-1);
    out << yield.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
    out << '\n';
  }
  m_singleBestOutputCollector->Write(translationId, out.str());
}
} // namespace

View File

@ -103,7 +103,6 @@ protected:
// CHART
typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext;
void Backtrack(const ChartHypothesis *hypo);
void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
void OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
@ -171,11 +170,6 @@ public:
// CHART
void OutputBestHypo(const Moses::ChartHypothesis *hypo, long translationId);
void OutputBestHypo(search::Applied applied, long translationId);
void OutputBestHypo(const Moses::Syntax::SHyperedge *, long translationId);
void OutputBestNone(long translationId);
// phrase-based
void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors);

View File

@ -283,6 +283,20 @@ const std::vector<search::Applied> &Manager::GetNBest() const
return *completed_nbest_;
}
/**
 * Write the single-best translation of the current sentence to the collector:
 * the first entry of the n-best list when there is one, otherwise the
 * "no translation" placeholder line.
 */
void Manager::OutputBest(OutputCollector *collector) const
{
  const long translationId = m_source.GetTranslationId();
  const std::vector<search::Applied> &nbest = GetNBest();

  if (nbest.empty()) {
    OutputBestNone(collector, translationId);
    return;
  }
  OutputBestHypo(collector, nbest[0], translationId);
}
void Manager::OutputNBest(OutputCollector *collector) const
{
if (collector == NULL) {
@ -465,6 +479,38 @@ void Manager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
}
}
/**
 * Format the 1-best line for an incremental-search result and write it to
 * the collector.
 *
 * \param collector     destination of the formatted line; no-op when NULL
 * \param applied       best derivation returned by the search
 * \param translationId id under which the line is written to the collector
 *
 * Throws (via UTIL_THROW_IF2) if the output phrase holds fewer than two words.
 */
void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied, long translationId) const
{
  if (!collector) {
    return;
  }

  const StaticData &staticData = StaticData::Instance();
  std::ostringstream line;
  FixPrecision(line);

  if (staticData.GetOutputHypoScore()) {
    line << applied.GetScore() << ' ';
  }

  Phrase translation;
  Incremental::ToPhrase(applied, translation);

  // Strip the first and last word of the output phrase.
  UTIL_THROW_IF2(translation.GetSize() < 2,
                 "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
  translation.RemoveWord(0);
  translation.RemoveWord(translation.GetSize() - 1);

  line << translation.GetStringRep(staticData.GetOutputFactorOrder()) << '\n';
  collector->Write(translationId, line.str());

  VERBOSE(1,"BEST TRANSLATION: " << translation << "[total=" << applied.GetScore() << "]" << std::endl);
}
/**
 * Write the placeholder line for a sentence without any translation:
 * "0 \n" when hypothesis scores are reported, otherwise just "\n".
 * Does nothing when \p collector is NULL.
 */
void Manager::OutputBestNone(OutputCollector *collector, long translationId) const
{
  if (!collector) {
    return;
  }

  const bool withScore = StaticData::Instance().GetOutputHypoScore();
  const std::string emptyLine(withScore ? "0 \n" : "\n");
  collector->Write(translationId, emptyLine);
}
namespace
{

View File

@ -40,6 +40,7 @@ public:
}
// output
void OutputBest(OutputCollector *collector) const;
void OutputNBest(OutputCollector *collector) const;
void OutputDetailedTranslationReport(OutputCollector *collector) const;
void OutputNBestList(OutputCollector *collector, const std::vector<search::Applied> &nbest, long translationId) const;
@ -98,6 +99,8 @@ private:
const search::Applied *applied,
const Sentence &sentence,
long translationId) const;
// Formats the 1-best line for applied and writes it to collector under
// translationId. Does nothing when collector is NULL.
void OutputBestHypo(OutputCollector *collector, search::Applied applied, long translationId) const;
// Writes the "no translation" line ("0 \n" when hypothesis scores are
// reported, otherwise "\n"). Does nothing when collector is NULL.
void OutputBestNone(OutputCollector *collector, long translationId) const;
};

View File

@ -499,7 +499,7 @@ bool ascendingCoverageCmp(const Hypothesis* a, const Hypothesis* b)
return a->GetWordsBitmap().GetNumWordsCovered() < b->GetWordsBitmap().GetNumWordsCovered();
}
void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList,
void getLatticeMBRNBest(const Manager& manager, const TrellisPathList& nBestList,
vector<LatticeMBRSolution>& solutions, size_t n)
{
const StaticData& staticData = StaticData::Instance();
@ -546,7 +546,7 @@ void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList,
VERBOSE(2,"LMBR Score: " << solutions[0].GetScore() << endl);
}
vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList)
vector<Word> doLatticeMBR(const Manager& manager, const TrellisPathList& nBestList)
{
vector<LatticeMBRSolution> solutions;
@ -554,7 +554,7 @@ vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList)
return solutions.at(0).GetWords();
}
const TrellisPath doConsensusDecoding(Manager& manager, TrellisPathList& nBestList)
const TrellisPath doConsensusDecoding(const Manager& manager, const TrellisPathList& nBestList)
{
static const int BLEU_ORDER = 4;
static const float SMOOTH = 1;

View File

@ -137,15 +137,15 @@ void pruneLatticeFB(Lattice & connectedHyp, std::map < const Moses::Hypothesis*,
const std::vector< float> & estimatedScores, const Moses::Hypothesis*, size_t edgeDensity,float scale);
//Use the ngram scores to rerank the nbest list, return at most n solutions
void getLatticeMBRNBest(Moses::Manager& manager, Moses::TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
void getLatticeMBRNBest(const Moses::Manager& manager, const Moses::TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
//calculate expectated ngram counts, clipping at 1 (ie calculating posteriors) if posteriors==true.
void calcNgramExpectations(Lattice & connectedHyp, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges, std::map<Moses::Phrase,
float>& finalNgramScores, bool posteriors);
void GetOutputFactors(const Moses::TrellisPath &path, std::vector <Moses::Word> &translation);
void extract_ngrams(const std::vector<Moses::Word >& sentence, std::map < Moses::Phrase, int > & allngrams);
bool ascendingCoverageCmp(const Moses::Hypothesis* a, const Moses::Hypothesis* b);
std::vector<Moses::Word> doLatticeMBR(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
const Moses::TrellisPath doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
std::vector<Moses::Word> doLatticeMBR(const Moses::Manager& manager, const Moses::TrellisPathList& nBestList);
const Moses::TrellisPath doConsensusDecoding(const Moses::Manager& manager, const Moses::TrellisPathList& nBestList);
//std::vector<Moses::Word> doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
}

View File

@ -1450,6 +1450,11 @@ SentenceStats& Manager::GetSentenceStats() const
}
// Placeholder: currently writes nothing to the collector.
// NOTE(review): the single-best output for this manager still appears to be
// produced by the caller after this call returns — confirm before moving
// that logic here.
void Manager::OutputBest(OutputCollector *collector) const
{
}
void Manager::OutputNBest(OutputCollector *collector) const
{
const StaticData &staticData = StaticData::Instance();

View File

@ -187,6 +187,7 @@ public:
std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, std::set < const Hypothesis* > >* pOutgoingHyps, std::vector< float>* pFwdBwdScores) const;
// outputs
void OutputBest(OutputCollector *collector) const;
void OutputNBest(OutputCollector *collector) const;
void OutputAlignment(OutputCollector *collector) const;
void OutputLatticeSamples(OutputCollector *collector) const;

View File

@ -388,6 +388,40 @@ void Manager<Parser>::RecombineAndSort(const std::vector<SHyperedge*> &buffer,
std::sort(stack.begin(), stack.end(), SVertexStackContentOrderer());
}
/**
 * Write the single-best translation of the current sentence to the collector.
 *
 * \param collector destination of the formatted line; no-op when NULL
 *
 * When the search found no derivation, an empty line (prefixed with "0 "
 * if hypothesis scores are reported) is written so that every translation
 * id still yields exactly one output line. Throws (via UTIL_THROW_IF2) if
 * the target yield holds fewer than two words.
 */
template<typename Parser>
void Manager<Parser>::OutputBest(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }
  const Syntax::SHyperedge *best = GetBestSHyperedge();
  const long translationId = m_source.GetTranslationId();
  std::ostringstream out;
  FixPrecision(out);
  const bool reportScore = StaticData::Instance().GetOutputHypoScore();
  if (best == NULL) {
    VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
    if (reportScore) {
      out << "0 ";
    }
    // Terminate the line here as well: the non-NULL branch ends its line
    // with '\n', and without this a failed sentence would not get its own
    // output line.
    out << '\n';
  } else {
    if (reportScore) {
      out << best->score << " ";
    }
    Phrase yield = Syntax::GetOneBestTargetYield(*best);
    // delete 1st & last
    UTIL_THROW_IF2(yield.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    yield.RemoveWord(0);
    yield.RemoveWord(yield.GetSize()-1);
    out << yield.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
    out << '\n';
  }
  collector->Write(translationId, out.str());
}
template<typename Parser>
void Manager<Parser>::OutputDetailedTranslationReport(
OutputCollector *collector) const

View File

@ -42,6 +42,7 @@ class Manager : public Syntax::Manager
std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
bool onlyDistinct=false) const;
void OutputBest(OutputCollector *collector) const;
void OutputDetailedTranslationReport(OutputCollector *collector) const;
private:

View File

@ -90,6 +90,8 @@ void TranslationTask::RunPb()
additionalReportingTime.stop();
manager.OutputBest(m_ioWrapper.GetSingleBestOutputCollector());
// apply decision rule and output best translation(s)
if (m_ioWrapper.GetSingleBestOutputCollector()) {
ostringstream out;
@ -268,17 +270,9 @@ void TranslationTask::RunChart()
if (staticData.GetSearchAlgorithm() == ChartIncremental) {
Incremental::Manager manager(*m_source);
manager.Decode();
const std::vector<search::Applied> &nbest = manager.GetNBest();
if (!nbest.empty()) {
m_ioWrapper.OutputBestHypo(nbest[0], translationId);
manager.OutputDetailedTranslationReport(m_ioWrapper.GetDetailedTranslationCollector());
manager.OutputDetailedTreeFragmentsTranslationReport(m_ioWrapper.GetDetailTreeFragmentsOutputCollector());
} else {
m_ioWrapper.OutputBestNone(translationId);
}
manager.OutputBest(m_ioWrapper.GetSingleBestOutputCollector());
manager.OutputDetailedTranslationReport(m_ioWrapper.GetDetailedTranslationCollector());
manager.OutputDetailedTreeFragmentsTranslationReport(m_ioWrapper.GetDetailTreeFragmentsOutputCollector());
manager.OutputNBest(m_ioWrapper.GetNBestOutputCollector());
return;
@ -293,8 +287,8 @@ void TranslationTask::RunChart()
manager.OutputSearchGraphHypergraph();
// 1-best
const ChartHypothesis *bestHypo = manager.GetBestHypothesis();
m_ioWrapper.OutputBestHypo(bestHypo, translationId);
manager.OutputBest(m_ioWrapper.GetSingleBestOutputCollector());
IFVERBOSE(2) {
PrintUserTime("Best Hypothesis Generation Time:");
}

View File

@ -51,8 +51,8 @@ private:
Syntax::S2T::Manager<Parser> manager(*m_source);
manager.Decode();
// 1-best
const Syntax::SHyperedge *best = manager.GetBestSHyperedge();
m_ioWrapper.OutputBestHypo(best, translationId);
manager.OutputBest(m_ioWrapper.GetSingleBestOutputCollector());
// n-best
manager.OutputNBest(m_ioWrapper.GetNBestOutputCollector());