better handling of cache-based models with inconsistent parameters

This commit is contained in:
Nicola Bertoldi 2014-12-15 17:42:41 +01:00
commit 4e77665d30
37 changed files with 524 additions and 429 deletions

View File

@ -5,12 +5,12 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2091728208" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -48,6 +48,7 @@
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
@ -86,12 +87,12 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.185559773" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -156,4 +157,5 @@
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>

View File

@ -1855,6 +1855,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/Syntax/KBestExtractor.h</locationURI>
</link>
<link>
<name>Syntax/Manager.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/Syntax/Manager.cpp</locationURI>
</link>
<link>
<name>Syntax/Manager.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/Syntax/Manager.h</locationURI>
</link>
<link>
<name>Syntax/NonTerminalMap.h</name>
<type>1</type>

View File

@ -56,19 +56,15 @@ public:
m_transOptRange(transOptRange) {
m_totalWeightDistortion = 0;
const StaticData &staticData = StaticData::Instance();
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
std::vector<FeatureFunction*>::const_iterator iter;
for (iter = ffs.begin(); iter != ffs.end(); ++iter) {
const FeatureFunction *ff = *iter;
const DistortionScoreProducer *model = dynamic_cast<const DistortionScoreProducer*>(ff);
if (model) {
float weight =staticData.GetAllWeights().GetScoreForProducer(model);
m_totalWeightDistortion += weight;
}
const std::vector<const DistortionScoreProducer*> &ffs = DistortionScoreProducer::GetDistortionFeatureFunctions();
std::vector<const DistortionScoreProducer*>::const_iterator iter;
for (iter = ffs.begin(); iter != ffs.end(); ++iter) {
const DistortionScoreProducer *ff = *iter;
float weight =staticData.GetAllWeights().GetScoreForProducer(ff);
m_totalWeightDistortion += weight;
}
}
const WordsRange* m_transOptRange;
@ -223,7 +219,7 @@ Hypothesis *BackwardsEdge::CreateHypothesis(const Hypothesis &hypothesis, const
bool
BackwardsEdge::SeenPosition(const size_t x, const size_t y)
{
std::set< int >::iterator iter = m_seenPosition.find((x<<16) + y);
boost::unordered_set< int >::iterator iter = m_seenPosition.find((x<<16) + y);
return (iter != m_seenPosition.end());
}

View File

@ -33,6 +33,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TypeDef.h"
#include "WordsBitmap.h"
#include <boost/unordered_set.hpp>
namespace Moses
{
@ -165,7 +167,7 @@ private:
const SquareMatrix &m_futurescore;
std::vector< const Hypothesis* > m_hypotheses;
std::set< int > m_seenPosition;
boost::unordered_set< int > m_seenPosition;
// We don't want to instantiate "empty" objects.
BackwardsEdge();

View File

@ -300,6 +300,16 @@ void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream)
WriteSearchGraph(writer);
}
/**
 * Write the single best translation of the current input to `collector`.
 *
 * Nothing is written when `collector` is NULL or when GetBestHypothesis()
 * returns NULL.
 *
 * NOTE(review): OutputBestHypo() has its own branch for a NULL hypothesis
 * ("NO BEST TRANSLATION"), which this guard makes unreachable from here --
 * confirm that skipping the output entirely is the intended behaviour.
 *
 * @param collector destination for the formatted line; may be NULL
 */
void ChartManager::OutputBest(OutputCollector *collector) const
{
  // Look the hypothesis up once; it is both tested and forwarded below.
  const ChartHypothesis *bestHypo = GetBestHypothesis();
  if (collector && bestHypo) {
    const size_t translationId = m_source.GetTranslationId();
    OutputBestHypo(collector, bestHypo, translationId);
  }
}
void ChartManager::OutputNBest(OutputCollector *collector) const
{
const StaticData &staticData = StaticData::Instance();
@ -807,4 +817,61 @@ void ChartManager::OutputSearchGraphHypergraph() const
}
}
/**
 * Format the best chart hypothesis as one line and pass it to `collector`.
 *
 * For a non-NULL `hypo` the line consists of the optional total score, the
 * optional "||| " marker (when path recovery is enabled) and the output
 * phrase with its first and last word removed.  For a NULL `hypo` the line is
 * empty, or "0 " when hypothesis scores are being output.
 *
 * @param collector     destination; the call does nothing when NULL
 * @param hypo          best hypothesis, or NULL when there is none
 * @param translationId id forwarded to collector->Write()
 * @throws (via UTIL_THROW_IF2) when the output phrase has fewer than 2 words
 */
void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const
{
  if (!collector)
    return;

  const StaticData &staticData = StaticData::Instance();
  std::ostringstream out;
  FixPrecision(out);

  if (hypo != NULL) {
    VERBOSE(1,"BEST TRANSLATION: " << *hypo << std::endl);
    VERBOSE(3,"Best path: ");
    Backtrack(hypo);
    VERBOSE(3,"0" << std::endl);

    if (staticData.GetOutputHypoScore()) {
      out << hypo->GetTotalScore() << " ";
    }
    if (staticData.IsPathRecoveryEnabled()) {
      out << "||| ";
    }

    Phrase outPhrase(ARRAY_SIZE_INCR);
    hypo->GetOutputPhrase(outPhrase);

    // delete 1st & last
    UTIL_THROW_IF2(outPhrase.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    outPhrase.RemoveWord(0);
    outPhrase.RemoveWord(outPhrase.GetSize() - 1);

    // Bind by const reference: the factor order is only read, so there is no
    // need to copy the vector for every sentence.
    const std::vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
    out << outPhrase.GetStringRep(outputFactorOrder) << std::endl;
  } else {
    VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
    if (staticData.GetOutputHypoScore()) {
      out << "0 ";
    }
    out << std::endl;
  }

  collector->Write(translationId, out.str());
}
void ChartManager::Backtrack(const ChartHypothesis *hypo) const
{
const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
vector<const ChartHypothesis*>::const_iterator iter;
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
const ChartHypothesis *prevHypo = *iter;
VERBOSE(3,prevHypo->GetId() << " <= ");
Backtrack(prevHypo);
}
}
} // namespace Moses

View File

@ -99,6 +99,8 @@ private:
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
const Sentence &sentence,
long translationId) const;
void OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const;
void Backtrack(const ChartHypothesis *hypo) const;
public:
ChartManager(InputType const& source);
@ -143,6 +145,7 @@ public:
const ChartParser &GetParser() const { return m_parser; }
// outputs
void OutputBest(OutputCollector *collector) const;
void OutputNBest(OutputCollector *collector) const;
void OutputLatticeSamples(OutputCollector *collector) const
{}

View File

@ -118,7 +118,7 @@ void BleuScoreFeature::SetParameter(const std::string& key, const std::string& v
}
string line;
while (getline(in,line)) {
/* if (GetSearchAlgorithm() == ChartDecoding) {
/* if (GetSearchAlgorithm() == CYKPlus) {
stringstream tmp;
tmp << "<s> " << line << " </s>";
line = tmp.str();

View File

@ -43,7 +43,7 @@ ConstrainedDecoding::ConstrainedDecoding(const std::string &line)
void ConstrainedDecoding::Load()
{
const StaticData &staticData = StaticData::Instance();
bool addBeginEndWord = (staticData.GetSearchAlgorithm() == ChartDecoding) || (staticData.GetSearchAlgorithm() == ChartIncremental);
bool addBeginEndWord = (staticData.GetSearchAlgorithm() == CYKPlus) || (staticData.GetSearchAlgorithm() == ChartIncremental);
for(size_t i = 0; i < m_paths.size(); ++i) {
InputFileStream constraintFile(m_paths[i]);

View File

@ -22,9 +22,12 @@ struct DistortionState_traditional : public FFState {
}
};
// Registry of DistortionScoreProducer instances; every constructor call
// appends the new object (see below).
std::vector<const DistortionScoreProducer*> DistortionScoreProducer::s_staticColl;
// Constructs the feature from its configuration line.
// The base class is initialised with 1 (presumably the number of score
// components -- TODO confirm) and the unmodified line.
DistortionScoreProducer::DistortionScoreProducer(const std::string &line)
: StatefulFeatureFunction(1, line)
{
// Register this instance in the class-wide collection.
// NOTE(review): nothing visible here removes the pointer again when the
// object is destroyed -- confirm instances live for the whole run.
s_staticColl.push_back(this);
ReadParameters();
}

View File

@ -16,7 +16,14 @@ class WordsRange;
*/
class DistortionScoreProducer : public StatefulFeatureFunction
{
protected:
static std::vector<const DistortionScoreProducer*> s_staticColl;
public:
static const std::vector<const DistortionScoreProducer*>& GetDistortionFeatureFunctions() {
return s_staticColl;
}
DistortionScoreProducer(const std::string &line);
bool IsUseable(const FactorMask &mask) const {

View File

@ -78,8 +78,8 @@ class DynamicCacheBasedLanguageModel : public StatelessFeatureFunction
void Print() const;
protected:
static DynamicCacheBasedLanguageModel *s_instance;
static std::map< const std::string, DynamicCacheBasedLanguageModel * > s_instance_map;
static DynamicCacheBasedLanguageModel* s_instance;
static std::map< const std::string, DynamicCacheBasedLanguageModel* > s_instance_map;
public:
DynamicCacheBasedLanguageModel(const std::string &line);
@ -92,15 +92,18 @@ public:
m_name = name;
}
static const DynamicCacheBasedLanguageModel& Instance(const std::string name) {
UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The DynamicCacheBasedLanguageModel feature named " + name + " does not exist!");
return *(s_instance_map[name]);
// Returns the feature registered under `name` in s_instance_map, or NULL
// when no feature with that name is registered.
static const DynamicCacheBasedLanguageModel* Instance(const std::string& name) {
  // One lookup instead of find() followed by operator[]; this also rules out
  // operator[] ever inserting a default (NULL) entry into the map.
  std::map< const std::string, DynamicCacheBasedLanguageModel* >::const_iterator it = s_instance_map.find(name);
  if (it == s_instance_map.end()) {
    return NULL;
  }
  return it->second;
}
static DynamicCacheBasedLanguageModel& InstanceNonConst(const std::string name) {
UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The DynamicCacheBasedLanguageModel feature named " + name + " does not exist!");
return *(s_instance_map[name]);
// Mutable counterpart of Instance(name): returns the feature registered
// under `name` in s_instance_map, or NULL when there is none.
static DynamicCacheBasedLanguageModel* InstanceNonConst(const std::string& name) {
  // One lookup instead of find() followed by operator[]; this also rules out
  // operator[] ever inserting a default (NULL) entry into the map.
  std::map< const std::string, DynamicCacheBasedLanguageModel* >::const_iterator it = s_instance_map.find(name);
  if (it == s_instance_map.end()) {
    return NULL;
  }
  return it->second;
}
// Returns a reference to the object s_instance points to.
// NOTE(review): s_instance is dereferenced without a NULL check -- callers
// presumably may only use this after an instance has been created; confirm.
static const DynamicCacheBasedLanguageModel& Instance() {
return *s_instance;
}

View File

@ -7,6 +7,7 @@
#include "moses/Manager.h"
#include "moses/TranslationOption.h"
#include "moses/Util.h"
#include "moses/FF/DistortionScoreProducer.h"
using namespace std;

View File

@ -22,6 +22,7 @@ class WordsRange;
class FactorMask;
class InputPath;
class StackVec;
class DistortionScoreProducer;
/** base class for all feature functions.
*/
@ -46,6 +47,7 @@ public:
static const std::vector<FeatureFunction*>& GetFeatureFunctions() {
return s_staticColl;
}
static FeatureFunction &FindFeatureFunction(const std::string& name);
static void Destroy();

View File

@ -278,97 +278,6 @@ std::map<size_t, const Factor*> IOWrapper::GetPlaceholders(const Hypothesis &hyp
return ret;
}
void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId)
{
if (!m_singleBestOutputCollector)
return;
std::ostringstream out;
FixPrecision(out);
if (hypo != NULL) {
VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
VERBOSE(3,"Best path: ");
Backtrack(hypo);
VERBOSE(3,"0" << std::endl);
if (StaticData::Instance().GetOutputHypoScore()) {
out << hypo->GetTotalScore() << " ";
}
if (StaticData::Instance().IsPathRecoveryEnabled()) {
out << "||| ";
}
Phrase outPhrase(ARRAY_SIZE_INCR);
hypo->GetOutputPhrase(outPhrase);
// delete 1st & last
UTIL_THROW_IF2(outPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outPhrase.RemoveWord(0);
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
const std::vector<FactorType> outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
string output = outPhrase.GetStringRep(outputFactorOrder);
out << output << endl;
} else {
VERBOSE(1, "NO BEST TRANSLATION" << endl);
if (StaticData::Instance().GetOutputHypoScore()) {
out << "0 ";
}
out << endl;
}
m_singleBestOutputCollector->Write(translationId, out.str());
}
// Writes the best incremental-search result as one line to
// m_singleBestOutputCollector: optional score, then the phrase recovered
// from `applied` with its first and last word stripped.  Does nothing when
// no single-best collector is set.  Throws (UTIL_THROW_IF2) if the phrase
// has fewer than 2 words.
void IOWrapper::OutputBestHypo(search::Applied applied, long translationId)
{
  if (!m_singleBestOutputCollector) {
    return;
  }

  // Recover the target phrase and drop the sentence boundary words.
  Phrase bestPhrase;
  Incremental::ToPhrase(applied, bestPhrase);
  // delete 1st & last
  UTIL_THROW_IF2(bestPhrase.GetSize() < 2,
                 "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
  bestPhrase.RemoveWord(0);
  bestPhrase.RemoveWord(bestPhrase.GetSize() - 1);

  // Assemble the output line.
  std::ostringstream line;
  FixPrecision(line);
  if (StaticData::Instance().GetOutputHypoScore()) {
    line << applied.GetScore() << ' ';
  }
  line << bestPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder()) << '\n';

  m_singleBestOutputCollector->Write(translationId, line.str());
  VERBOSE(1,"BEST TRANSLATION: " << bestPhrase << "[total=" << applied.GetScore() << "]" << std::endl);
}
// Writes the "no translation" line for `translationId` to
// m_singleBestOutputCollector: "0 \n" when hypothesis scores are being
// output, otherwise just "\n".  Does nothing without a collector.
void IOWrapper::OutputBestNone(long translationId)
{
  if (m_singleBestOutputCollector) {
    const bool withScore = StaticData::Instance().GetOutputHypoScore();
    m_singleBestOutputCollector->Write(translationId, withScore ? "0 \n" : "\n");
  }
}
void IOWrapper::Backtrack(const ChartHypothesis *hypo)
{
const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
vector<const ChartHypothesis*>::const_iterator iter;
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
const ChartHypothesis *prevHypo = *iter;
VERBOSE(3,prevHypo->GetId() << " <= ");
Backtrack(prevHypo);
}
}
void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
{
if (hypo != NULL) {
@ -872,38 +781,5 @@ void IOWrapper::OutputLatticeMBRNBestList(const vector<LatticeMBRSolution>& solu
OutputLatticeMBRNBest(*m_nBestStream, solutions,translationId);
}
////////////////////////////
#include "moses/Syntax/PVertex.h"
#include "moses/Syntax/S2T/DerivationWriter.h"
/**
 * Write the one-best yield of `best` as one line to
 * m_singleBestOutputCollector.
 *
 * For a non-NULL `best` the line is the optional score followed by the target
 * yield with its first and last word removed.  For a NULL `best` the line is
 * empty, or "0 " when hypothesis scores are being output.  In both cases the
 * line is newline-terminated.  Does nothing when no collector is set.
 *
 * @param best          best hyperedge, or NULL when there is no translation
 * @param translationId id forwarded to the collector's Write()
 * @throws (via UTIL_THROW_IF2) when the yield has fewer than 2 words
 */
void IOWrapper::OutputBestHypo(const Syntax::SHyperedge *best,
                               long translationId)
{
  if (!m_singleBestOutputCollector) {
    return;
  }
  std::ostringstream out;
  FixPrecision(out);
  if (best == NULL) {
    VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
    if (StaticData::Instance().GetOutputHypoScore()) {
      out << "0 ";
    }
    // Terminate the line here as well, matching the non-NULL branch and the
    // other "no best translation" outputs; without it the next sentence's
    // output would continue on the same line.
    out << '\n';
  } else {
    if (StaticData::Instance().GetOutputHypoScore()) {
      out << best->score << " ";
    }
    Phrase yield = Syntax::GetOneBestTargetYield(*best);
    // delete 1st & last
    UTIL_THROW_IF2(yield.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    yield.RemoveWord(0);
    yield.RemoveWord(yield.GetSize()-1);
    out << yield.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
    out << '\n';
  }
  m_singleBestOutputCollector->Write(translationId, out.str());
}
} // namespace

View File

@ -103,7 +103,6 @@ protected:
// CHART
typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext;
void Backtrack(const ChartHypothesis *hypo);
void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
void OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
@ -171,11 +170,6 @@ public:
// CHART
void OutputBestHypo(const Moses::ChartHypothesis *hypo, long translationId);
void OutputBestHypo(search::Applied applied, long translationId);
void OutputBestHypo(const Moses::Syntax::SHyperedge *, long translationId);
void OutputBestNone(long translationId);
// phrase-based
void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors);

View File

@ -283,6 +283,20 @@ const std::vector<search::Applied> &Manager::GetNBest() const
return *completed_nbest_;
}
// Writes the single best result for the current input: the first entry of
// the n-best list when there is one, otherwise the "no translation" line.
void Manager::OutputBest(OutputCollector *collector) const
{
  const long sentenceId = m_source.GetTranslationId();
  const std::vector<search::Applied> &completed = GetNBest();
  if (completed.empty()) {
    OutputBestNone(collector, sentenceId);
    return;
  }
  OutputBestHypo(collector, completed[0], sentenceId);
}
void Manager::OutputNBest(OutputCollector *collector) const
{
if (collector == NULL) {
@ -465,6 +479,38 @@ void Manager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
}
}
// Writes the best incremental-search result as one line to `collector`:
// optional score, then the phrase recovered from `applied` with its first
// and last word stripped.  Does nothing when `collector` is NULL.  Throws
// (UTIL_THROW_IF2) if the phrase has fewer than 2 words.
void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied, long translationId) const
{
  if (collector == NULL) {
    return;
  }

  // Recover the target phrase and drop the sentence boundary words.
  Phrase bestPhrase;
  Incremental::ToPhrase(applied, bestPhrase);
  // delete 1st & last
  UTIL_THROW_IF2(bestPhrase.GetSize() < 2,
                 "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
  bestPhrase.RemoveWord(0);
  bestPhrase.RemoveWord(bestPhrase.GetSize() - 1);

  // Assemble the output line.
  std::ostringstream line;
  FixPrecision(line);
  if (StaticData::Instance().GetOutputHypoScore()) {
    line << applied.GetScore() << ' ';
  }
  line << bestPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder()) << '\n';

  collector->Write(translationId, line.str());
  VERBOSE(1,"BEST TRANSLATION: " << bestPhrase << "[total=" << applied.GetScore() << "]" << std::endl);
}
// Writes the "no translation" line for `translationId` to `collector`:
// "0 \n" when hypothesis scores are being output, otherwise just "\n".
// Does nothing when `collector` is NULL.
void Manager::OutputBestNone(OutputCollector *collector, long translationId) const
{
  if (collector != NULL) {
    const bool withScore = StaticData::Instance().GetOutputHypoScore();
    collector->Write(translationId, withScore ? "0 \n" : "\n");
  }
}
namespace
{

View File

@ -40,6 +40,7 @@ public:
}
// output
void OutputBest(OutputCollector *collector) const;
void OutputNBest(OutputCollector *collector) const;
void OutputDetailedTranslationReport(OutputCollector *collector) const;
void OutputNBestList(OutputCollector *collector, const std::vector<search::Applied> &nbest, long translationId) const;
@ -98,6 +99,8 @@ private:
const search::Applied *applied,
const Sentence &sentence,
long translationId) const;
void OutputBestHypo(OutputCollector *collector, search::Applied applied, long translationId) const;
void OutputBestNone(OutputCollector *collector, long translationId) const;
};

View File

@ -19,7 +19,7 @@ if $(with-irstlm) {
dependencies += irst ;
lmmacros += LM_IRST ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
echo "!!! You are linking with the IRSTLM library; be sure the release is >= 5.70.02 !!!" ;
echo "!!! You are linking with the IRSTLM library; be sure the release is >= 5.80.06 !!!" ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
}

View File

@ -499,7 +499,7 @@ bool ascendingCoverageCmp(const Hypothesis* a, const Hypothesis* b)
return a->GetWordsBitmap().GetNumWordsCovered() < b->GetWordsBitmap().GetNumWordsCovered();
}
void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList,
void getLatticeMBRNBest(const Manager& manager, const TrellisPathList& nBestList,
vector<LatticeMBRSolution>& solutions, size_t n)
{
const StaticData& staticData = StaticData::Instance();
@ -546,7 +546,7 @@ void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList,
VERBOSE(2,"LMBR Score: " << solutions[0].GetScore() << endl);
}
vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList)
vector<Word> doLatticeMBR(const Manager& manager, const TrellisPathList& nBestList)
{
vector<LatticeMBRSolution> solutions;
@ -554,7 +554,7 @@ vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList)
return solutions.at(0).GetWords();
}
const TrellisPath doConsensusDecoding(Manager& manager, TrellisPathList& nBestList)
const TrellisPath doConsensusDecoding(const Manager& manager, const TrellisPathList& nBestList)
{
static const int BLEU_ORDER = 4;
static const float SMOOTH = 1;

View File

@ -137,15 +137,15 @@ void pruneLatticeFB(Lattice & connectedHyp, std::map < const Moses::Hypothesis*,
const std::vector< float> & estimatedScores, const Moses::Hypothesis*, size_t edgeDensity,float scale);
//Use the ngram scores to rerank the nbest list, return at most n solutions
void getLatticeMBRNBest(Moses::Manager& manager, Moses::TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
void getLatticeMBRNBest(const Moses::Manager& manager, const Moses::TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
//calculate expected ngram counts, clipping at 1 (ie calculating posteriors) if posteriors==true.
void calcNgramExpectations(Lattice & connectedHyp, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges, std::map<Moses::Phrase,
float>& finalNgramScores, bool posteriors);
void GetOutputFactors(const Moses::TrellisPath &path, std::vector <Moses::Word> &translation);
void extract_ngrams(const std::vector<Moses::Word >& sentence, std::map < Moses::Phrase, int > & allngrams);
bool ascendingCoverageCmp(const Moses::Hypothesis* a, const Moses::Hypothesis* b);
std::vector<Moses::Word> doLatticeMBR(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
const Moses::TrellisPath doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
std::vector<Moses::Word> doLatticeMBR(const Moses::Manager& manager, const Moses::TrellisPathList& nBestList);
const Moses::TrellisPath doConsensusDecoding(const Moses::Manager& manager, const Moses::TrellisPathList& nBestList);
//std::vector<Moses::Word> doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
}

View File

@ -1450,6 +1450,11 @@ SentenceStats& Manager::GetSentenceStats() const
}
// No-op: the body is empty, so nothing is written to `collector`.
// NOTE(review): presumably the single-best output for this manager is still
// produced elsewhere -- confirm before relying on this function.
void Manager::OutputBest(OutputCollector *collector) const
{
}
void Manager::OutputNBest(OutputCollector *collector) const
{
const StaticData &staticData = StaticData::Instance();

View File

@ -187,6 +187,7 @@ public:
std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, std::set < const Hypothesis* > >* pOutgoingHyps, std::vector< float>* pFwdBwdScores) const;
// outputs
void OutputBest(OutputCollector *collector) const;
void OutputNBest(OutputCollector *collector) const;
void OutputAlignment(OutputCollector *collector) const;
void OutputLatticeSamples(OutputCollector *collector) const;

View File

@ -25,8 +25,6 @@ Search *Search::CreateSearch(Manager& manager, const InputType &source,
return new SearchNormal(manager,source, transOptColl);
case CubePruning:
return new SearchCubePruning(manager, source, transOptColl);
case CubeGrowing:
return NULL;
case NormalBatch:
return new SearchNormalBatch(manager, source, transOptColl);
default:

View File

@ -141,7 +141,7 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
if ((*dlt_meta_it).find("id") != (*dlt_meta_it).end()) {
id = (*dlt_meta_it)["id"];
}
cbtm = &PhraseDictionaryDynamicCacheBased::InstanceNonConst(id);
cbtm = PhraseDictionaryDynamicCacheBased::InstanceNonConst(id);
if (cbtm) cbtm->ExecuteDlt(*dlt_meta_it);
}
if ((*dlt_meta_it)["type"] == "cblm") {
@ -149,7 +149,7 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
if ((*dlt_meta_it).find("id") != (*dlt_meta_it).end()) {
id = (*dlt_meta_it)["id"];
}
cblm = &DynamicCacheBasedLanguageModel::InstanceNonConst(id);
cblm = DynamicCacheBasedLanguageModel::InstanceNonConst(id);
if (cblm) cblm->ExecuteDlt(*dlt_meta_it);
}
}

View File

@ -436,7 +436,7 @@ public:
return m_searchAlgorithm;
}
bool IsChart() const {
return m_searchAlgorithm == ChartDecoding || m_searchAlgorithm == ChartIncremental;
return m_searchAlgorithm == CYKPlus || m_searchAlgorithm == ChartIncremental;
}
const ScoreComponentCollection& GetAllWeights() const {

View File

@ -5,6 +5,7 @@
#include <queue>
#include <vector>
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <boost/weak_ptr.hpp>

206
moses/Syntax/Manager.cpp Normal file
View File

@ -0,0 +1,206 @@
#include "Manager.h"
#include <sstream>
#include "moses/OutputCollector.h"
#include "moses/StaticData.h"
#include "PVertex.h"
namespace Moses
{
namespace Syntax
{
// Constructs the manager for one input by forwarding `source` to the
// Moses::BaseManager constructor; no other work is done here.
Manager::Manager(const InputType &source)
: Moses::BaseManager(source)
{
}
// Extracts the k-best derivations for the current input (k and the
// distinctness flag come from StaticData) and writes them to `collector`.
// Does nothing when `collector` is NULL.
void Manager::OutputNBest(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }
  const StaticData &config = StaticData::Instance();
  const long sentenceId = m_source.GetTranslationId();

  KBestExtractor::KBestVec derivations;
  ExtractKBest(config.GetNBestSize(), derivations, config.GetDistinctNBest());
  OutputNBestList(collector, derivations, sentenceId);
}
// Writes every word in m_oovs, in set order, followed by a line end, to
// `collector` under the current translation id.  Does nothing when
// `collector` is NULL.
void Manager::OutputUnknowns(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }
  const long sentenceId = m_source.GetTranslationId();

  std::ostringstream line;
  std::set<Moses::Word>::const_iterator oov = m_oovs.begin();
  while (oov != m_oovs.end()) {
    line << *oov;
    ++oov;
  }
  line << std::endl;

  collector->Write(sentenceId, line.str());
}
/**
 * Format an n-best list and write it to `collector` in a single Write().
 *
 * Each derivation becomes one line of the form
 *   id ||| surface ||| feature scores ||| total score
 * optionally followed by " ||| " plus word alignments and " ||| " plus the
 * output tree, depending on the StaticData settings.
 *
 * @param collector     destination; must not be NULL
 * @param nBestList     derivations to print, in the order given
 * @param translationId id printed on every line and passed to Write()
 * @throws (via UTIL_THROW_IF2) when a derivation's output phrase has fewer
 *         than 2 words
 */
void Manager::OutputNBestList(OutputCollector *collector,
                              const KBestExtractor::KBestVec &nBestList,
                              long translationId) const
{
  // Check the precondition before the first dereference of `collector`
  // rather than after the whole list has already been formatted.
  assert(collector);

  const StaticData &staticData = StaticData::Instance();

  const std::vector<FactorType> &outputFactorOrder =
    staticData.GetOutputFactorOrder();

  std::ostringstream out;

  if (collector->OutputIsCout()) {
    // Set precision only if we're writing the n-best list to cout.  This is to
    // preserve existing behaviour, but should probably be done either way.
    FixPrecision(out);
  }

  bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
  bool PrintNBestTrees = staticData.PrintNBestTrees();

  for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
       p != nBestList.end(); ++p) {
    const KBestExtractor::Derivation &derivation = **p;

    // get the derivation's target-side yield
    Phrase outputPhrase = KBestExtractor::GetOutputPhrase(derivation);

    // delete <s> and </s>
    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // print the translation ID, surface factors, and scores
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase, outputFactorOrder, false);
    out << " ||| ";
    OutputAllFeatureScores(derivation.scoreBreakdown, out);
    out << " ||| " << derivation.score;

    // optionally, print word alignments
    if (includeWordAlignment) {
      out << " ||| ";
      Alignments align;
      OutputAlignmentNBest(align, derivation, 0);
      for (Alignments::const_iterator q = align.begin(); q != align.end();
           ++q) {
        out << q->first << "-" << q->second << " ";
      }
    }

    // optionally, print tree
    if (PrintNBestTrees) {
      TreePointer tree = KBestExtractor::GetOutputTree(derivation);
      out << " ||| " << tree->GetString();
    }

    out << std::endl;
  }

  collector->Write(translationId, out.str());
}
/**
 * Collect the terminal word alignments of `derivation` and, recursively, of
 * all its subderivations into `retAlign`, expressed as absolute
 * (source position, target position) pairs.
 *
 * @param retAlign    output set; alignment points are inserted into it
 * @param derivation  derivation whose rule alignments are added
 * @param startTarget absolute target position at which this derivation's
 *                    yield starts
 * @return the number of target words covered by `derivation`, including the
 *         words of its subderivations
 * @throws (via UTIL_THROW_IF2) when the non-terminal alignment does not match
 *         the subderivations, or when an alignment point is already present
 */
std::size_t Manager::OutputAlignmentNBest(
  Alignments &retAlign,
  const KBestExtractor::Derivation &derivation,
  std::size_t startTarget) const
{
  const SHyperedge &shyperedge = derivation.edge->shyperedge;

  std::size_t totalTargetSize = 0;
  std::size_t startSource = shyperedge.head->pvertex->span.GetStartPos();

  const TargetPhrase &tp = *(shyperedge.translation);

  std::size_t thisSourceSize = CalcSourceSize(derivation);

  // position of each terminal word in translation rule, irrespective of
  // alignment if non-term, number is undefined
  std::vector<std::size_t> sourceOffsets(thisSourceSize, 0);
  std::vector<std::size_t> targetOffsets(tp.GetSize(), 0);

  const AlignmentInfo &aiNonTerm = shyperedge.translation->GetAlignNonTerm();
  std::vector<std::size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
  const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd =
    aiNonTerm.GetNonTermIndexMap();

  UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
                 "Error");

  for (std::size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
    if (tp.GetWord(targetPos).IsNonTerminal()) {
      UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
      std::size_t sourceInd = targetPos2SourceInd[targetPos];
      std::size_t sourcePos = sourceInd2pos[sourceInd];

      const KBestExtractor::Derivation &subderivation =
        *derivation.subderivations[sourceInd];

      // calc source size
      std::size_t sourceSize =
        subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered();
      sourceOffsets[sourcePos] = sourceSize;

      // calc target size.
      // Recursively look thru child hypos
      std::size_t currStartTarget = startTarget + totalTargetSize;
      std::size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
                               currStartTarget);
      targetOffsets[targetPos] = targetSize;

      totalTargetSize += targetSize;
    } else {
      // a terminal contributes exactly one target word
      ++totalTargetSize;
    }
  }

  // convert position within translation rule to absolute position within
  // source sentence / output sentence
  ShiftOffsets(sourceOffsets, startSource);
  ShiftOffsets(targetOffsets, startTarget);

  // get alignments from this hypo
  const AlignmentInfo &aiTerm = shyperedge.translation->GetAlignTerm();

  // add to output arg, offsetting by source & target
  AlignmentInfo::const_iterator iter;
  for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
    const std::pair<std::size_t, std::size_t> &align = *iter;
    std::size_t relSource = align.first;
    std::size_t relTarget = align.second;
    std::size_t absSource = sourceOffsets[relSource];
    std::size_t absTarget = targetOffsets[relTarget];

    std::pair<std::size_t, std::size_t> alignPoint(absSource, absTarget);
    std::pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
    // every alignment point is expected to be new
    UTIL_THROW_IF2(!ret.second, "Error");
  }

  return totalTargetSize;
}
// Returns the number of words covered by the head span of d's hyperedge,
// reduced by (words covered - 1) for every tail vertex.
std::size_t Manager::CalcSourceSize(const KBestExtractor::Derivation &d) const
{
  const SHyperedge &edge = d.edge->shyperedge;
  std::size_t size = edge.head->pvertex->span.GetNumWordsCovered();

  const std::size_t numChildren = edge.tail.size();
  for (std::size_t child = 0; child != numChildren; ++child) {
    const std::size_t covered =
      edge.tail[child]->pvertex->span.GetNumWordsCovered();
    size -= (covered - 1);
  }
  return size;
}
} // Syntax
} // Moses

58
moses/Syntax/Manager.h Normal file
View File

@ -0,0 +1,58 @@
#pragma once
#include "moses/InputType.h"
#include "moses/BaseManager.h"
#include "KBestExtractor.h"
namespace Moses
{
namespace Syntax
{
// Common base class for Moses::Syntax managers.
//
// Implements the BaseManager output functions that are identical for all
// Syntax managers (n-best and unknown-word output), stubs out the ones that
// are no-ops, and leaves k-best extraction to the derived classes.
class Manager : public BaseManager
{
public:
// Forwards the input to the BaseManager constructor.
Manager(const InputType &);
// Virtual functions from Moses::BaseManager that are implemented the same
// way for all Syntax managers.
// Extracts the k-best list via ExtractKBest() and writes it to `collector`;
// does nothing when `collector` is NULL.
void OutputNBest(OutputCollector *collector) const;
// Writes the words held in m_oovs to `collector`; does nothing when
// `collector` is NULL.
void OutputUnknowns(OutputCollector *collector) const;
// Virtual functions from Moses::BaseManager that are no-ops for all Syntax
// managers.
void OutputLatticeSamples(OutputCollector *collector) const {}
void OutputAlignment(OutputCollector *collector) const {}
void OutputDetailedTreeFragmentsTranslationReport(
OutputCollector *collector) const {}
void OutputWordGraph(OutputCollector *collector) const {}
void OutputSearchGraph(OutputCollector *collector) const {}
void OutputSearchGraphSLF() const {}
void OutputSearchGraphHypergraph() const {}
// Syntax-specific virtual functions that derived classes must implement.
// Fills `kBestList` with up to `k` derivations; `onlyDistinct` presumably
// restricts the list to derivations with distinct yields -- TODO confirm
// against the implementations.
virtual void ExtractKBest(
std::size_t k,
std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
bool onlyDistinct=false) const = 0;
protected:
// Words written out by OutputUnknowns(); presumably filled in by the derived
// managers with the input's out-of-vocabulary words -- TODO confirm.
std::set<Word> m_oovs;
private:
// Syntax-specific helper functions used to implement OutputNBest.
// Formats every derivation in `nBestList` and writes the result to
// `collector` under `translationId`.
void OutputNBestList(OutputCollector *collector,
const KBestExtractor::KBestVec &nBestList,
long translationId) const;
// Adds the word alignments of `d` (and, recursively, its subderivations) to
// `retAlign`; returns the number of target words `d` covers.
std::size_t OutputAlignmentNBest(Alignments &retAlign,
const KBestExtractor::Derivation &d,
std::size_t startTarget) const;
// Returns the head span's word count reduced by (word count - 1) for each
// tail vertex of d's hyperedge.
std::size_t CalcSourceSize(const KBestExtractor::Derivation &d) const;
};
} // Syntax
} // Moses

View File

@ -2,6 +2,7 @@
#include <iostream>
#include <sstream>
#include "moses/DecodeGraph.h"
#include "moses/StaticData.h"
#include "moses/Syntax/BoundedPriorityContainer.h"
@ -14,8 +15,8 @@
#include "moses/Syntax/SVertexRecombinationOrderer.h"
#include "moses/Syntax/SymbolEqualityPred.h"
#include "moses/Syntax/SymbolHasher.h"
#include "DerivationWriter.h"
#include "DerivationWriter.h"
#include "OovHandler.h"
#include "PChart.h"
#include "RuleTrie.h"
@ -30,7 +31,7 @@ namespace S2T
template<typename Parser>
Manager<Parser>::Manager(const InputType &source)
: BaseManager(source)
: Syntax::Manager(source)
, m_pchart(source.GetSize(), Parser::RequiresCompressedChart())
, m_schart(source.GetSize())
{
@ -44,7 +45,7 @@ void Manager<Parser>::InitializeCharts()
const Word &terminal = m_source.GetWord(i);
// PVertex
PVertex tmp(WordsRange(i,i), m_source.GetWord(i));
PVertex tmp(WordsRange(i,i), terminal);
PVertex &pvertex = m_pchart.AddVertex(tmp);
// SVertex
@ -262,6 +263,7 @@ const SHyperedge *Manager<Parser>::GetBestSHyperedge() const
}
assert(stacks.Size() == 1);
const std::vector<boost::shared_ptr<SVertex> > &stack = stacks.Begin()->second;
// TODO Throw exception if stack is empty? Or return 0?
return stack[0]->best;
}
@ -284,6 +286,7 @@ void Manager<Parser>::ExtractKBest(
}
assert(stacks.Size() == 1);
const std::vector<boost::shared_ptr<SVertex> > &stack = stacks.Begin()->second;
// TODO Throw exception if stack is empty? Or return 0?
KBestExtractor extractor;
@ -386,212 +389,51 @@ void Manager<Parser>::RecombineAndSort(const std::vector<SHyperedge*> &buffer,
}
template<typename Parser>
void Manager<Parser>::OutputNBest(OutputCollector *collector) const
void Manager<Parser>::OutputBest(OutputCollector *collector) const
{
if (collector) {
const StaticData &staticData = StaticData::Instance();
long translationId = m_source.GetTranslationId();
Syntax::KBestExtractor::KBestVec nBestList;
ExtractKBest(staticData.GetNBestSize(), nBestList,
staticData.GetDistinctNBest());
OutputNBestList(collector, nBestList, translationId);
}
}
template<typename Parser>
void Manager<Parser>::OutputDetailedTranslationReport(OutputCollector *collector) const
{
const SHyperedge *best = GetBestSHyperedge();
if (best == NULL || collector == NULL) {
if (!collector) {
return;
}
const Syntax::SHyperedge *best = GetBestSHyperedge();
const long translationId = m_source.GetTranslationId();
std::ostringstream out;
FixPrecision(out);
if (best == NULL) {
VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
if (StaticData::Instance().GetOutputHypoScore()) {
out << "0 ";
}
} else {
if (StaticData::Instance().GetOutputHypoScore()) {
out << best->score << " ";
}
Phrase yield = Syntax::GetOneBestTargetYield(*best);
// delete 1st & last
UTIL_THROW_IF2(yield.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
yield.RemoveWord(0);
yield.RemoveWord(yield.GetSize()-1);
out << yield.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
out << '\n';
}
collector->Write(translationId, out.str());
}
template<typename Parser>
void Manager<Parser>::OutputDetailedTranslationReport(
OutputCollector *collector) const
{
const SHyperedge *best = GetBestSHyperedge();
if (best == NULL || collector == NULL) {
return;
}
long translationId = m_source.GetTranslationId();
std::ostringstream out;
Syntax::S2T::DerivationWriter::Write(*best, translationId, out);
DerivationWriter::Write(*best, translationId, out);
collector->Write(translationId, out.str());
}
// Writes the words stored in m_oovs for the current input sentence to
// collector, as a single line terminated by a newline.  Does nothing when
// collector is null.
template<typename Parser>
void Manager<Parser>::OutputUnknowns(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }
  const long sentenceId = m_source.GetTranslationId();
  std::ostringstream buffer;
  typedef std::set<Moses::Word>::const_iterator OovIter;
  const OovIter last = m_oovs.end();
  for (OovIter it = m_oovs.begin(); it != last; ++it) {
    buffer << *it;
  }
  buffer << std::endl;
  collector->Write(sentenceId, buffer.str());
}
// Formats an n-best list and hands it to collector in a single Write call.
//
// One line is produced per derivation in nBestList:
//   <translationId> ||| <surface> ||| <feature scores> ||| <total score>
// followed by " ||| <word alignments>" when
// StaticData::PrintAlignmentInfoInNbest() is set and by " ||| <tree>" when
// StaticData::PrintNBestTrees() is set.
//
// collector must not be null (checked with assert before first use).
// An error is raised through UTIL_THROW_IF2 if a derivation's output phrase
// has fewer than two words, since the first and last word (<s> and </s>) are
// removed before printing.
template<typename Parser>
void Manager<Parser>::OutputNBestList(OutputCollector *collector,
                                      const Syntax::KBestExtractor::KBestVec &nBestList,
                                      long translationId) const
{
  // Verify the precondition before the pointer is dereferenced for the
  // first time; checking it afterwards could never catch a null collector.
  assert(collector);

  const StaticData &staticData = StaticData::Instance();

  const std::vector<Moses::FactorType> &outputFactorOrder =
    staticData.GetOutputFactorOrder();

  std::ostringstream out;

  if (collector->OutputIsCout()) {
    // Set precision only if we're writing the n-best list to cout.  This is to
    // preserve existing behaviour, but should probably be done either way.
    FixPrecision(out);
  }

  const bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
  const bool printNBestTrees = staticData.PrintNBestTrees();

  for (Syntax::KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
       p != nBestList.end(); ++p) {
    const Syntax::KBestExtractor::Derivation &derivation = **p;

    // get the derivation's target-side yield
    Phrase outputPhrase = Syntax::KBestExtractor::GetOutputPhrase(derivation);

    // delete <s> and </s>
    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // print the translation ID, surface factors, and scores
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase, outputFactorOrder, false);
    out << " ||| ";
    OutputAllFeatureScores(derivation.scoreBreakdown, out);
    out << " ||| " << derivation.score;

    // optionally, print word alignments
    if (includeWordAlignment) {
      out << " ||| ";
      Alignments align;
      OutputAlignmentNBest(align, derivation, 0);
      for (Alignments::const_iterator q = align.begin(); q != align.end();
           ++q) {
        out << q->first << "-" << q->second << " ";
      }
    }

    // optionally, print tree
    if (printNBestTrees) {
      TreePointer tree = Syntax::KBestExtractor::GetOutputTree(derivation);
      out << " ||| " << tree->GetString();
    }

    // A plain newline is enough here: the stream is an in-memory buffer, so
    // the flush performed by std::endl has nothing to do.
    out << '\n';
  }

  collector->Write(translationId, out.str());
}
// Collects the word alignment points of `derivation` into retAlign,
// recursing into its subderivations.
//
// retAlign     output set of (source position, target position) pairs.
// derivation   derivation whose root rule and subderivations are processed.
// startTarget  target-side position at which this derivation's yield begins.
//
// Returns the number of target words produced by this derivation: one per
// terminal in the rule's target phrase plus the value returned for each
// subderivation.
//
// Raises an error through UTIL_THROW_IF2 when the non-terminal alignment does
// not match the number of subderivations, when a non-terminal target position
// lies outside the non-terminal index map, or when an alignment point is
// already present in retAlign.
template<typename Parser>
size_t Manager<Parser>::OutputAlignmentNBest(
Alignments &retAlign,
const Syntax::KBestExtractor::Derivation &derivation,
size_t startTarget) const
{
const Syntax::SHyperedge &shyperedge = derivation.edge->shyperedge;
size_t totalTargetSize = 0;
size_t startSource = shyperedge.head->pvertex->span.GetStartPos();
const TargetPhrase &tp = *(shyperedge.translation);
size_t thisSourceSize = CalcSourceSize(derivation);
// position of each terminal word in translation rule, irrespective of alignment
// if non-term, number is undefined
std::vector<size_t> sourceOffsets(thisSourceSize, 0);
std::vector<size_t> targetOffsets(tp.GetSize(), 0);
const AlignmentInfo &aiNonTerm = shyperedge.translation->GetAlignNonTerm();
std::vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
// there must be exactly one subderivation per aligned non-terminal
UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
"Error");
// NOTE(review): targetInd is incremented below but never read in this function.
size_t targetInd = 0;
for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
if (tp.GetWord(targetPos).IsNonTerminal()) {
UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
// map the target-side non-terminal to its subderivation and source position
size_t sourceInd = targetPos2SourceInd[targetPos];
size_t sourcePos = sourceInd2pos[sourceInd];
const Moses::Syntax::KBestExtractor::Derivation &subderivation =
*derivation.subderivations[sourceInd];
// calc source size
size_t sourceSize =
subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered();
sourceOffsets[sourcePos] = sourceSize;
// calc target size.
// Recursively look through child hypos
size_t currStartTarget = startTarget + totalTargetSize;
size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
currStartTarget);
targetOffsets[targetPos] = targetSize;
totalTargetSize += targetSize;
++targetInd;
} else {
// a terminal contributes exactly one target word
++totalTargetSize;
}
}
// convert position within translation rule to absolute position within
// source sentence / output sentence
// (ShiftOffsets is defined outside this function)
ShiftOffsets(sourceOffsets, startSource);
ShiftOffsets(targetOffsets, startTarget);
// get alignments from this hypo
const AlignmentInfo &aiTerm = shyperedge.translation->GetAlignTerm();
// add to output arg, offsetting by source & target
AlignmentInfo::const_iterator iter;
for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
const std::pair<size_t,size_t> &align = *iter;
size_t relSource = align.first;
size_t relTarget = align.second;
size_t absSource = sourceOffsets[relSource];
size_t absTarget = targetOffsets[relTarget];
std::pair<size_t, size_t> alignPoint(absSource, absTarget);
std::pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
// every alignment point is expected to be new
UTIL_THROW_IF2(!ret.second, "Error");
}
return totalTargetSize;
}
// Returns the number of source-side symbols of the rule applied at the root
// of derivation d: the words covered by the head vertex's span, with the span
// of each tail vertex counted as one symbol rather than as the words it
// covers.
template<typename Parser>
size_t Manager<Parser>::CalcSourceSize(const Syntax::KBestExtractor::Derivation &d) const
{
  using namespace Moses::Syntax;
  const Syntax::SHyperedge &edge = d.edge->shyperedge;
  // Full width of the head span ...
  size_t sourceSize = edge.head->pvertex->span.GetNumWordsCovered();
  // ... minus everything but one position for each child span.
  const size_t numChildren = edge.tail.size();
  for (size_t child = 0; child != numChildren; ++child) {
    const size_t covered = edge.tail[child]->pvertex->span.GetNumWordsCovered();
    sourceSize -= (covered - 1);
  }
  return sourceSize;
}
} // S2T

View File

@ -1,13 +1,15 @@
#pragma once
#include <set>
#include <vector>
#include <boost/shared_ptr.hpp>
#include "moses/InputType.h"
#include "moses/BaseManager.h"
#include "moses/Syntax/KBestExtractor.h"
#include "moses/Syntax/Manager.h"
#include "moses/Syntax/SVertexStack.h"
#include "moses/Word.h"
#include "OovHandler.h"
#include "ParserCallback.h"
@ -19,14 +21,13 @@ namespace Moses
namespace Syntax
{
class SDerivation;
struct SHyperedge;
namespace S2T
{
template<typename Parser>
class Manager : public BaseManager
class Manager : public Syntax::Manager
{
public:
Manager(const InputType &);
@ -41,25 +42,8 @@ class Manager : public BaseManager
std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
bool onlyDistinct=false) const;
const std::set<Word> &GetUnknownWords() const { return m_oovs; }
void OutputNBest(OutputCollector *collector) const;
void OutputLatticeSamples(OutputCollector *collector) const
{}
void OutputAlignment(OutputCollector *collector) const
{}
void OutputBest(OutputCollector *collector) const;
void OutputDetailedTranslationReport(OutputCollector *collector) const;
void OutputUnknowns(OutputCollector *collector) const;
void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const
{}
void OutputWordGraph(OutputCollector *collector) const
{}
void OutputSearchGraph(OutputCollector *collector) const
{}
void OutputSearchGraphSLF() const
{}
void OutputSearchGraphHypergraph() const
{}
private:
void FindOovs(const PChart &, std::set<Word> &, std::size_t);
@ -74,19 +58,8 @@ class Manager : public BaseManager
PChart m_pchart;
SChart m_schart;
std::set<Word> m_oovs;
boost::shared_ptr<typename Parser::RuleTrie> m_oovRuleTrie;
std::vector<boost::shared_ptr<Parser> > m_parsers;
// output
void OutputNBestList(OutputCollector *collector,
const Moses::Syntax::KBestExtractor::KBestVec &nBestList,
long translationId) const;
std::size_t OutputAlignmentNBest(Alignments &retAlign,
const Moses::Syntax::KBestExtractor::Derivation &derivation,
std::size_t startTarget) const;
size_t CalcSourceSize(const Syntax::KBestExtractor::Derivation &d) const;
};
} // S2T

View File

@ -4,6 +4,7 @@
#include <boost/shared_ptr.hpp>
#include "moses/Phrase.h"
#include "moses/Syntax/RuleTableFF.h"
#include "moses/TargetPhrase.h"
#include "moses/Word.h"

View File

@ -85,16 +85,17 @@ public:
m_name = name;
}
static const PhraseDictionaryDynamicCacheBased& Instance(const std::string name) {
UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The PhraseDictionaryDynamicCacheBased feature named " + name + " does not exist!");
return *(s_instance_map[name]);
// Looks up the feature instance registered under `name` in s_instance_map.
// Returns NULL when no instance with that name is registered, so callers
// must check the result before dereferencing it.
static const PhraseDictionaryDynamicCacheBased* Instance(const std::string& name) {
  if (s_instance_map.count(name) == 0) {
    return NULL;
  }
  return s_instance_map[name];
}
static PhraseDictionaryDynamicCacheBased& InstanceNonConst(const std::string name) {
UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The PhraseDictionaryDynamicCacheBased feature named " + name + " does not exist!");
return *(s_instance_map[name]);
// Mutable counterpart of Instance(name): looks up the feature instance
// registered under `name` in s_instance_map.  Returns NULL when no instance
// with that name is registered, so callers must check the result before
// dereferencing it.
static PhraseDictionaryDynamicCacheBased* InstanceNonConst(const std::string& name) {
  if (s_instance_map.count(name) == 0) {
    return NULL;
  }
  return s_instance_map[name];
}
// Returns the object referenced by the static s_instance pointer.
// NOTE(review): s_instance is dereferenced without a null check; presumably it
// is set before this is called — confirm against where s_instance is assigned.
static const PhraseDictionaryDynamicCacheBased& Instance() {
return *s_instance;
}

View File

@ -90,6 +90,8 @@ void TranslationTask::RunPb()
additionalReportingTime.stop();
manager.OutputBest(m_ioWrapper.GetSingleBestOutputCollector());
// apply decision rule and output best translation(s)
if (m_ioWrapper.GetSingleBestOutputCollector()) {
ostringstream out;
@ -268,17 +270,9 @@ void TranslationTask::RunChart()
if (staticData.GetSearchAlgorithm() == ChartIncremental) {
Incremental::Manager manager(*m_source);
manager.Decode();
const std::vector<search::Applied> &nbest = manager.GetNBest();
if (!nbest.empty()) {
m_ioWrapper.OutputBestHypo(nbest[0], translationId);
manager.OutputDetailedTranslationReport(m_ioWrapper.GetDetailedTranslationCollector());
manager.OutputDetailedTreeFragmentsTranslationReport(m_ioWrapper.GetDetailTreeFragmentsOutputCollector());
} else {
m_ioWrapper.OutputBestNone(translationId);
}
manager.OutputBest(m_ioWrapper.GetSingleBestOutputCollector());
manager.OutputDetailedTranslationReport(m_ioWrapper.GetDetailedTranslationCollector());
manager.OutputDetailedTreeFragmentsTranslationReport(m_ioWrapper.GetDetailTreeFragmentsOutputCollector());
manager.OutputNBest(m_ioWrapper.GetNBestOutputCollector());
return;
@ -293,8 +287,8 @@ void TranslationTask::RunChart()
manager.OutputSearchGraphHypergraph();
// 1-best
const ChartHypothesis *bestHypo = manager.GetBestHypothesis();
m_ioWrapper.OutputBestHypo(bestHypo, translationId);
manager.OutputBest(m_ioWrapper.GetSingleBestOutputCollector());
IFVERBOSE(2) {
PrintUserTime("Best Hypothesis Generation Time:");
}

View File

@ -51,8 +51,8 @@ private:
Syntax::S2T::Manager<Parser> manager(*m_source);
manager.Decode();
// 1-best
const Syntax::SHyperedge *best = manager.GetBestSHyperedge();
m_ioWrapper.OutputBestHypo(best, translationId);
manager.OutputBest(m_ioWrapper.GetSingleBestOutputCollector());
// n-best
manager.OutputNBest(m_ioWrapper.GetNBestOutputCollector());

View File

@ -139,11 +139,11 @@ enum DictionaryFind {
enum SearchAlgorithm {
Normal = 0
,CubePruning = 1
,CubeGrowing = 2
,ChartDecoding= 3
,NormalBatch = 4
,ChartIncremental = 5
,CubePruning = 1
//,CubeGrowing = 2
,CYKPlus = 3
,NormalBatch = 4
,ChartIncremental = 5
};
enum SourceLabelOverlap {

View File

View File

@ -1120,6 +1120,7 @@ sub run_single_giza {
m2 => 0 ,
m3 => 3 ,
m4 => 3 ,
hmmiterations => 0 ,
o => "giza" ,
nodumps => 1 ,
onlyaldumps => 1 ,
@ -1141,7 +1142,6 @@ sub run_single_giza {
if ($_HMM_ALIGN) {
$GizaDefaultOptions{m3} = 0;
$GizaDefaultOptions{m4} = 0;
$GizaDefaultOptions{hmmiterations} = 5;
$GizaDefaultOptions{hmmdumpfrequency} = 5;
$GizaDefaultOptions{nodumps} = 0;
}
@ -2115,7 +2115,7 @@ sub create_ini {
my $path = `pwd`; chop($path);
$fn = $path."/".$fn;
}
$type = "SRILM" unless defined $type; # default to SRILM if no type given
$type = "KENLM" unless defined $type; # default to KENLM if no type given
if ($type =~ /^\d+$/) {
# backwards compatibility if the type is given not as string but as a number