code cleanup & enable trace ("-t") for MBR decoding ("-mbr")

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3246 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
phkoehn 2010-05-10 21:18:47 +00:00
parent 524b1b12d2
commit f52a2cc55d
19 changed files with 100 additions and 60 deletions

View File

@ -59,7 +59,7 @@ protected:
const HypoList *m_orderedHypos;
public:
ChildEntry(size_t pos, const HypoList &orderedHypos, const Moses::Word &headWord)
ChildEntry(size_t pos, const HypoList &orderedHypos, const Moses::Word & /*headWord*/ )
:m_pos(pos)
,m_orderedHypos(&orderedHypos)
//,m_headWord(headWord)

View File

@ -108,9 +108,9 @@ IOWrapper::~IOWrapper()
delete m_detailedTranslationReportingStream;
}
void IOWrapper::Initialization(const std::vector<FactorType> &inputFactorOrder
, const std::vector<FactorType> &outputFactorOrder
, const FactorMask &inputFactorUsed
void IOWrapper::Initialization(const std::vector<FactorType> &/*inputFactorOrder*/
, const std::vector<FactorType> &/*outputFactorOrder*/
, const FactorMask &/*inputFactorUsed*/
, size_t nBestSize
, const std::string &nBestFilePath)
{
@ -224,9 +224,22 @@ void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<
}
}
/**
 * Writes the translation held in a trellis path to a stream, terminated by
 * a newline.
 *
 * The path's edges are visited from the last element of the edge list back
 * to the first. For every edge the current target phrase is printed through
 * OutputSurface() using the output factor order taken from StaticData.
 *
 * @param path               trellis path whose edges are printed
 * @param reportSegmentation when true, each edge with a non-empty target
 *                           phrase is followed by "|start-end| ", the start
 *                           and end positions of its source words range
 * @param reportAllFactors   forwarded unchanged to OutputSurface()
 * @param out                destination stream
 *
 * The translation id argument is accepted but not used.
 */
void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,bool reportSegmentation, bool reportAllFactors, std::ostream &out)
{
  typedef std::vector<const Hypothesis *> EdgeList;
  const EdgeList &edges = path.GetEdges();

  // walk the edge list back to front
  for (EdgeList::const_reverse_iterator edgeIter = edges.rbegin(); edgeIter != edges.rend(); ++edgeIter)
  {
    const Hypothesis &edge = **edgeIter;

    OutputSurface(out, edge.GetCurrTargetPhrase(), StaticData::Instance().GetOutputFactorOrder(), reportAllFactors);

    // the segmentation marker is only emitted for edges that produced target words
    if (reportSegmentation && edge.GetCurrTargetPhrase().GetSize() > 0)
    {
      out << "|" << edge.GetCurrSourceWordsRange().GetStartPos();
      out << "-" << edge.GetCurrSourceWordsRange().GetEndPos() << "| ";
    }
  }

  out << endl;
}
void IOWrapper::Backtrack(const Hypothesis *hypo){
@ -236,18 +249,7 @@ void IOWrapper::Backtrack(const Hypothesis *hypo){
}
}
/**
 * Writes a sequence of factors to a stream as one line: the string of each
 * factor in order, with a single space between neighbours, followed by a
 * newline.
 *
 * @param mbrBestHypo factors to print, in output order
 * @param out         destination stream
 *
 * The translation id, reportSegmentation and reportAllFactors arguments are
 * accepted but not used by this overload.
 */
void OutputBestHypo(const std::vector<const Factor*>& mbrBestHypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors, ostream& out)
{
  typedef std::vector<const Factor*>::const_iterator FactorIter;

  for (FactorIter it = mbrBestHypo.begin(); it != mbrBestHypo.end(); ++it)
  {
    // separator goes before every factor except the first
    if (it != mbrBestHypo.begin())
    {
      out << " ";
    }
    out << (*it)->GetString();
  }

  out << endl;
}
void OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors, ostream& out)
void OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*translationId*/, bool /*reportSegmentation*/, bool /*reportAllFactors*/, ostream& out)
{
for (size_t i = 0 ; i < mbrBestHypo.size() ; i++)

View File

@ -58,12 +58,12 @@ protected:
const std::vector<Moses::FactorType> &m_inputFactorOrder;
const std::vector<Moses::FactorType> &m_outputFactorOrder;
const Moses::FactorMask &m_inputFactorUsed;
std::string m_inputFilePath;
Moses::InputFileStream *m_inputFile;
std::istream *m_inputStream;
std::ostream *m_nBestStream
,*m_outputWordGraphStream,*m_outputSearchGraphStream;
std::ostream *m_detailedTranslationReportingStream;
std::string m_inputFilePath;
std::istream *m_inputStream;
Moses::InputFileStream *m_inputFile;
bool m_surpressSingleBestOutput;
void Initialization(const std::vector<Moses::FactorType> &inputFactorOrder
@ -89,9 +89,9 @@ public:
Moses::InputType* GetInput(Moses::InputType *inputType);
void OutputBestHypo(const Moses::Hypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors);
void OutputBestHypo(const Moses::Hypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors);
void OutputNBestList(const Moses::TrellisPathList &nBestList, long translationId);
void OutputLatticeMBRNBestList(const std::vector<LatticeMBRSolution>& solutions,long translationId);
void OutputLatticeMBRNBestList(const std::vector<LatticeMBRSolution>& solutions,long translationId);
void Backtrack(const Moses::Hypothesis *hypo);
void ResetTranslationId() { m_translationId = 0; }
@ -116,9 +116,8 @@ bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::Inpu
void OutputSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder ,bool reportSegmentation, bool reportAllFactors);
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>&, long translationId);
void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);
void OutputBestHypo(const std::vector<const Moses::Factor*>& mbrBestHypo, long translationId,
bool reportSegmentation, bool reportAllFactors, std::ostream& out);
void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/,
bool reportSegmentation, bool reportAllFactors, std::ostream& out);
void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,bool reportSegmentation, bool reportAllFactors, std::ostream &out);
#endif

View File

@ -548,7 +548,7 @@ vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList) {
return solutions.at(0).GetWords();
}
vector<Word> doConsensusDecoding(Manager& manager, TrellisPathList& nBestList) {
const TrellisPath doConsensusDecoding(Manager& manager, TrellisPathList& nBestList) {
static const int BLEU_ORDER = 4;
static const float SMOOTH = 1;
@ -636,9 +636,10 @@ vector<Word> doConsensusDecoding(Manager& manager, TrellisPathList& nBestList) {
}
}
vector<Word> bestWords;
GetOutputWords(**best,bestWords);
return bestWords;
return **best;
//vector<Word> bestWords;
//GetOutputWords(**best,bestWords);
//return bestWords;
}

View File

@ -143,5 +143,6 @@ void GetOutputFactors(const TrellisPath &path, std::vector <Word> &translation);
void extract_ngrams(const std::vector<Word >& sentence, std::map < Phrase, int > & allngrams);
bool ascendingCoverageCmp(const Hypothesis* a, const Hypothesis* b);
std::vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList);
std::vector<Word> doConsensusDecoding(Manager& manager, TrellisPathList& nBestList);
const TrellisPath doConsensusDecoding(Manager& manager, TrellisPathList& nBestList);
//std::vector<Word> doConsensusDecoding(Manager& manager, TrellisPathList& nBestList);
#endif

View File

@ -174,9 +174,11 @@ int main(int argc, char* argv[])
IFVERBOSE(2) { PrintUserTime("N-Best Hypotheses Generation Time:"); }
}
}
// output best translation using posterior methods (MBR, Lattice MBR, Consensus)
else {
// always need n-best list -> get it
size_t nBestSize = staticData.GetMBRSize();
if (nBestSize <= 0)
{
cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
@ -186,26 +188,37 @@ int main(int argc, char* argv[])
manager.CalcNBest(nBestSize, nBestList,true);
VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
IFVERBOSE(2) { PrintUserTime("calculated n-best list for (L)MBR decoding"); }
if (!staticData.GetNBestFilePath().empty()) {
if (!staticData.GetNBestFilePath().empty())
{
vector<LatticeMBRSolution> solutions;
size_t n = min(nBestSize, staticData.GetNBestSize());
getLatticeMBRNBest(manager,nBestList,solutions,n);
VERBOSE(2,"WRITING " << solutions.size() << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
ioWrapper->OutputLatticeMBRNBestList(solutions,source->GetTranslationId());
} else if (staticData.UseLatticeMBR()) {
}
// lattice MBR
else if (staticData.UseLatticeMBR())
{
vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
OutputBestHypo(mbrBestHypo, source->GetTranslationId(),
staticData.GetReportSegmentation(),
staticData.GetReportAllFactors(),cout);
IFVERBOSE(2) { PrintUserTime("finished Lattice MBR decoding"); }
} else if (staticData.UseConsensusDecoding()) {
std::vector<Word> conBestHypo = doConsensusDecoding(manager,nBestList);
}
// consensus decoding
else if (staticData.UseConsensusDecoding())
{
//std::vector<Word> conBestHypo = doConsensusDecoding(manager,nBestList);
const TrellisPath &conBestHypo = doConsensusDecoding(manager,nBestList);
OutputBestHypo(conBestHypo, source->GetTranslationId(),
staticData.GetReportSegmentation(),
staticData.GetReportAllFactors(),cout);
IFVERBOSE(2) { PrintUserTime("finished Consensus decoding"); }
} else {
std::vector<const Factor*> mbrBestHypo = doMBR(nBestList);
}
// n-best MBR
else
{
const TrellisPath &mbrBestHypo = doMBR(nBestList);
OutputBestHypo(mbrBestHypo, source->GetTranslationId(),
staticData.GetReportSegmentation(),
staticData.GetReportAllFactors(),cout);

View File

@ -105,7 +105,8 @@ class TranslationTask : public Task {
m_source(source), m_lineNumber(lineNumber),
m_outputCollector(outputCollector), m_nbestCollector(nbestCollector) {}
void Run() {
void Run()
{
#if defined(BOOST_HAS_PTHREADS)
TRACE_ERR("Translating line " << m_lineNumber << " in thread id " << (int)pthread_self() << std::endl);
#endif
@ -132,9 +133,12 @@ class TranslationTask : public Task {
}
}
out << endl;
} else {
}
else
{
size_t nBestSize = staticData.GetMBRSize();
if (nBestSize <= 0) {
if (nBestSize <= 0)
{
cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
exit(1);
}
@ -143,8 +147,10 @@ class TranslationTask : public Task {
VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
IFVERBOSE(2) { PrintUserTime("calculated n-best list for (L)MBR decoding"); }
if (staticData.UseLatticeMBR()) {
if (m_nbestCollector) {
if (staticData.UseLatticeMBR())
{
if (m_nbestCollector)
{
//lattice mbr nbest
vector<LatticeMBRSolution> solutions;
size_t n = min(nBestSize, staticData.GetNBestSize());
@ -152,16 +158,20 @@ class TranslationTask : public Task {
ostringstream out;
OutputLatticeMBRNBest(out, solutions,m_lineNumber);
m_nbestCollector->Write(m_lineNumber, out.str());
} else {
}
else
{
//Lattice MBR decoding
vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
OutputBestHypo(mbrBestHypo, m_lineNumber, staticData.GetReportSegmentation(),
staticData.GetReportAllFactors(),out);
IFVERBOSE(2) { PrintUserTime("finished Lattice MBR decoding"); }
}
} else {
}
else
{
//MBR decoding
std::vector<const Factor*> mbrBestHypo = doMBR(nBestList);
const Moses::TrellisPath &mbrBestHypo = doMBR(nBestList);
OutputBestHypo(mbrBestHypo, m_lineNumber,
staticData.GetReportSegmentation(),
staticData.GetReportAllFactors(),out);

View File

@ -95,7 +95,7 @@ float calculate_score(const vector< vector<const Factor*> > & sents, int ref, in
return exp(logbleu);
}
vector<const Factor*> doMBR(const TrellisPathList& nBestList){
const TrellisPath doMBR(const TrellisPathList& nBestList){
float marginal = 0;
vector<float> joint_prob_vec;
@ -160,7 +160,8 @@ vector<const Factor*> doMBR(const TrellisPathList& nBestList){
iter++;
}
/* Find sentence that minimises Bayes Risk under 1- BLEU loss */
return translations[minMBRLossIdx];
return nBestList.at(minMBRLossIdx);
//return translations[minMBRLossIdx];
}
void GetOutputFactors(const TrellisPath &path, vector <const Factor*> &translation){

View File

@ -22,7 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#ifndef moses_cmd_mbr_h
#define moses_cmd_mbr_h
std::vector<const Moses::Factor*> doMBR(const Moses::TrellisPathList& nBestList);
const Moses::TrellisPath doMBR(const Moses::TrellisPathList& nBestList);
void GetOutputFactors(const Moses::TrellisPath &path, std::vector <const Moses::Factor*> &translation);
float calculate_score(const std::vector< std::vector<const Moses::Factor*> > & sents, int ref, int hyp, std::vector < std::map < std::vector < const Moses::Factor *>, int > > & ngram_stats );
#endif

View File

@ -147,7 +147,7 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
// initial position is not further into the sentence than the distortion limit.
if (hypo.GetWordsBitmap().GetNumWordsCovered() == 0)
{
if (transOptRange.GetStartPos() <= maxDistortion)
if ((int)transOptRange.GetStartPos() <= maxDistortion)
m_hypotheses.push_back(&hypo);
}
else

View File

@ -82,8 +82,8 @@ inline void IncrementIterators(vector< WordListIterator > &wordListIterVector
void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc
, bool adhereTableLimit) const
, TranslationOptionCollection * /* toc */
, bool /*adhereTableLimit*/) const
{
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0)
{ // word deletion

View File

@ -256,7 +256,7 @@ void HypothesisStackCubePruning::CleanupArcList()
void HypothesisStackCubePruning::SetBitmapAccessor(const WordsBitmap &newBitmap
, HypothesisStackCubePruning &stack
, const WordsRange &range
, const WordsRange &/*range*/
, BitmapContainer &bitmapContainer
, const SquareMatrix &futureScore
, const TranslationOptionList &transOptList)

View File

@ -54,7 +54,7 @@ int InputType::ComputeDistortionDistance(const WordsRange& prev, const WordsRang
return abs(dist);
}
bool InputType::CanIGetFromAToB(size_t start, size_t end) const
bool InputType::CanIGetFromAToB(size_t /*start*/, size_t /*end*/) const
{
return true;
}

View File

@ -171,7 +171,7 @@ struct LMState : public FFState {
}
};
const FFState* LanguageModel::EmptyHypothesisState(const InputType &input) const {
const FFState* LanguageModel::EmptyHypothesisState(const InputType &/*input*/) const {
return new LMState(NULL);
}

View File

@ -119,7 +119,7 @@ bool LanguageModelInternal::Load(const std::string &filePath
float LanguageModelInternal::GetValue(const std::vector<const Word*> &contextFactor
, State* finalState
, unsigned int* len) const
, unsigned int* /*len*/) const
{
const size_t ngram = contextFactor.size();
switch (ngram)

View File

@ -597,14 +597,14 @@ void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f){
}
}
void LexicalReorderingTableTree::Cache(const ConfusionNet& input){
void LexicalReorderingTableTree::Cache(const ConfusionNet& /*input*/){
return;
}
void LexicalReorderingTableTree::Cache(const Sentence& input){
//only works with sentences...
int prev_cache_size = m_Cache.size();
int max_phrase_length = input.GetSize();
size_t prev_cache_size = m_Cache.size();
size_t max_phrase_length = input.GetSize();
for(size_t len = 0; len <= max_phrase_length; ++len){
for(size_t start = 0; start+len <= input.GetSize(); ++start){
Phrase f = input.GetSubString(WordsRange(start, start+len));

View File

@ -69,6 +69,16 @@ public:
{
return m_collection.size();
}
/**
 * Returns, by value, the path found at the given zero-based position of the
 * collection.
 *
 * The collection is walked from its beginning one element at a time, so the
 * number of iterator steps equals 'position'.
 *
 * No range check is performed here: the caller has to make sure that
 * 'position' is smaller than the number of stored paths.
 */
const TrellisPath at(size_t position) const
{
  const_iterator cursor = m_collection.begin();

  // step forward until the requested position is reached
  size_t stepsLeft = position;
  while (stepsLeft != 0)
  {
    ++cursor;
    --stepsLeft;
  }

  return **cursor;
}
};
}

View File

@ -58,7 +58,7 @@ protected:
//sets elements by vector
void Initialize(std::vector<bool> vector)
{
int vector_size = vector.size();
size_t vector_size = vector.size();
for (size_t pos = 0 ; pos < m_size ; pos++)
{
if (pos < vector_size && vector[pos] == true) m_bitmap[pos] = true;

View File

@ -77,7 +77,10 @@ string TrimXml(const string& str)
*/
bool isXmlTag(const string& tag)
{
return tag[0] == '<';
return (tag[0] == '<' &&
(tag[1] == '/' ||
(tag[1] >= 'a' && tag[1] <= 'z') ||
(tag[1] >= 'A' && tag[1] <= 'Z')));
}
/**