Adapted lookup_mmsapt to binary phrase tables; renamed the utility to ptable-lookup

This commit is contained in:
Ulrich Germann 2014-06-27 01:06:00 +01:00
parent e4c35b9896
commit 40ab68b3d2
2 changed files with 129 additions and 2 deletions

View File

@ -9,8 +9,8 @@ $(TOP)/moses/TranslationModel/UG//mmsapt
$(TOP)/util//kenutil
;
exe lookup_mmsapt :
lookup_mmsapt.cc
exe ptable-lookup :
ptable-lookup.cc
$(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)//boost_iostreams

View File

@ -0,0 +1,127 @@
#include "mmsapt.h"
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include <boost/tokenizer.hpp>
#include <boost/shared_ptr.hpp>
#include <algorithm>
#include <iostream>
using namespace Moses;
using namespace bitext;
using namespace std;
using namespace boost;
// File-level factor order holding a single entry, FactorType(0).
// NOTE(review): not referenced anywhere in the visible part of this file;
// SimplePhrase keeps its own copy (m_fo) -- confirm whether this global is still needed.
vector<FactorType> fo(1,FactorType(0));
/// A Moses::Phrase that can be filled from a plain string.
///
/// The factor order used for every word is fixed at construction time to a
/// single entry, FactorType(0).
class SimplePhrase : public Moses::Phrase
{
  vector<FactorType> const m_fo; // factor order
public:
  SimplePhrase(): m_fo(1,FactorType(0)) {}

  /// Append one word to this phrase for each whitespace-separated token of
  /// @p s. Tokens are created with direction Input and the factor order m_fo.
  /// Words already present in the phrase are left untouched.
  void init(string const& s)
  {
    istringstream buf(s);
    string w;
    // operator>> skips whitespace, so empty tokens are never produced.
    while (buf >> w)
      this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false);
  }
};
class TargetPhraseIndexSorter
{
TargetPhraseCollection const& my_tpc;
CompareTargetPhrase cmp;
public:
TargetPhraseIndexSorter(TargetPhraseCollection const& tpc) : my_tpc(tpc) {}
bool operator()(size_t a, size_t b) const
{
return cmp(*my_tpc[a], *my_tpc[b]);
}
};
int main(int argc, char* argv[])
{
Parameter params;
if (!params.LoadParam(argc,argv) || !StaticData::LoadDataStatic(&params, argv[0]))
exit(1);
StaticData const& global = StaticData::Instance();
global.SetVerboseLevel(0);
vector<FactorType> ifo = global.GetInputFactorOrder();
PhraseDictionary* PT = PhraseDictionary::GetColl()[0];
Mmsapt* mmsapt = dynamic_cast<Mmsapt*>(PT);
PhraseDictionaryTreeAdaptor* pdta = dynamic_cast<PhraseDictionaryTreeAdaptor*>(PT);
// vector<FeatureFunction*> const& ffs = FeatureFunction::GetFeatureFunctions();
if (!mmsapt && !pdta)
{
cerr << "Phrase table implementation not supported by this utility." << endl;
exit(1);
}
string line;
while (true)
{
Sentence phrase;
if (!phrase.Read(cin,ifo)) break;
if (pdta)
{
pdta->InitializeForInput(phrase);
// do we also need to call CleanupAfterSentenceProcessing at the end?
}
Phrase& p = phrase;
cout << p << endl;
TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p);
if (!trg) continue;
vector<size_t> order(trg->GetSize());
for (size_t i = 0; i < order.size(); ++i) order[i] = i;
sort(order.begin(),order.end(),TargetPhraseIndexSorter(*trg));
size_t k = 0;
// size_t precision =
cout.precision(2);
vector<string> fname;
if (mmsapt)
{
fname = mmsapt->GetFeatureNames();
cout << " ";
BOOST_FOREACH(string const& fn, fname)
cout << " " << format("%10.10s") % fn;
cout << endl;
}
BOOST_FOREACH(size_t i, order)
{
Phrase const& phr = static_cast<Phrase const&>(*(*trg)[i]);
cout << setw(3) << ++k << " " << phr << endl;
ScoreComponentCollection const& scc = (*trg)[i]->GetScoreBreakdown();
ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
FVector const& scores = scc.GetScoresVector();
cout << " ";
for (size_t k = idx.first; k < idx.second; ++k)
{
if (mmsapt && fname[k-idx.first].substr(0,3) == "log")
{
if(scores[k] < 0)
cout << " " << format("%10d") % round(exp(-scores[k]));
else
cout << " " << format("%10d") % round(exp(scores[k]));
}
else
cout << " " << format("%10.8f") % exp(scores[k]);
}
cout << endl;
}
PT->Release(trg);
}
exit(0);
}