mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 04:43:03 +03:00
215 lines
6.6 KiB
C++
215 lines
6.6 KiB
C++
#include "mmsapt.h"
|
|
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
|
|
#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h"
|
|
#include <boost/foreach.hpp>
|
|
#include <boost/format.hpp>
|
|
#include <boost/tokenizer.hpp>
|
|
#include <boost/shared_ptr.hpp>
|
|
#include <algorithm>
|
|
#include <iostream>
|
|
|
|
using namespace Moses;
|
|
using namespace bitext;
|
|
using namespace std;
|
|
using namespace boost;
|
|
|
|
// File-scope factor order holding a single entry, factor 0.
// NOTE(review): not referenced in the visible part of this file
// (SimplePhrase keeps its own m_fo) -- confirm it is still needed.
vector<FactorType> fo(1,FactorType(0));
|
|
|
|
class SimplePhrase : public Moses::Phrase
|
|
{
|
|
vector<FactorType> const m_fo; // factor order
|
|
public:
|
|
SimplePhrase(): m_fo(1,FactorType(0)) {}
|
|
|
|
void init(string const& s)
|
|
{
|
|
istringstream buf(s); string w;
|
|
while (buf >> w)
|
|
{
|
|
Word wrd;
|
|
this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false);
|
|
}
|
|
}
|
|
};
|
|
|
|
class TargetPhraseIndexSorter
|
|
{
|
|
TargetPhraseCollection const& my_tpc;
|
|
CompareTargetPhrase cmp;
|
|
public:
|
|
TargetPhraseIndexSorter(TargetPhraseCollection const& tpc) : my_tpc(tpc) {}
|
|
bool operator()(size_t a, size_t b) const
|
|
{
|
|
// return cmp(*my_tpc[a], *my_tpc[b]);
|
|
return (my_tpc[a]->GetScoreBreakdown().GetWeightedScore() >
|
|
my_tpc[b]->GetScoreBreakdown().GetWeightedScore());
|
|
}
|
|
};
|
|
|
|
int main(int argc, char* argv[])
|
|
{
|
|
|
|
string vlevel = "alt"; // verbosity level
|
|
vector<pair<string,int> > argfilter(5);
|
|
argfilter[0] = std::make_pair(string("--spe-src"),1);
|
|
argfilter[1] = std::make_pair(string("--spe-trg"),1);
|
|
argfilter[2] = std::make_pair(string("--spe-aln"),1);
|
|
argfilter[3] = std::make_pair(string("--spe-show"),1);
|
|
|
|
char** my_args; int my_acnt;
|
|
char** mo_args; int mo_acnt;
|
|
filter_arguments(argc, argv, mo_acnt, &mo_args, my_acnt, &my_args, argfilter);
|
|
|
|
ifstream spe_src,spe_trg,spe_aln;
|
|
// instead of translating show coverage by phrase tables
|
|
for (int i = 0; i < my_acnt; i += 2)
|
|
{
|
|
if (!strcmp(my_args[i],"--spe-src"))
|
|
spe_src.open(my_args[i+1]);
|
|
else if (!strcmp(my_args[i],"--spe-trg"))
|
|
spe_trg.open(my_args[i+1]);
|
|
else if (!strcmp(my_args[i],"--spe-aln"))
|
|
spe_aln.open(my_args[i+1]);
|
|
else if (!strcmp(my_args[i],"--spe-show"))
|
|
vlevel = my_args[i+1];
|
|
}
|
|
|
|
Parameter params;
|
|
if (!params.LoadParam(mo_acnt,mo_args) ||
|
|
!StaticData::LoadDataStatic(¶ms, mo_args[0]))
|
|
exit(1);
|
|
|
|
StaticData const& global = StaticData::Instance();
|
|
global.SetVerboseLevel(0);
|
|
vector<FactorType> ifo = global.GetInputFactorOrder();
|
|
|
|
PhraseDictionary* PT = PhraseDictionary::GetColl()[0];
|
|
Mmsapt* mmsapt = dynamic_cast<Mmsapt*>(PT);
|
|
if (!mmsapt)
|
|
{
|
|
cerr << "Phrase table implementation not supported by this utility." << endl;
|
|
exit(1);
|
|
}
|
|
mmsapt->SetTableLimit(0);
|
|
|
|
string srcline,trgline,alnline;
|
|
cout.precision(2);
|
|
vector<string> fname = mmsapt->GetFeatureNames();
|
|
while (getline(spe_src,srcline))
|
|
{
|
|
UTIL_THROW_IF2(!getline(spe_trg,trgline), HERE
|
|
<< ": missing data for online updates.");
|
|
UTIL_THROW_IF2(!getline(spe_aln,alnline), HERE
|
|
<< ": missing data for online updates.");
|
|
cout << string(80,'-') << "\n" << srcline << "\n" << trgline << "\n" << endl;
|
|
|
|
// cout << srcline << " " << HERE << endl;
|
|
Sentence snt;
|
|
istringstream buf(srcline+"\n");
|
|
if (!snt.Read(buf,ifo)) break;
|
|
// cout << Phrase(snt) << endl;
|
|
int dynprovidx = -1;
|
|
for (size_t i = 0; i < fname.size(); ++i)
|
|
{
|
|
if (fname[i].substr(0,7) == "prov-1.")
|
|
dynprovidx = i;
|
|
}
|
|
cout << endl;
|
|
for (size_t i = 0; i < snt.GetSize(); ++i)
|
|
{
|
|
for (size_t k = i; k < snt.GetSize(); ++k)
|
|
{
|
|
Phrase p = snt.GetSubString(WordsRange(i,k));
|
|
if (!mmsapt->PrefixExists(p)) break;
|
|
TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p);
|
|
if (!trg || !trg->GetSize()) continue;
|
|
|
|
bool header_done = false;
|
|
bool has_dynamic_match = vlevel == "all" || vlevel == "ALL";
|
|
vector<size_t> order; order.reserve(trg->GetSize());
|
|
size_t stop = trg->GetSize();
|
|
|
|
vector<size_t> o2(trg->GetSize());
|
|
for (size_t i = 0; i < stop; ++i) o2[i] = i;
|
|
sort(o2.begin(),o2.end(),TargetPhraseIndexSorter(*trg));
|
|
|
|
for (size_t r = 0; r < stop; ++r) // r for rank
|
|
{
|
|
if (vlevel != "ALL")
|
|
{
|
|
Phrase const& phr = static_cast<Phrase const&>(*(*trg)[o2[r]]);
|
|
ostringstream buf; buf << phr;
|
|
string tphrase = buf.str();
|
|
tphrase.erase(tphrase.size()-1);
|
|
size_t s = trgline.find(tphrase);
|
|
if (s == string::npos) continue;
|
|
size_t e = s + tphrase.size();
|
|
if ((s && trgline[s-1] != ' ') || (e < trgline.size() && trgline[e] != ' '))
|
|
continue;
|
|
}
|
|
order.push_back(r);
|
|
if (!has_dynamic_match)
|
|
{
|
|
ScoreComponentCollection const& scc = (*trg)[o2[r]]->GetScoreBreakdown();
|
|
ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
|
|
FVector const& scores = scc.GetScoresVector();
|
|
has_dynamic_match = scores[idx.first + dynprovidx] > 0;
|
|
}
|
|
}
|
|
if ((vlevel == "alt" || vlevel == "new") && !has_dynamic_match)
|
|
continue;
|
|
|
|
|
|
BOOST_FOREACH(size_t const& r, order)
|
|
{
|
|
ScoreComponentCollection const& scc = (*trg)[o2[r]]->GetScoreBreakdown();
|
|
ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
|
|
FVector const& scores = scc.GetScoresVector();
|
|
float wscore = scc.GetWeightedScore();
|
|
if (vlevel == "new" && scores[idx.first + dynprovidx] == 0)
|
|
continue;
|
|
if (!header_done)
|
|
{
|
|
cout << endl;
|
|
if (trg->GetSize() == 1)
|
|
cout << p << " (1 translation option)" << endl;
|
|
else
|
|
cout << p << " (" << trg->GetSize() << " translation options)" << endl;
|
|
header_done = true;
|
|
}
|
|
Phrase const& phr = static_cast<Phrase const&>(*(*trg)[o2[r]]);
|
|
cout << setw(3) << r+1 << " " << phr << endl;
|
|
cout << " ";
|
|
BOOST_FOREACH(string const& fn, fname)
|
|
cout << " " << format("%10.10s") % fn;
|
|
cout << endl;
|
|
cout << " ";
|
|
for (size_t x = idx.first; x < idx.second; ++x)
|
|
{
|
|
size_t j = x-idx.first;
|
|
float f = (mmsapt && mmsapt->isLogVal(j)) ? exp(scores[x]) : scores[x];
|
|
string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f";
|
|
if (fname[j].substr(0,3) == "lex") fmt = "%10.3e";
|
|
if (fname[j].substr(0,7) == "prov-1.")
|
|
{
|
|
f = round(f/(1-f));
|
|
fmt = "%10d";
|
|
}
|
|
cout << " " << format(fmt) % (mmsapt->isInteger(j) ? round(f) : f);
|
|
}
|
|
cout << " " << format("%10.3e") % exp(wscore)
|
|
<< " " << format("%10.3e") % exp((*trg)[o2[r]]->GetFutureScore()) << endl;
|
|
}
|
|
mmsapt->Release(trg);
|
|
continue;
|
|
}
|
|
}
|
|
mmsapt->add(srcline,trgline,alnline);
|
|
}
|
|
// }
|
|
exit(0);
|
|
}
|
|
|
|
|
|
|