2014-07-09 05:39:28 +04:00
|
|
|
#include "mmsapt.h"
|
|
|
|
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
|
|
|
|
#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h"
|
|
|
|
#include <boost/foreach.hpp>
|
|
|
|
#include <boost/format.hpp>
|
|
|
|
#include <boost/tokenizer.hpp>
|
|
|
|
#include <boost/shared_ptr.hpp>
|
|
|
|
#include <algorithm>
|
|
|
|
#include <iostream>
|
|
|
|
|
|
|
|
using namespace Moses;
|
|
|
|
using namespace bitext;
|
|
|
|
using namespace std;
|
|
|
|
using namespace boost;
|
|
|
|
|
|
|
|
// Shorthand names for the token type and for the bitext template
// instantiations that the rest of this file works with.
typedef L2R_Token<SimpleWordId> Token;
|
|
|
|
typedef mmBitext<Token> mmbitext;
|
|
|
|
typedef imBitext<Token> imbitext;
|
|
|
|
typedef Bitext<Token>::iter iter;
|
|
|
|
|
|
|
|
// File-scope bitext instance. main() opens it from argv[1..3] and passes its
// V1/V2 members to the imbitext constructor; show() reads bg.I2 and bg.V1.
mmbitext bg;
|
|
|
|
|
2015-04-30 08:05:11 +03:00
|
|
|
void
|
2014-07-09 05:39:28 +04:00
|
|
|
show(ostream& out, iter& f)
|
|
|
|
{
|
|
|
|
iter b(bg.I2.get(),f.getToken(0),f.size());
|
|
|
|
if (b.size() == f.size())
|
|
|
|
out << setw(12) << int(round(b.approxOccurrenceCount()));
|
|
|
|
else
|
|
|
|
out << string(12,' ');
|
|
|
|
out << " " << setw(5) << int(round(f.approxOccurrenceCount())) << " ";
|
2015-04-30 08:05:11 +03:00
|
|
|
out << f.str(bg.V1.get()) << endl;
|
2014-07-09 05:39:28 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-04-30 08:05:11 +03:00
|
|
|
void
|
2014-07-09 05:39:28 +04:00
|
|
|
dump(ostream& out, iter& f)
|
|
|
|
{
|
|
|
|
float cnt = f.size() ? f.approxOccurrenceCount() : 0;
|
|
|
|
if (f.down())
|
|
|
|
{
|
|
|
|
cnt = f.approxOccurrenceCount();
|
|
|
|
do { dump(out,f); }
|
|
|
|
while (f.over());
|
|
|
|
f.up();
|
|
|
|
}
|
2015-04-30 08:05:11 +03:00
|
|
|
if (f.size() && cnt < f.approxOccurrenceCount() && f.approxOccurrenceCount() > 1)
|
2014-07-09 05:39:28 +04:00
|
|
|
show(out,f);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-04-30 08:05:11 +03:00
|
|
|
// Append every line of the text file `fname` to `dest`.
//
// Lines are read with std::getline, so the trailing newline is not stored.
// Existing elements of `dest` are kept; new lines are added after them.
//
// Throws std::ios_base::failure if the file cannot be opened. Previously an
// unreadable or missing file was indistinguishable from an empty one, which
// let the caller continue with silently missing input.
void
read_data(std::string fname, std::vector<std::string>& dest)
{
  std::ifstream in(fname.c_str());
  if (!in.is_open())
    throw std::ios_base::failure("read_data: cannot open '" + fname + "'");

  std::string line;
  while (std::getline(in, line))
    dest.push_back(line);
  // No explicit close(): the ifstream destructor releases the file.
}
|
|
|
|
|
|
|
|
int main(int argc, char* argv[])
|
|
|
|
{
|
|
|
|
bg.open(argv[1],argv[2],argv[3]);
|
|
|
|
sptr<imbitext> fg(new imbitext(bg.V1,bg.V2));
|
|
|
|
vector<string> src,trg,aln;
|
|
|
|
read_data(argv[4],src);
|
|
|
|
read_data(argv[5],trg);
|
|
|
|
read_data(argv[6],aln);
|
|
|
|
fg = fg->add(src,trg,aln);
|
|
|
|
iter mfg(fg->I1.get());
|
|
|
|
dump(cout,mfg);
|
|
|
|
exit(0);
|
|
|
|
}
|
2015-04-30 08:05:11 +03:00
|
|
|
|
|
|
|
|
2014-07-09 05:39:28 +04:00
|
|
|
|