mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 12:52:29 +03:00
Initial check-in.
This commit is contained in:
parent
584626a767
commit
784654c831
69
moses/TranslationModel/UG/mm/mtt-count-words.cc
Normal file
69
moses/TranslationModel/UG/mm/mtt-count-words.cc
Normal file
@ -0,0 +1,69 @@
|
||||
// count words in a memory-mapped corpus
|
||||
#include "ug_mm_ttrack.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "ug_corpus_token.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/foreach.hpp>
|
||||
#include <iomanip>
|
||||
#include "ug_typedefs.h"
|
||||
#include "tpt_pickler.h"
|
||||
// #include "moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h"
|
||||
// #include "moses/TranslationModel/UG/generic/sampling/Sampling.h"
|
||||
// #include "moses/TranslationModel/UG/generic/file_io/ug_stream.h"
|
||||
#include <algorithm>
|
||||
#include "moses/TranslationModel/UG/generic/program_options/ug_get_options.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace ugdiss;
|
||||
using namespace Moses;
|
||||
// Token type stored in the memory-mapped token track T below.
typedef L2R_Token<SimpleWordId> Token;
|
||||
// typedef mmTSA<Token>::tree_iterator iter;
|
||||
// Map from a pair of size_t values to a counter.
// NOTE(review): not referenced anywhere in the visible part of this file.
typedef boost::unordered_map<pair<size_t,size_t>,size_t> phrase_counter_t;
|
||||
|
||||
// NOTE(review): not referenced anywhere in the visible part of this file.
#define CACHING_THRESHOLD 1000
|
||||
|
||||
// Corpus token track; main() opens it from bname + ".mct".
mmTtrack<Token> T; // token tracks
|
||||
// Vocabulary index; main() opens it from bname + ".tdx" and uses it to
// map word ids to the strings it prints.
TokenIndex V; // vocabs
|
||||
// mmTSA<Token> I; // suffix arrays
|
||||
|
||||
// Forward declaration; defined after main(). Fills the option globals below.
void interpret_args(int ac, char* av[]);
|
||||
// Base name of the corpus files; bound to the "bname" option in
// interpret_args().
string bname;
|
||||
// NOTE(review): never read or written in the visible part of this file.
bool echo;
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
interpret_args(argc,argv);
|
||||
T.open(bname+".mct");
|
||||
V.open(bname+".tdx");
|
||||
vector<size_t> cnt(V.ksize(),0);
|
||||
for (size_t sid = 0; sid < T.size(); ++sid)
|
||||
{
|
||||
Token const* stop = T.sntEnd(sid);
|
||||
for (Token const* t = T.sntStart(sid); t < stop; ++cnt[(t++)->id()]);
|
||||
}
|
||||
for (size_t wid = 2; wid < V.ksize(); ++wid)
|
||||
cout << V[wid] << " " << cnt[wid] << endl;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
void
|
||||
interpret_args(int ac, char* av[])
|
||||
{
|
||||
namespace po=boost::program_options;
|
||||
po::variables_map vm;
|
||||
po::options_description o("Options");
|
||||
po::options_description h("Hidden Options");
|
||||
po::positional_options_description a;
|
||||
|
||||
o.add_options()
|
||||
("help,h", "print this message")
|
||||
;
|
||||
|
||||
h.add_options()
|
||||
("bname", po::value<string>(&bname), "base name")
|
||||
;
|
||||
a.add("bname",1);
|
||||
get_options(ac,av,h.add(o),a,vm);
|
||||
}
|
64
moses/TranslationModel/UG/mm/test-dynamic-im-tsa.cc
Normal file
64
moses/TranslationModel/UG/mm/test-dynamic-im-tsa.cc
Normal file
@ -0,0 +1,64 @@
|
||||
// -*- c++ -*-
|
||||
// test program for dynamic tsas
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
#include <boost/program_options/options_description.hpp>
|
||||
#include <boost/program_options/parsers.hpp>
|
||||
#include <boost/program_options/variables_map.hpp>
|
||||
#include <boost/iostreams/device/mapped_file.hpp>
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include "ug_conll_record.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "ug_mm_ttrack.h"
|
||||
#include "tpt_pickler.h"
|
||||
#include "ug_deptree.h"
|
||||
#include "moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h"
|
||||
#include "ug_im_ttrack.h"
|
||||
#include "ug_bitext.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace ugdiss;
|
||||
using namespace Moses;
|
||||
using namespace boost;
|
||||
using namespace Moses::bitext;
|
||||
namespace po=boost::program_options;
|
||||
|
||||
// Token type of the in-memory bitext and suffix array iterator used in main().
typedef L2R_Token<SimpleWordId> L2R;
|
||||
|
||||
int main()
|
||||
{
|
||||
sptr<imBitext<L2R> > bt(new imBitext<L2R>());
|
||||
string s1,s2,aln;
|
||||
vector<string> S1,S2,ALN;
|
||||
while (getline(cin,s1) && getline(cin,s2) && getline(cin,aln))
|
||||
{
|
||||
S1.push_back(s1);
|
||||
S2.push_back(s2);
|
||||
ALN.push_back(aln);
|
||||
}
|
||||
bt = bt->add(S1,S2,ALN);
|
||||
|
||||
TSA<L2R>::tree_iterator m(bt->I2.get());
|
||||
m.down();
|
||||
do {
|
||||
char const* p = m.lower_bound(-1);
|
||||
tsa::ArrayEntry I(p);
|
||||
do {
|
||||
m.root->readEntry(I.next,I);
|
||||
L2R const* stop = m.root->getCorpus()->sntEnd(I.sid);
|
||||
for (L2R const* t = m.root->getCorpus()->getToken(I); t < stop; ++t)
|
||||
cout << (*bt->V2)[t->id()] << " ";
|
||||
cout << endl;
|
||||
} while (I.next < m.upper_bound(-1));
|
||||
} while (m.over());
|
||||
}
|
Loading…
Reference in New Issue
Block a user