2006-07-15 04:13:27 +04:00
|
|
|
#include <iostream>
|
2006-07-21 17:22:00 +04:00
|
|
|
//#include <fstream>
|
2006-07-15 04:13:27 +04:00
|
|
|
#include <sstream>
|
|
|
|
#include <vector>
|
|
|
|
#include <string>
|
|
|
|
#include <iterator>
|
|
|
|
#include <functional>
|
|
|
|
#include <sys/stat.h>
|
2006-07-18 04:12:51 +04:00
|
|
|
#include "TypeDef.h"
|
2006-07-15 04:13:27 +04:00
|
|
|
#include "PhraseDictionaryTree.h"
|
2006-07-18 04:12:51 +04:00
|
|
|
#include "ConfusionNet.h"
|
|
|
|
#include "FactorCollection.h"
|
|
|
|
#include "Phrase.h"
|
2006-07-21 17:22:00 +04:00
|
|
|
#include "InputFileStream.h"
|
2006-07-28 22:14:20 +04:00
|
|
|
#include "Timer.h"
|
|
|
|
|
2008-09-12 22:19:41 +04:00
|
|
|
using namespace std;
|
2006-07-28 22:14:20 +04:00
|
|
|
// Global Timer instance (type declared in "Timer.h").
// NOTE(review): nothing in the visible part of this file references it;
// presumably other code picks it up via an extern declaration -- TODO confirm.
Timer timer;
|
2006-07-18 04:12:51 +04:00
|
|
|
|
|
|
|
// Stream a vector as "<size> <elem0> <elem1> ... ": the element count first,
// then every element, each item followed by a single blank (so the output
// always ends with a trailing space). Returns the stream to allow chaining.
template<typename T>
std::ostream& operator<<(std::ostream& out,const std::vector<T>& x)
{
  typedef typename std::vector<T>::size_type Index;
  const Index count=x.size();

  out<<count<<" ";
  for(Index pos=0;pos<count;++pos) {
    out<<x[pos]<<' ';
  }
  return out;
}
|
|
|
|
|
2006-07-15 04:13:27 +04:00
|
|
|
// Returns true when stat() succeeds on `filename`, i.e. the path names an
// existing, accessible file system entry (regular file, directory, ...).
// A null `filename` is reported as "does not exist" instead of being handed
// to stat(), whose path argument must not be null.
inline bool existsFile(const char* filename) {
  if(!filename) return false;
  struct stat mystat;
  return (stat(filename,&mystat)==0);
}
|
|
|
|
// std::string convenience overload: forwards the path's C string to the
// const char* overload of existsFile and returns its result.
inline bool existsFile(const std::string& filename) {
  const char* const path=filename.c_str();
  return existsFile(path);
}
|
|
|
|
|
|
|
|
int main(int argc,char **argv) {
|
2006-07-18 04:12:51 +04:00
|
|
|
std::string fto;size_t noScoreComponent=5;int cn=0;
|
2008-09-12 22:19:41 +04:00
|
|
|
bool aligninfo=false;
|
2006-07-18 04:12:51 +04:00
|
|
|
std::vector<std::pair<std::string,std::pair<char*,char*> > > ftts;
|
2006-07-21 17:22:00 +04:00
|
|
|
int verb=0;
|
2006-07-15 04:13:27 +04:00
|
|
|
for(int i=1;i<argc;++i) {
|
|
|
|
std::string s(argv[i]);
|
2006-07-18 04:12:51 +04:00
|
|
|
if(s=="-ttable") {
|
|
|
|
std::pair<char*,char*> p;
|
|
|
|
p.first=argv[++i];
|
|
|
|
p.second=argv[++i];
|
|
|
|
ftts.push_back(std::make_pair(std::string(argv[++i]),p));
|
|
|
|
}
|
2006-07-15 04:13:27 +04:00
|
|
|
else if(s=="-nscores") noScoreComponent=atoi(argv[++i]);
|
|
|
|
else if(s=="-out") fto=std::string(argv[++i]);
|
2006-07-18 04:12:51 +04:00
|
|
|
else if(s=="-cn") cn=1;
|
|
|
|
else if(s=="-irst") cn=2;
|
2008-09-12 22:19:41 +04:00
|
|
|
else if(s=="-alignment-info") aligninfo=true;
|
2006-07-21 17:22:00 +04:00
|
|
|
else if(s=="-v") verb=atoi(argv[++i]);
|
2006-07-15 04:13:27 +04:00
|
|
|
else if(s=="-h")
|
|
|
|
{
|
|
|
|
std::cerr<<"usage "<<argv[0]<<" :\n\n"
|
|
|
|
"options:\n"
|
2006-07-18 04:12:51 +04:00
|
|
|
"\t-ttable int int string -- translation table file, use '-' for stdin\n"
|
2006-07-15 04:13:27 +04:00
|
|
|
"\t-out string -- output file name prefix for binary ttable\n"
|
2006-07-18 04:12:51 +04:00
|
|
|
"\t-nscores int -- number of scores in ttable\n"
|
2008-09-12 22:19:41 +04:00
|
|
|
"\t-alignment-info -- include alignment info in the binary ttable (suffix \".wa\")\n"
|
|
|
|
"\nfunctions:\n"
|
2006-07-15 04:13:27 +04:00
|
|
|
"\t - convert ascii ttable in binary format\n"
|
|
|
|
"\t - if ttable is not read from stdin:\n"
|
|
|
|
"\t treat each line as source phrase an print tgt candidates\n"
|
|
|
|
"\n";
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
std::cerr<<"ERROR: unknown option '"<<s<<"'\n";
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-09-12 22:19:41 +04:00
|
|
|
|
2006-07-18 04:12:51 +04:00
|
|
|
if(ftts.size()) {
|
2008-09-12 22:19:41 +04:00
|
|
|
|
|
|
|
if(ftts.size()==1){
|
|
|
|
std::cerr<<"processing ptree for ";
|
|
|
|
PhraseDictionaryTree pdt(noScoreComponent);
|
|
|
|
|
|
|
|
pdt.PrintWordAlignment(aligninfo);
|
2006-07-15 04:13:27 +04:00
|
|
|
|
2008-09-12 22:19:41 +04:00
|
|
|
if (ftts[0].first=="-") {
|
|
|
|
std::cerr<< "stdin\n";
|
|
|
|
pdt.Create(std::cin,fto);
|
|
|
|
}
|
|
|
|
else{
|
|
|
|
std::cerr<< ftts[0].first << "\n";
|
|
|
|
InputFileStream in(ftts[0].first);
|
|
|
|
pdt.Create(in,fto);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
2006-07-15 04:13:27 +04:00
|
|
|
{
|
2006-07-28 22:14:20 +04:00
|
|
|
#if 0
|
2006-07-18 04:12:51 +04:00
|
|
|
std::vector<PhraseDictionaryTree const*> pdicts;
|
|
|
|
std::vector<FactorType> factorOrder;
|
|
|
|
for(size_t i=0;i<ftts.size();++i) {
|
2006-07-28 22:14:20 +04:00
|
|
|
|
2006-07-18 04:12:51 +04:00
|
|
|
PhraseDictionaryTree *pdtptr=new PhraseDictionaryTree(noScoreComponent,
|
|
|
|
&factorCollection,
|
|
|
|
getFactorType(atoi(ftts[i].second.first)),
|
|
|
|
getFactorType(atoi(ftts[i].second.second))
|
|
|
|
);
|
|
|
|
factorOrder.push_back(pdtptr->GetInputFactorType());
|
|
|
|
PhraseDictionaryTree &pdt=*pdtptr;
|
|
|
|
pdicts.push_back(pdtptr);
|
|
|
|
|
|
|
|
std::string facStr="."+std::string(ftts[i].second.first)+"-"+std::string(ftts[i].second.second);
|
|
|
|
std::string prefix=ftts[i].first+facStr;
|
|
|
|
if(!existsFile(prefix+".binphr.idx")) {
|
|
|
|
std::cerr<<"bin ttable does not exist -> create it\n";
|
2006-07-21 17:22:00 +04:00
|
|
|
InputFileStream in(prefix);
|
2006-07-18 04:12:51 +04:00
|
|
|
pdt.Create(in,prefix);
|
|
|
|
}
|
|
|
|
std::cerr<<"reading bin ttable\n";
|
|
|
|
pdt.Read(prefix);
|
2008-09-12 22:19:41 +04:00
|
|
|
|
2006-07-18 04:12:51 +04:00
|
|
|
}
|
2006-07-15 04:13:27 +04:00
|
|
|
|
2006-07-18 04:12:51 +04:00
|
|
|
std::cerr<<"processing stdin\n";
|
|
|
|
if(!cn) {
|
|
|
|
std::string line;
|
|
|
|
while(getline(std::cin,line)) {
|
|
|
|
std::istringstream is(line);
|
|
|
|
#if 0
|
|
|
|
std::vector<std::string> f;
|
|
|
|
std::copy(std::istream_iterator<std::string>(is),
|
|
|
|
std::istream_iterator<std::string>(),
|
|
|
|
std::back_inserter(f));
|
|
|
|
#endif
|
|
|
|
std::cerr<<"got source phrase '"<<line<<"'\n";
|
|
|
|
|
|
|
|
Phrase F(Input);
|
|
|
|
F.CreateFromString(factorOrder,line,factorCollection);
|
|
|
|
|
|
|
|
for(size_t k=0;k<pdicts.size();++k) {
|
|
|
|
PhraseDictionaryTree const& pdt=*pdicts[k];
|
|
|
|
|
|
|
|
std::vector<std::string> f(F.GetSize());
|
|
|
|
for(size_t i=0;i<F.GetSize();++i)
|
|
|
|
f[i]=F.GetFactor(i,pdt.GetInputFactorType())->ToString();
|
|
|
|
|
|
|
|
std::stringstream iostA,iostB;
|
|
|
|
std::cerr<<"full phrase processing "<<f<<"\n";
|
|
|
|
pdt.PrintTargetCandidates(f,iostA);
|
2006-07-15 04:13:27 +04:00
|
|
|
|
2006-07-18 04:12:51 +04:00
|
|
|
std::cerr<<"processing with prefix ptr\n";
|
|
|
|
PhraseDictionaryTree::PrefixPtr p(pdt.GetRoot());
|
|
|
|
|
|
|
|
for(size_t i=0;i<f.size() && p;++i) {
|
|
|
|
std::cerr<<"pre "<<i<<" "<<(p?"1":"0")<<"\n";
|
|
|
|
p=pdt.Extend(p,f[i]);
|
|
|
|
std::cerr<<"post "<<i<<" "<<(p?"1":"0")<<"\n";
|
|
|
|
}
|
|
|
|
if(p) {
|
|
|
|
std::cerr<<"retrieving candidates from prefix ptr\n";
|
|
|
|
pdt.PrintTargetCandidates(p,iostB);}
|
|
|
|
else {
|
|
|
|
std::cerr<<"final ptr is invalid\n";
|
|
|
|
iostB<<"there are 0 target candidates\n";
|
|
|
|
}
|
|
|
|
if(iostA.str() != iostB.str())
|
|
|
|
std::cerr<<"ERROR: translation candidates mismatch '"<<iostA.str()<<"' and for prefix pointer: '"<<iostB.str()<<"'\n";
|
2006-07-15 04:13:27 +04:00
|
|
|
|
2006-07-18 04:12:51 +04:00
|
|
|
std::cerr<<"translation candidates:\n"<<iostA.str()<<"\n";
|
|
|
|
pdt.FreeMemory();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
2006-07-15 04:13:27 +04:00
|
|
|
}
|
2006-07-18 04:12:51 +04:00
|
|
|
else {
|
|
|
|
// process confusion net input
|
|
|
|
ConfusionNet net(&factorCollection);
|
2006-07-19 04:16:29 +04:00
|
|
|
std::vector<std::vector<float> > weights;
|
|
|
|
for(size_t i=0;i<pdicts.size();++i)
|
|
|
|
weights.push_back(std::vector<float>(noScoreComponent,1/(1.0*noScoreComponent)));
|
2006-07-18 04:12:51 +04:00
|
|
|
|
2006-07-24 22:33:08 +04:00
|
|
|
while(net.ReadF(std::cin,factorOrder,cn-1)) {
|
2006-07-18 04:12:51 +04:00
|
|
|
net.Print(std::cerr);
|
2006-07-21 17:22:00 +04:00
|
|
|
GenerateCandidates(net,pdicts,weights,verb);
|
2006-07-18 04:12:51 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
2006-07-28 22:14:20 +04:00
|
|
|
#else
|
|
|
|
std::cerr<<"ERROR: these functions are currently broken...\n";
|
|
|
|
exit(1);
|
|
|
|
#endif
|
|
|
|
}
|
2006-07-15 04:13:27 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|