// NOTE(review): this file appears to have been damaged in storage/extraction.
// The original line breaks are gone, and text that stood between '<' and '>'
// (the name of the first #include, template arguments, parts of loop
// conditions and loop bodies) seems to be missing -- see for example
// "#include #include", "std::vector&" and "for (unsigned k=0;k vPPtr;".
// As stored, the first physical line is lexically one single '//' comment.
// Restore the file from version control before building; nothing below this
// note has been reconstructed or altered.
//
// Intended structure, as far as the remaining text shows:
//   * "#if 0" branch (disabled): GenerateTuples, the State and GCData structs,
//     GeneratePerFactorTgtList, GenerateTupleTgtCands, GenerateCandidates_ and
//     a full GenerateCandidates implementation.
//   * "#else" branch: a stub GenerateCandidates whose parameters are all
//     unnamed (unused) and whose body only writes
//     "ERROR: GenerateCandidates is currently broken" to std::cerr.
//////////////////////////////////////////////////////////// // // generate set of target candidates for confusion net // //////////////////////////////////////////////////////////// #include #include "Word.h" #include "Phrase.h" #include "ConfusionNet.h" #include "WordsRange.h" #include "PhraseDictionaryTree.h" using namespace Moses; #if 0 // Generates all tuples from n indexes with ranges 0 to card[j]-1, respectively.. // Input: number of indexes and ranges: ranges[0] ... ranges[num_idx-1] // Output: number of tuples and monodimensional array of tuples. // Reference: mixed-radix generation algorithm (D. E. Knuth, TAOCP v. 4.2) size_t GenerateTuples(unsigned num_idx,unsigned* ranges,unsigned *&tuples) { unsigned* single_tuple= new unsigned[num_idx+1]; unsigned num_tuples=1; for (unsigned k=0;k vPPtr; typedef std::vector > mPhrase; std::ostream& operator<<(std::ostream& out,const mPhrase& p) { for(size_t i=0;iToString()<<" "; out<<"|"; } return out; } struct State { vPPtr ptrs; WordsRange range; float score; State() : range(0,0),score(0.0) {} State(size_t b,size_t e,const vPPtr& v,float sc=0.0) : ptrs(v),range(b,e),score(sc) {} size_t begin() const {return range.GetStartPos();} size_t end() const {return range.GetEndPos();} float GetScore() const {return score;} }; std::ostream& operator<<(std::ostream& out,const State& s) { out<<"["< E2Costs; struct GCData { const std::vector& pdicts; const std::vector >& weights; std::vector inF,outF; size_t distinctOutputFactors; vPPtr root; size_t totalTuples,distinctTuples; GCData(const std::vector& a, const std::vector >& b) : pdicts(a),weights(b),totalTuples(0),distinctTuples(0) { assert(pdicts.size()==weights.size()); std::set distinctOutFset; inF.resize(pdicts.size()); outF.resize(pdicts.size()); root.resize(pdicts.size()); for(size_t i=0;iGetRoot(); inF[i]=pdicts[i]->GetInputFactorType(); outF[i]=pdicts[i]->GetOutputFactorType(); distinctOutFset.insert(pdicts[i]->GetOutputFactorType()); } 
distinctOutputFactors=distinctOutFset.size(); } FactorType OutFT(size_t i) const {return outF[i];} FactorType InFT(size_t i) const {return inF[i];} size_t DistinctOutFactors() const {return distinctOutputFactors;} const vPPtr& GetRoot() const {return root;} }; typedef std::vector vFactor; typedef std::vector > TgtCandList; typedef std::vector OutputFactor2TgtCandList; typedef std::vector Len2Cands; void GeneratePerFactorTgtList(size_t factorType,PPtr pptr,GCData& data,Len2Cands& len2cands) { std::vector cands; data.pdicts[factorType]->GetTargetCandidates(pptr,cands); for(std::vector::const_iterator cand=cands.begin();cand!=cands.end();++cand) { assert(data.weights[factorType].size()==cand->second.size()); float costs=std::inner_product(data.weights[factorType].begin(), data.weights[factorType].end(), cand->second.begin(), 0.0); size_t len=cand->first.size(); if(len>=len2cands.size()) len2cands.resize(len+1,0); if(!len2cands[len]) len2cands[len]=new OutputFactor2TgtCandList(data.DistinctOutFactors()); OutputFactor2TgtCandList &outf2tcandlist=*len2cands[len]; outf2tcandlist[data.OutFT(factorType)].push_back(std::make_pair(costs,cand->first)); } } void GenerateTupleTgtCands(OutputFactor2TgtCandList& tCand,E2Costs& e2costs,GCData& data) { // check if candidates are non-empty bool gotCands=1; for(size_t j=0;gotCands && j radix(data.DistinctOutFactors()); for(size_t i=0;i const& mycand=tCand[j][tuples[radix.size()*i+j]]; e[j]=mycand.second; costs+=mycand.first; } #ifdef DEBUG bool mismatch=0; for(size_t j=1;!mismatch && j p=e2costs.insert(std::make_pair(e,costs)); if(p.second) ++data.distinctTuples; else { // entry known, take min of costs, alternative: sum probs if(costssecond) p.first->second=costs; } } delete [] tuples; } } void GenerateCandidates_(E2Costs& e2costs,const vPPtr& nextP,GCData& data) { Len2Cands len2cands; // generate candidates for each element of nextP: for(size_t factorType=0;factorType& pdicts, const std::vector >& weights, int verbose) { GCData 
data(pdicts,weights); std::vector stack; for(size_t i=0;i cov2E; // std::cerr<<"start while loop. initial stack size: "<Extend(nextP[j], w.GetFactor(data.InFT(j))->GetString()); bool valid=1; for(size_t j=0;j::const_iterator i=cov2E.begin(); i!=cov2E.end();++i) { std::cerr<first<<" -- distinct cands: " <second.size()<<"\n"; } std::cerr<<"\n\n"; } if(verbose>10) { std::cerr<<"full list:\n"; for(std::map::const_iterator i=cov2E.begin(); i!=cov2E.end();++i) { std::cerr<first<<" -- distinct cands: " <second.size()<<"\n"; for(E2Costs::const_iterator j=i->second.begin();j!=i->second.end();++j) std::cerr<first<<" -- "<second<<"\n"; } } } #else void GenerateCandidates(const ConfusionNet&, const std::vector&, const std::vector >&, int) { std::cerr<<"ERROR: GenerateCandidates is currently broken\n"; } #endif