Merge remote-tracking branch 'upstream/master'

Prashant Mathur 2017-07-20 14:04:32 +02:00
commit f07e60aece
540 changed files with 29831 additions and 29486 deletions

.travis.yml (new file, 24 lines added)

@@ -0,0 +1,24 @@
sudo: false
dist: trusty
language: c
compiler: gcc
env:
  matrix:
addons:
  apt:
    sources:
      - ubuntu-toolchain-r-test
    packages:
      - subversion
      - automake
      - libtool
      - zlib1g-dev
      - libbz2-dev
      - liblzma-dev
      - libboost-all-dev
      - libgoogle-perftools-dev
      - libxmlrpc-c++.*-dev
      - cmake
      - csh
script:
  - ./bjam -j4

Jamroot (10 lines changed)

@@ -111,7 +111,7 @@ external-lib z ;
#lib dl : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
#requirements += <library>dl ;
#requirements += <cxxflags>-std=c++0x ;
requirements += <cxxflags>-std=c++0x ;
# Allow moses to report the git commit hash of the version used for compilation
moses_githash = [ _shell "git describe --dirty" ] ;
@@ -183,7 +183,6 @@ requirements += [ option.get "with-mm" : : <define>MAX_NUM_FACTORS=4 ] ;
requirements += [ option.get "unlabelled-source" : : <define>UNLABELLED_SOURCE ] ;
if [ option.get "with-oxlm" ] {
external-lib boost_serialization ;
external-lib gomp ;
requirements += <library>boost_serialization ;
requirements += <library>gomp ;
@@ -317,7 +316,8 @@ contrib/c++tokenizer//tokenizer
contrib/expected-bleu-training//train-expected-bleu
contrib/expected-bleu-training//prepare-expected-bleu-training
contrib/moses2//programs
probingpt//programs
moses2//programs
;
@@ -340,6 +340,6 @@ if [ path.exists $(TOP)/dist ] && $(prefix) != dist {
#local temp = [ _shell "bash source ./s.sh" ] ;
local temp = [ _shell "mkdir -p $(TOP)/bin" ] ;
local temp = [ _shell "rm -f $(TOP)/bin/moses_chart" ] ;
local temp = [ _shell "cd $(TOP)/bin && ln -s moses moses_chart" ] ;
local temp = [ _shell "cd $(TOP)/bin && ln -s CreateProbingPT CreateProbingPT2" ] ;
local temp = [ _shell "cd $(TOP)/bin && ln -sf moses moses_chart" ] ;
local temp = [ _shell "cd $(TOP)/bin && ln -sf CreateProbingPT CreateProbingPT2" ] ;


@@ -91,11 +91,11 @@ $(call safepath,$(IRSTLM_PREFIX)/bin/build-lm.sh):
rm -rf ${TMP}
# boost
boost: URL=http://sourceforge.net/projects/boost/files/boost/1.59.0/boost_1_59_0.tar.gz/download
boost: URL=http://sourceforge.net/projects/boost/files/boost/1.63.0/boost_1_63_0.tar.gz/download
boost: TMP=$(CWD)/build/boost
boost: override PREFIX=${BOOST_PREFIX}
boost: | $(call safepath,${BOOST_PREFIX}/include/boost)
$(call safepath,${BOOST_PREFIX}/include/boost):
$(sfget)
cd '${TMP}/boost_1_59_0' && ./bootstrap.sh && ./b2 --prefix=${PREFIX} -j${nproc} install
cd '${TMP}/boost_1_63_0' && ./bootstrap.sh && ./b2 --prefix=${PREFIX} -j${nproc} install
rm -rf ${TMP}


@@ -1,129 +0,0 @@
/*
* ArcList.cpp
*
* Created on: 26 Oct 2015
* Author: hieu
*/
#include <iostream>
#include <sstream>
#include <algorithm>
#include <boost/foreach.hpp>
#include "ArcLists.h"
#include "HypothesisBase.h"
#include "util/exception.hh"
using namespace std;
namespace Moses2
{
ArcLists::ArcLists()
{
// TODO Auto-generated constructor stub
}
ArcLists::~ArcLists()
{
BOOST_FOREACH(const Coll::value_type &collPair, m_coll){
const ArcList *arcList = collPair.second;
delete arcList;
}
}
void ArcLists::AddArc(bool added, const HypothesisBase *currHypo,
const HypothesisBase *otherHypo)
{
//cerr << added << " " << currHypo << " " << otherHypo << endl;
ArcList *arcList;
if (added) {
// we're winners!
if (otherHypo) {
// there was an existing losing hypo
arcList = &GetAndDetachArcList(otherHypo);
}
else {
// there was no existing hypo
arcList = new ArcList;
}
m_coll[currHypo] = arcList;
}
else {
// we're losers!
// there should be a winner, we're not doing beam pruning
UTIL_THROW_IF2(otherHypo == NULL, "There must have been a winning hypo");
arcList = &GetArcList(otherHypo);
}
// in any case, add the curr hypo
arcList->push_back(currHypo);
}
ArcList &ArcLists::GetArcList(const HypothesisBase *hypo)
{
Coll::iterator iter = m_coll.find(hypo);
UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
ArcList &arcList = *iter->second;
return arcList;
}
const ArcList &ArcLists::GetArcList(const HypothesisBase *hypo) const
{
Coll::const_iterator iter = m_coll.find(hypo);
if (iter == m_coll.end()) {
cerr << "looking for:" << hypo << " have " << m_coll.size() << " :";
BOOST_FOREACH(const Coll::value_type &collPair, m_coll){
const HypothesisBase *hypo = collPair.first;
cerr << hypo << " ";
}
}
UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list for " << hypo);
ArcList &arcList = *iter->second;
return arcList;
}
ArcList &ArcLists::GetAndDetachArcList(const HypothesisBase *hypo)
{
Coll::iterator iter = m_coll.find(hypo);
UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
ArcList &arcList = *iter->second;
m_coll.erase(iter);
return arcList;
}
void ArcLists::Sort()
{
BOOST_FOREACH(Coll::value_type &collPair, m_coll){
ArcList &list = *collPair.second;
std::sort(list.begin(), list.end(), HypothesisFutureScoreOrderer() );
}
}
void ArcLists::Delete(const HypothesisBase *hypo)
{
//cerr << "hypo=" << hypo->Debug() << endl;
//cerr << "m_coll=" << m_coll.size() << endl;
Coll::iterator iter = m_coll.find(hypo);
UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
ArcList *arcList = iter->second;
m_coll.erase(iter);
delete arcList;
}
std::string ArcLists::Debug(const System &system) const
{
stringstream strm;
BOOST_FOREACH(const Coll::value_type &collPair, m_coll){
const ArcList *arcList = collPair.second;
strm << arcList << "(" << arcList->size() << ") ";
}
return strm.str();
}
}


@@ -1,48 +0,0 @@
/*
* SkeletonStatefulFF.h
*
* Created on: 27 Oct 2015
* Author: hieu
*/
#ifndef SKELETONSTATEFULFF_H_
#define SKELETONSTATEFULFF_H_
#include "StatefulFeatureFunction.h"
namespace Moses2
{
class SkeletonStatefulFF: public StatefulFeatureFunction
{
public:
SkeletonStatefulFF(size_t startInd, const std::string &line);
virtual ~SkeletonStatefulFF();
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const;
};
}
#endif /* SKELETONSTATEFULFF_H_ */


@@ -1,34 +0,0 @@
/*
* SkeletonStatelessFF.h
*
* Created on: 27 Oct 2015
* Author: hieu
*/
#pragma once
#include "StatelessFeatureFunction.h"
namespace Moses2
{
class SkeletonStatelessFF: public StatelessFeatureFunction
{
public:
SkeletonStatelessFF(size_t startInd, const std::string &line);
virtual ~SkeletonStatelessFF();
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
};
}


@@ -1,189 +0,0 @@
/*
* HypothesisColl.cpp
*
* Created on: 26 Feb 2016
* Author: hieu
*/
#include <iostream>
#include <sstream>
#include <algorithm>
#include <boost/foreach.hpp>
#include "HypothesisColl.h"
#include "ManagerBase.h"
#include "System.h"
#include "MemPoolAllocator.h"
using namespace std;
namespace Moses2
{
HypothesisColl::HypothesisColl(const ManagerBase &mgr) :
m_coll(MemPoolAllocator<const HypothesisBase*>(mgr.GetPool())), m_sortedHypos(
NULL)
{
}
const HypothesisBase *HypothesisColl::GetBestHypo() const
{
if (GetSize() == 0) {
return NULL;
}
if (m_sortedHypos) {
return (*m_sortedHypos)[0];
}
SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
const HypothesisBase *bestHypo;
BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
if (hypo->GetFutureScore() > bestScore) {
bestScore = hypo->GetFutureScore();
bestHypo = hypo;
}
}
return bestHypo;
}
void HypothesisColl::Add(
const System &system,
HypothesisBase *hypo,
Recycler<HypothesisBase*> &hypoRecycle,
ArcLists &arcLists)
{
StackAdd added = Add(hypo);
size_t nbestSize = system.options.nbest.nbest_size;
if (nbestSize) {
arcLists.AddArc(added.added, hypo, added.other);
}
else {
if (!added.added) {
hypoRecycle.Recycle(hypo);
}
else if (added.other) {
hypoRecycle.Recycle(added.other);
}
}
}
StackAdd HypothesisColl::Add(const HypothesisBase *hypo)
{
std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
// CHECK RECOMBINATION
if (addRet.second) {
// equiv hypo doesn't exist
return StackAdd(true, NULL);
}
else {
HypothesisBase *hypoExisting = const_cast<HypothesisBase*>(*addRet.first);
if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
// incoming hypo is better than the one we have
const HypothesisBase * const &hypoExisting1 = *addRet.first;
const HypothesisBase *&hypoExisting2 =
const_cast<const HypothesisBase *&>(hypoExisting1);
hypoExisting2 = hypo;
return StackAdd(true, hypoExisting);
}
else {
// already storing the best hypo. discard incoming hypo
return StackAdd(false, hypoExisting);
}
}
assert(false);
}
const Hypotheses &HypothesisColl::GetSortedAndPruneHypos(
const ManagerBase &mgr,
ArcLists &arcLists) const
{
if (m_sortedHypos == NULL) {
// create sortedHypos first
MemPool &pool = mgr.GetPool();
m_sortedHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool,
m_coll.size());
size_t ind = 0;
BOOST_FOREACH(const HypothesisBase *hypo, m_coll){
(*m_sortedHypos)[ind] = hypo;
++ind;
}
SortAndPruneHypos(mgr, arcLists);
}
return *m_sortedHypos;
}
const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos() const
{
UTIL_THROW_IF2(m_sortedHypos == NULL, "m_sortedHypos must be sorted beforehand");
return *m_sortedHypos;
}
void HypothesisColl::SortAndPruneHypos(const ManagerBase &mgr,
ArcLists &arcLists) const
{
size_t stackSize = mgr.system.options.search.stack_size;
Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle();
/*
cerr << "UNSORTED hypos: ";
BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
cerr << hypo << "(" << hypo->GetFutureScore() << ")" << " ";
}
cerr << endl;
*/
Hypotheses::iterator iterMiddle;
iterMiddle =
(stackSize == 0 || m_sortedHypos->size() < stackSize) ?
m_sortedHypos->end() : m_sortedHypos->begin() + stackSize;
std::partial_sort(m_sortedHypos->begin(), iterMiddle, m_sortedHypos->end(),
HypothesisFutureScoreOrderer());
// prune
if (stackSize && m_sortedHypos->size() > stackSize) {
for (size_t i = stackSize; i < m_sortedHypos->size(); ++i) {
HypothesisBase *hypo = const_cast<HypothesisBase*>((*m_sortedHypos)[i]);
recycler.Recycle(hypo);
// delete from arclist
if (mgr.system.options.nbest.nbest_size) {
arcLists.Delete(hypo);
}
}
m_sortedHypos->resize(stackSize);
}
/*
cerr << "sorted hypos: ";
for (size_t i = 0; i < m_sortedHypos->size(); ++i) {
const HypothesisBase *hypo = (*m_sortedHypos)[i];
cerr << hypo << " ";
}
cerr << endl;
*/
}
void HypothesisColl::Clear()
{
m_sortedHypos = NULL;
m_coll.clear();
}
std::string HypothesisColl::Debug(const System &system) const
{
stringstream out;
BOOST_FOREACH (const HypothesisBase *hypo, m_coll) {
out << hypo->Debug(system);
out << std::endl << std::endl;
}
return out.str();
}
} /* namespace Moses2 */


@@ -1,246 +0,0 @@
/*
* LanguageModelDALM.cpp
*
* Created on: 5 Dec 2015
* Author: hieu
*/
#include "LanguageModelDALM.h"
#include "../TypeDef.h"
#include "../System.h"
#include "dalm.h"
#include "util/exception.hh"
#include "../legacy/InputFileStream.h"
using namespace std;
namespace Moses2
{
//////////////////////////////////////////////////////////////////////////////////////////
class Murmur: public DALM::State::HashFunction
{
public:
Murmur(std::size_t seed=0): seed(seed) {
}
virtual std::size_t operator()(const DALM::VocabId *words, std::size_t size) const {
return util::MurmurHashNative(words, sizeof(DALM::VocabId) * size, seed);
}
private:
std::size_t seed;
};
//////////////////////////////////////////////////////////////////////////////////////////
class DALMState : public FFState
{
private:
DALM::State state;
public:
DALMState() {
}
DALMState(const DALMState &from) {
state = from.state;
}
virtual ~DALMState() {
}
void reset(const DALMState &from) {
state = from.state;
}
virtual int Compare(const FFState& other) const {
const DALMState &o = static_cast<const DALMState &>(other);
if(state.get_count() < o.state.get_count()) return -1;
else if(state.get_count() > o.state.get_count()) return 1;
else return state.compare(o.state);
}
virtual size_t hash() const {
// imitate KenLM
return state.hash(Murmur());
}
virtual bool operator==(const FFState& other) const {
const DALMState &o = static_cast<const DALMState &>(other);
return state.compare(o.state) == 0;
}
DALM::State &get_state() {
return state;
}
void refresh() {
state.refresh();
}
virtual std::string ToString() const
{ return "DALM state"; }
};
//////////////////////////////////////////////////////////////////////////////////////////////////////
inline void read_ini(const char *inifile, string &model, string &words, string &wordstxt)
{
ifstream ifs(inifile);
string line;
getline(ifs, line);
while(ifs) {
unsigned int pos = line.find("=");
string key = line.substr(0, pos);
string value = line.substr(pos+1, line.size()-pos);
if(key=="MODEL") {
model = value;
} else if(key=="WORDS") {
words = value;
} else if(key=="WORDSTXT") {
wordstxt = value;
}
getline(ifs, line);
}
}
/////////////////////////
LanguageModelDALM::LanguageModelDALM(size_t startInd, const std::string &line)
:StatefulFeatureFunction(startInd, line)
{
ReadParameters();
}
LanguageModelDALM::~LanguageModelDALM() {
// TODO Auto-generated destructor stub
}
void LanguageModelDALM::Load(System &system)
{
/////////////////////
// READING INIFILE //
/////////////////////
string inifile= m_filePath + "/dalm.ini";
string model; // Path to the double-array file.
string words; // Path to the vocabulary file.
string wordstxt; //Path to the vocabulary file in text format.
read_ini(inifile.c_str(), model, words, wordstxt);
model = m_filePath + "/" + model;
words = m_filePath + "/" + words;
wordstxt = m_filePath + "/" + wordstxt;
UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(),
util::FileOpenException,
"Failed to read DALM ini file " << m_filePath << ". Probably doesn't exist");
////////////////
// LOADING LM //
////////////////
// Preparing a logger object.
m_logger = new DALM::Logger(stderr);
m_logger->setLevel(DALM::LOGGER_INFO);
// Load the vocabulary file.
m_vocab = new DALM::Vocabulary(words, *m_logger);
// Load the language model.
m_lm = new DALM::LM(model, *m_vocab, m_nGramOrder, *m_logger);
wid_start = m_vocab->lookup(BOS_);
wid_end = m_vocab->lookup(EOS_);
// vocab mapping
CreateVocabMapping(wordstxt, system);
m_beginSentenceFactor = system.GetVocab().AddFactor(BOS_, system);
}
void LanguageModelDALM::CreateVocabMapping(const std::string &wordstxt, const System &system)
{
InputFileStream vocabStrm(wordstxt);
std::vector< std::pair<std::size_t, DALM::VocabId> > vlist;
string line;
std::size_t max_fid = 0;
while(getline(vocabStrm, line)) {
const Factor *factor = system.GetVocab().AddFactor(line, system);
std::size_t fid = factor->GetId();
DALM::VocabId wid = m_vocab->lookup(line.c_str());
vlist.push_back(std::pair<std::size_t, DALM::VocabId>(fid, wid));
if(max_fid < fid) max_fid = fid;
}
for(std::size_t i = 0; i < m_vocabMap.size(); i++) {
m_vocabMap[i] = m_vocab->unk();
}
m_vocabMap.resize(max_fid+1, m_vocab->unk());
std::vector< std::pair<std::size_t, DALM::VocabId> >::iterator it = vlist.begin();
while(it != vlist.end()) {
std::pair<std::size_t, DALM::VocabId> &entry = *it;
m_vocabMap[entry.first] = entry.second;
++it;
}
}
void LanguageModelDALM::SetParameter(const std::string& key, const std::string& value)
{
if (key == "factor") {
m_factorType = Scan<FactorType>(value);
} else if (key == "order") {
m_nGramOrder = Scan<size_t>(value);
} else if (key == "path") {
m_filePath = value;
} else {
StatefulFeatureFunction::SetParameter(key, value);
}
m_ContextSize = m_nGramOrder-1;
}
FFState* LanguageModelDALM::BlankState(MemPool &pool, const System &sys) const
{
DALMState *state = new DALMState();
return state;
}
void LanguageModelDALM::EmptyHypothesisState(FFState &state,
const ManagerBase &mgr,
const InputType &input,
const Hypothesis &hypo) const
{
DALMState &dalmState = static_cast<DALMState&>(state);
m_lm->init_state(dalmState.get_state());
}
void LanguageModelDALM::EvaluateInIsolation(MemPool &pool,
const System &system,
const Phrase &source,
const TargetPhraseImpl &targetPhrase,
Scores &scores,
SCORE &estimatedScore) const
{
}
void LanguageModelDALM::EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo,
const FFState &prevState,
Scores &scores,
FFState &state) const
{
}
void LanguageModelDALM::EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const
{
UTIL_THROW2("Not implemented");
}
}


@@ -1,75 +0,0 @@
/*
* LanguageModelDALM.h
*
* Created on: 5 Dec 2015
* Author: hieu
*/
#pragma once
#include "../FF/StatefulFeatureFunction.h"
#include "../legacy/Util2.h"
#include "../legacy/Factor.h"
namespace DALM
{
class Logger;
class Vocabulary;
class State;
class LM;
union Fragment;
class Gap;
typedef unsigned int VocabId;
}
namespace Moses2
{
class LanguageModelDALM: public StatefulFeatureFunction
{
public:
LanguageModelDALM(size_t startInd, const std::string &line);
virtual ~LanguageModelDALM();
virtual void Load(System &system);
virtual void SetParameter(const std::string& key, const std::string& value);
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const;
protected:
FactorType m_factorType;
std::string m_filePath;
size_t m_nGramOrder; //! max n-gram length contained in this LM
size_t m_ContextSize;
DALM::Logger *m_logger;
DALM::Vocabulary *m_vocab;
DALM::LM *m_lm;
DALM::VocabId wid_start, wid_end;
const Factor *m_beginSentenceFactor;
mutable std::vector<DALM::VocabId> m_vocabMap;
void CreateVocabMapping(const std::string &wordstxt, const System &system);
};
}


@@ -1,171 +0,0 @@
/*
* SearchNormal.cpp
*
* Created on: 25 Oct 2015
* Author: hieu
*/
#include "Search.h"
#include <algorithm>
#include <boost/foreach.hpp>
#include "Stack.h"
#include "../Manager.h"
#include "../TrellisPath.h"
#include "../Sentence.h"
#include "../../TrellisPaths.h"
#include "../../InputPathsBase.h"
#include "../../Phrase.h"
#include "../../System.h"
#include "../../PhraseBased/TargetPhrases.h"
using namespace std;
namespace Moses2
{
namespace NSBatch
{
Search::Search(Manager &mgr)
:Moses2::Search(mgr)
, m_stacks(mgr)
, m_batch(mgr.system.GetBatch(mgr.GetSystemPool()))
{
// TODO Auto-generated constructor stub
}
Search::~Search()
{
// TODO Auto-generated destructor stub
}
void Search::Decode()
{
// init stacks
const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
m_stacks.Init(mgr, sentence.GetSize() + 1);
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(),
initBitmap);
initHypo->EmptyHypothesisState(mgr.GetInput());
m_stacks.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists);
for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
Decode(stackInd);
//cerr << m_stacks << endl;
// delete stack to save mem
if (stackInd < m_stacks.GetSize() - 1) {
m_stacks.Delete(stackInd);
}
//cerr << m_stacks << endl;
}
}
void Search::Decode(size_t stackInd)
{
Stack &stack = m_stacks[stackInd];
if (&stack == &m_stacks.Back()) {
// last stack. don't do anything
return;
}
const Hypotheses &hypos = stack.GetSortedAndPruneHypos(mgr, mgr.arcLists);
const InputPaths &paths = mgr.GetInputPaths();
BOOST_FOREACH(const InputPathBase *path, paths){
BOOST_FOREACH(const HypothesisBase *hypo, hypos) {
Extend(*static_cast<const Hypothesis*>(hypo), *static_cast<const InputPath*>(path));
}
}
// process batch
mgr.system.featureFunctions.EvaluateWhenAppliedBatch(m_batch);
for (size_t i = 0; i < m_batch.size(); ++i) {
Hypothesis *hypo = m_batch[i];
m_stacks.Add(hypo, mgr.GetHypoRecycle(), mgr.arcLists);
}
m_batch.clear();
}
void Search::Extend(const Hypothesis &hypo, const InputPath &path)
{
const Bitmap &hypoBitmap = hypo.GetBitmap();
const Range &hypoRange = hypo.GetInputPath().range;
const Range &pathRange = path.range;
if (!CanExtend(hypoBitmap, hypoRange.GetEndPos(), pathRange)) {
return;
}
const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint();
if (!reorderingConstraint.Check(hypoBitmap, pathRange.GetStartPos(), pathRange.GetEndPos())) {
return;
}
//cerr << " YES" << endl;
// extend this hypo
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
//SCORE estimatedScore = mgr.GetEstimatedScores().CalcFutureScore2(bitmap, pathRange.GetStartPos(), pathRange.GetEndPos());
SCORE estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
size_t numPt = mgr.system.mappings.size();
const TargetPhrases **tpsAllPt = path.targetPhrases;
for (size_t i = 0; i < numPt; ++i) {
const TargetPhrases *tps = tpsAllPt[i];
if (tps) {
Extend(hypo, *tps, path, newBitmap, estimatedScore);
}
}
}
void Search::Extend(const Hypothesis &hypo, const TargetPhrases &tps,
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
{
BOOST_FOREACH(const TargetPhraseImpl *tp, tps){
Extend(hypo, *tp, path, newBitmap, estimatedScore);
}
}
void Search::Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp,
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
{
Hypothesis *newHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
newHypo->Init(mgr, hypo, path, tp, newBitmap, estimatedScore);
m_batch.push_back(newHypo);
//newHypo->EvaluateWhenApplied();
//m_stacks.Add(newHypo, mgr.GetHypoRecycle(), mgr.arcLists);
//m_arcLists.AddArc(stackAdded.added, newHypo, stackAdded.other);
//stack.Prune(mgr.GetHypoRecycle(), mgr.system.stackSize, mgr.system.stackSize * 2);
}
const Hypothesis *Search::GetBestHypo() const
{
const Stack &lastStack = m_stacks.Back();
return lastStack.GetBestHypo<Hypothesis>();
}
void Search::AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const
{
const Stack &lastStack = m_stacks.Back();
const Hypotheses &hypos = lastStack.GetSortedAndPruneHypos(mgr, mgr.arcLists);
BOOST_FOREACH(const HypothesisBase *hypoBase, hypos){
const Hypothesis *hypo = static_cast<const Hypothesis*>(hypoBase);
TrellisPath *path = new TrellisPath(hypo, mgr.arcLists);
paths.Add(path);
}
}
} // namespace
}


@@ -1,53 +0,0 @@
/*
* SearchNormal.h
*
* Created on: 25 Oct 2015
* Author: hieu
*/
#pragma once
#include <vector>
#include "../../legacy/Range.h"
#include "../../legacy/Bitmap.h"
#include "../../TypeDef.h"
#include "../Search.h"
#include "Stacks.h"
namespace Moses2
{
class Hypothesis;
class InputPath;
class TargetPhrases;
class TargetPhraseImpl;
namespace NSBatch
{
class Stacks;
class Search: public Moses2::Search
{
public:
Search(Manager &mgr);
virtual ~Search();
virtual void Decode();
const Hypothesis *GetBestHypo() const;
void AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const;
protected:
Stacks m_stacks;
Batch &m_batch;
void Decode(size_t stackInd);
void Extend(const Hypothesis &hypo, const InputPath &path);
void Extend(const Hypothesis &hypo, const TargetPhrases &tps,
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore);
void Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp,
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore);
};
}
}


@@ -1,35 +0,0 @@
/*
* Stack.cpp
*
* Created on: 24 Oct 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
#include "Stack.h"
#include "../Hypothesis.h"
#include "../Manager.h"
#include "../../Scores.h"
#include "../../HypothesisColl.h"
using namespace std;
namespace Moses2
{
namespace NSBatch
{
Stack::Stack(const Manager &mgr) :
HypothesisColl(mgr)
{
// TODO Auto-generated constructor stub
}
Stack::~Stack()
{
// TODO Auto-generated destructor stub
}
}
}


@@ -1,32 +0,0 @@
/*
* Stack.h
*
* Created on: 24 Oct 2015
* Author: hieu
*/
#pragma once
#include <boost/unordered_set.hpp>
#include <deque>
#include "../Hypothesis.h"
#include "../../TypeDef.h"
#include "../../HypothesisColl.h"
#include "../../legacy/Util2.h"
namespace Moses2
{
namespace NSBatch
{
class Stack: public HypothesisColl
{
public:
Stack(const Manager &mgr);
virtual ~Stack();
protected:
};
}
}


@@ -1,67 +0,0 @@
/*
* Stacks.cpp
*
* Created on: 6 Nov 2015
* Author: hieu
*/
#include "Stacks.h"
#include "../Manager.h"
#include "../../System.h"
using namespace std;
namespace Moses2
{
namespace NSBatch
{
Stacks::Stacks(const Manager &mgr) :
m_mgr(mgr)
{
// TODO Auto-generated constructor stub
}
Stacks::~Stacks()
{
for (size_t i = 0; i < m_stacks.size(); ++i) {
delete m_stacks[i];
}
}
void Stacks::Init(const Manager &mgr, size_t numStacks)
{
m_stacks.resize(numStacks);
for (size_t i = 0; i < m_stacks.size(); ++i) {
m_stacks[i] = new Stack(mgr);
}
}
std::string Stacks::Debug(const System &system) const
{
stringstream out;
for (size_t i = 0; i < GetSize(); ++i) {
const Stack *stack = m_stacks[i];
if (stack) {
out << stack->GetSize() << " ";
}
else {
out << "N ";
}
}
return out.str();
}
void Stacks::Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
ArcLists &arcLists)
{
size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered();
//cerr << "numWordsCovered=" << numWordsCovered << endl;
Stack &stack = *m_stacks[numWordsCovered];
stack.Add(m_mgr.system, hypo, hypoRecycle, arcLists);
}
}
}


@@ -1,62 +0,0 @@
/*
* Stacks.h
*
* Created on: 6 Nov 2015
* Author: hieu
*/
#pragma once
#include <vector>
#include "Stack.h"
#include "../../Recycler.h"
namespace Moses2
{
class Manager;
class ArcLists;
namespace NSBatch
{
class Stacks
{
public:
Stacks(const Manager &mgr);
virtual ~Stacks();
void Init(const Manager &mgr, size_t numStacks);
size_t GetSize() const
{
return m_stacks.size();
}
const Stack &Back() const
{
return *m_stacks.back();
}
Stack &operator[](size_t ind)
{
return *m_stacks[ind];
}
void Delete(size_t ind)
{
delete m_stacks[ind];
m_stacks[ind] = NULL;
}
void Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
ArcLists &arcLists);
std::string Debug(const System &system) const;
protected:
const Manager &m_mgr;
std::vector<Stack*> m_stacks;
};
}
}


@@ -1,246 +0,0 @@
/*
* Search.cpp
*
* Created on: 16 Nov 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
#include "Search.h"
#include "Stack.h"
#include "../Manager.h"
#include "../Hypothesis.h"
#include "../TrellisPath.h"
#include "../Sentence.h"
#include "../../TrellisPaths.h"
#include "../../InputPathsBase.h"
#include "../../InputPathBase.h"
#include "../../System.h"
#include "../../TranslationTask.h"
#include "../../legacy/Util2.h"
#include "../../PhraseBased/TargetPhrases.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningMiniStack
{
////////////////////////////////////////////////////////////////////////
Search::Search(Manager &mgr) :
Moses2::Search(mgr), m_stack(mgr), m_cubeEdgeAlloc(mgr.GetPool())
, m_queue(QueueItemOrderer(),
std::vector<QueueItem*, MemPoolAllocator<QueueItem*> >(
MemPoolAllocator<QueueItem*>(mgr.GetPool())))
, m_seenPositions(
MemPoolAllocator<CubeEdge::SeenPositionItem>(mgr.GetPool()))
, m_queueItemRecycler(MemPoolAllocator<QueueItem*>(mgr.GetPool()))
{
}
Search::~Search()
{
}
void Search::Decode()
{
const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
// init cube edges
m_cubeEdges.resize(sentence.GetSize() + 1);
for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
m_cubeEdges[i] = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges(
m_cubeEdgeAlloc);
}
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(),
initBitmap);
initHypo->EmptyHypothesisState(mgr.GetInput());
//cerr << "initHypo=" << *initHypo << endl;
m_stack.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists);
PostDecode(0);
for (size_t stackInd = 1; stackInd < sentence.GetSize() + 1;
++stackInd) {
//cerr << "stackInd=" << stackInd << endl;
m_stack.Clear();
Decode(stackInd);
PostDecode(stackInd);
//m_stack.DebugCounts();
//cerr << m_stacks << endl;
}
}
void Search::Decode(size_t stackInd)
{
Recycler<HypothesisBase*> &hypoRecycler = mgr.GetHypoRecycle();
// reuse queue from previous stack. Clear it first
std::vector<QueueItem*, MemPoolAllocator<QueueItem*> > &container = Container(
m_queue);
//cerr << "container=" << container.size() << endl;
BOOST_FOREACH(QueueItem *item, container){
// recycle unused hypos from queue
Hypothesis *hypo = item->hypo;
hypoRecycler.Recycle(hypo);
// recycle queue item
m_queueItemRecycler.push_back(item);
}
container.clear();
m_seenPositions.clear();
// add top hypo from every edge into queue
CubeEdges &edges = *m_cubeEdges[stackInd];
BOOST_FOREACH(CubeEdge *edge, edges){
//cerr << *edge << " ";
edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
}
/*
cerr << "edges: ";
boost::unordered_set<const Bitmap*> uniqueBM;
BOOST_FOREACH(CubeEdge *edge, edges) {
uniqueBM.insert(&edge->newBitmap);
//cerr << *edge << " ";
}
cerr << edges.size() << " " << uniqueBM.size();
cerr << endl;
*/
size_t pops = 0;
while (!m_queue.empty() && pops < mgr.system.options.cube.pop_limit) {
// get best hypo from queue, add to stack
//cerr << "queue=" << queue.size() << endl;
QueueItem *item = m_queue.top();
m_queue.pop();
CubeEdge *edge = item->edge;
// add hypo to stack
Hypothesis *hypo = item->hypo;
if (mgr.system.options.cube.lazy_scoring) {
hypo->EvaluateWhenApplied();
}
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
m_stack.Add(hypo, hypoRecycler, mgr.arcLists);
edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
++pops;
}
// create hypo from every edge. Increase diversity
if (mgr.system.options.cube.diversity) {
while (!m_queue.empty()) {
QueueItem *item = m_queue.top();
m_queue.pop();
if (item->hypoIndex == 0 && item->tpIndex == 0) {
// add hypo to stack
Hypothesis *hypo = item->hypo;
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
m_stack.Add(hypo, hypoRecycler, mgr.arcLists);
}
}
}
}
void Search::PostDecode(size_t stackInd)
{
MemPool &pool = mgr.GetPool();
const InputPaths &paths = mgr.GetInputPaths();
const Matrix<InputPath*> &pathMatrix = paths.GetMatrix();
size_t inputSize = pathMatrix.GetRows();
size_t numPaths = pathMatrix.GetCols();
BOOST_FOREACH(const Stack::Coll::value_type &val, m_stack.GetColl()){
const Bitmap &hypoBitmap = *val.first.first;
size_t firstGap = hypoBitmap.GetFirstGapPos();
size_t hypoEndPos = val.first.second;
//cerr << "key=" << hypoBitmap << " " << firstGap << " " << inputSize << endl;
// create edges to next hypos from existing hypos
for (size_t startPos = firstGap; startPos < inputSize; ++startPos) {
for (size_t pathInd = 0; pathInd < numPaths; ++pathInd) {
const InputPath *path = pathMatrix.GetValue(startPos, pathInd);
if (path == NULL) {
break;
}
if (path->GetNumRules() == 0) {
continue;
}
const Range &pathRange = path->range;
//cerr << "pathRange=" << pathRange << endl;
if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
continue;
}
const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint();
if (!reorderingConstraint.Check(hypoBitmap, startPos, pathRange.GetEndPos())) {
continue;
}
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
size_t numWords = newBitmap.GetNumWordsCovered();
CubeEdges &edges = *m_cubeEdges[numWords];
// sort hypo for a particular bitmap and hypoEndPos
const Hypotheses &sortedHypos = val.second->GetSortedAndPruneHypos(mgr, mgr.arcLists);
size_t numPt = mgr.system.mappings.size();
for (size_t i = 0; i < numPt; ++i) {
const TargetPhrases *tps = path->targetPhrases[i];
if (tps && tps->GetSize()) {
CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap);
edges.push_back(edge);
}
}
}
}
}
}
const Hypothesis *Search::GetBestHypo() const
{
const Hypothesis *bestHypo = m_stack.GetBestHypo();
return bestHypo;
}
void Search::AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const
{
const Stack::Coll &coll = m_stack.GetColl();
BOOST_FOREACH(const Stack::Coll::value_type &val, coll){
const Moses2::HypothesisColl &hypos = *val.second;
const Hypotheses &sortedHypos = hypos.GetSortedAndPruneHypos(mgr, mgr.arcLists);
BOOST_FOREACH(const HypothesisBase *hypoBase, sortedHypos) {
const Hypothesis *hypo = static_cast<const Hypothesis*>(hypoBase);
TrellisPath *path = new TrellisPath(hypo, mgr.arcLists);
paths.Add(path);
}
}
}
}
}


@@ -1,125 +0,0 @@
/*
* Stack.cpp
*
* Created on: 24 Oct 2015
* Author: hieu
*/
#include <algorithm>
#include <boost/foreach.hpp>
#include "Stack.h"
#include "../Hypothesis.h"
#include "../Manager.h"
#include "../../Scores.h"
#include "../../System.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningMiniStack
{
Stack::Stack(const Manager &mgr) :
m_mgr(mgr), m_coll(
MemPoolAllocator<std::pair<HypoCoverage, Moses2::HypothesisColl*> >(
mgr.GetPool())), m_miniStackRecycler(
MemPoolAllocator<Moses2::HypothesisColl*>(mgr.GetPool()))
{
}
Stack::~Stack()
{
BOOST_FOREACH(const Coll::value_type &val, m_coll){
const Moses2::HypothesisColl *miniStack = val.second;
delete miniStack;
}
while (!m_miniStackRecycler.empty()) {
Moses2::HypothesisColl *miniStack = m_miniStackRecycler.back();
m_miniStackRecycler.pop_back();
delete miniStack;
}
}
void Stack::Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
ArcLists &arcLists)
{
HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos());
Moses2::HypothesisColl &coll = GetMiniStack(key);
coll.Add(m_mgr.system, hypo, hypoRecycle, arcLists);
}
const Hypothesis *Stack::GetBestHypo() const
{
SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
const HypothesisBase *bestHypo = NULL;
BOOST_FOREACH(const Coll::value_type &val, m_coll){
const Moses2::HypothesisColl &hypos = *val.second;
const Moses2::HypothesisBase *hypo = hypos.GetBestHypo();
if (hypo && hypo->GetFutureScore() > bestScore) {
bestScore = hypo->GetFutureScore();
bestHypo = hypo;
}
}
return &bestHypo->Cast<Hypothesis>();
}
size_t Stack::GetHypoSize() const
{
size_t ret = 0;
BOOST_FOREACH(const Coll::value_type &val, m_coll){
const Moses2::HypothesisColl &hypos = *val.second;
ret += hypos.GetSize();
}
return ret;
}
Moses2::HypothesisColl &Stack::GetMiniStack(const HypoCoverage &key)
{
Moses2::HypothesisColl *ret;
Coll::iterator iter = m_coll.find(key);
if (iter == m_coll.end()) {
if (m_miniStackRecycler.empty()) {
ret = new Moses2::HypothesisColl(m_mgr);
}
else {
ret = m_miniStackRecycler.back();
ret->Clear();
m_miniStackRecycler.pop_back();
}
m_coll[key] = ret;
}
else {
ret = iter->second;
}
return *ret;
}
void Stack::Clear()
{
BOOST_FOREACH(const Coll::value_type &val, m_coll){
Moses2::HypothesisColl *miniStack = val.second;
m_miniStackRecycler.push_back(miniStack);
}
m_coll.clear();
}
void Stack::DebugCounts()
{
cerr << "counts=";
BOOST_FOREACH(const Coll::value_type &val, GetColl()){
const Moses2::HypothesisColl &miniStack = *val.second;
size_t count = miniStack.GetSize();
cerr << count << " ";
}
cerr << endl;
}
}
}


@@ -1,281 +0,0 @@
/*
* Manager.cpp
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
#include <boost/functional/hash.hpp>
#include <boost/unordered_set.hpp>
#include <vector>
#include <sstream>
#include "Manager.h"
#include "TargetPhraseImpl.h"
#include "InputPath.h"
#include "Sentence.h"
#include "Normal/Search.h"
#include "CubePruningMiniStack/Search.h"
#include "Batch/Search.h"
/*
#include "CubePruningPerMiniStack/Search.h"
#include "CubePruningPerBitmap/Search.h"
#include "CubePruningCardinalStack/Search.h"
#include "CubePruningBitmapStack/Search.h"
*/
#include "../TrellisPaths.h"
#include "../System.h"
#include "../Phrase.h"
#include "../InputPathsBase.h"
#include "../TranslationModel/PhraseTable.h"
#include "../TranslationModel/UnknownWordPenalty.h"
#include "../legacy/Range.h"
#include "../PhraseBased/TargetPhrases.h"
using namespace std;
namespace Moses2
{
Manager::Manager(System &sys, const TranslationTask &task,
const std::string &inputStr, long translationId) :
ManagerBase(sys, task, inputStr, translationId)
,m_search(NULL)
,m_bitmaps(NULL)
{
//cerr << translationId << " inputStr=" << inputStr << endl;
}
Manager::~Manager()
{
//cerr << "Start ~Manager " << this << endl;
delete m_search;
delete m_bitmaps;
//cerr << "Finish ~Manager " << this << endl;
}
void Manager::Init()
{
// init pools etc
InitPools();
FactorCollection &vocab = system.GetVocab();
m_input = Moses2::Sentence::CreateFromString(GetPool(), vocab, system, m_inputStr);
m_bitmaps = new Bitmaps(GetPool());
const PhraseTable &firstPt = *system.featureFunctions.m_phraseTables[0];
m_initPhrase = new (GetPool().Allocate<TargetPhraseImpl>()) TargetPhraseImpl(
GetPool(), firstPt, system, 0);
const Sentence &sentence = static_cast<const Sentence&>(GetInput());
//cerr << "sentence=" << sentence.GetSize() << " " << sentence.Debug(system) << endl;
m_inputPaths.Init(sentence, *this);
// xml
const UnknownWordPenalty *unkWP = system.featureFunctions.GetUnknownWordPenalty();
UTIL_THROW_IF2(unkWP == NULL, "There must be a UnknownWordPenalty FF");
unkWP->ProcessXML(*this, GetPool(), sentence, m_inputPaths);
// lookup with every pt
const std::vector<const PhraseTable*> &pts = system.mappings;
for (size_t i = 0; i < pts.size(); ++i) {
const PhraseTable &pt = *pts[i];
//cerr << "Looking up from " << pt.GetName() << endl;
pt.Lookup(*this, m_inputPaths);
}
//m_inputPaths.DeleteUnusedPaths();
CalcFutureScore();
m_bitmaps->Init(sentence.GetSize(), vector<bool>(0));
switch (system.options.search.algo) {
case Normal:
m_search = new NSNormal::Search(*this);
break;
case NormalBatch:
m_search = new NSBatch::Search(*this);
break;
case CubePruning:
case CubePruningMiniStack:
m_search = new NSCubePruningMiniStack::Search(*this);
break;
/*
case CubePruningPerMiniStack:
m_search = new NSCubePruningPerMiniStack::Search(*this);
break;
case CubePruningPerBitmap:
m_search = new NSCubePruningPerBitmap::Search(*this);
break;
case CubePruningCardinalStack:
m_search = new NSCubePruningCardinalStack::Search(*this);
break;
case CubePruningBitmapStack:
m_search = new NSCubePruningBitmapStack::Search(*this);
break;
*/
default:
cerr << "Unknown search algorithm" << endl;
abort();
}
}
void Manager::Decode()
{
//cerr << "Start Decode " << this << endl;
Init();
m_search->Decode();
//cerr << "Finished Decode " << this << endl;
}
void Manager::CalcFutureScore()
{
const Sentence &sentence = static_cast<const Sentence&>(GetInput());
size_t size = sentence.GetSize();
m_estimatedScores =
new (GetPool().Allocate<EstimatedScores>()) EstimatedScores(GetPool(),
size);
m_estimatedScores->InitTriangle(-numeric_limits<SCORE>::infinity());
// walk all the translation options and record the cheapest option for each span
BOOST_FOREACH(const InputPathBase *path, m_inputPaths){
const Range &range = path->range;
SCORE bestScore = -numeric_limits<SCORE>::infinity();
size_t numPt = system.mappings.size();
for (size_t i = 0; i < numPt; ++i) {
const TargetPhrases *tps = static_cast<const InputPath*>(path)->targetPhrases[i];
if (tps) {
BOOST_FOREACH(const TargetPhraseImpl *tp, *tps) {
SCORE score = tp->GetFutureScore();
if (score > bestScore) {
bestScore = score;
}
}
}
}
m_estimatedScores->SetValue(range.GetStartPos(), range.GetEndPos(), bestScore);
}
// now fill all the cells in the strictly upper triangle
// there is no way to modify the diagonal now, in the case
// where no translation option covers a single-word span,
// we leave the -inf in the matrix
// like in chart parsing we want each cell to contain the highest score
// of the full-span trOpt or the sum of scores of joining two smaller spans
for (size_t colstart = 1; colstart < size; colstart++) {
for (size_t diagshift = 0; diagshift < size - colstart; diagshift++) {
size_t sPos = diagshift;
size_t ePos = colstart + diagshift;
for (size_t joinAt = sPos; joinAt < ePos; joinAt++) {
float joinedScore = m_estimatedScores->GetValue(sPos, joinAt)
+ m_estimatedScores->GetValue(joinAt + 1, ePos);
// uncomment to see the cell filling scheme
// TRACE_ERR("[" << sPos << "," << ePos << "] <-? ["
// << sPos << "," << joinAt << "]+["
// << joinAt+1 << "," << ePos << "] (colstart: "
// << colstart << ", diagshift: " << diagshift << ")"
// << endl);
if (joinedScore > m_estimatedScores->GetValue(sPos, ePos)) m_estimatedScores->SetValue(
sPos, ePos, joinedScore);
}
}
}
//cerr << "Square matrix:" << endl;
//cerr << *m_estimatedScores << endl;
}
std::string Manager::OutputBest() const
{
stringstream out;
Moses2::FixPrecision(out);
const Hypothesis *bestHypo = m_search->GetBestHypo();
if (bestHypo) {
if (system.options.output.ReportHypoScore) {
out << bestHypo->GetScores().GetTotalScore() << " ";
}
bestHypo->OutputToStream(out);
//cerr << "BEST TRANSLATION: " << *bestHypo;
}
else {
if (system.options.output.ReportHypoScore) {
out << "0 ";
}
//cerr << "NO TRANSLATION " << m_input->GetTranslationId() << endl;
}
return out.str();
//cerr << endl;
}
std::string Manager::OutputNBest()
{
arcLists.Sort();
boost::unordered_set<size_t> distinctHypos;
TrellisPaths<TrellisPath> contenders;
m_search->AddInitialTrellisPaths(contenders);
long transId = GetTranslationId();
// MAIN LOOP
stringstream out;
//Moses2::FixPrecision(out);
size_t maxIter = system.options.nbest.nbest_size * system.options.nbest.factor;
size_t bestInd = 0;
for (size_t i = 0; i < maxIter; ++i) {
if (bestInd > system.options.nbest.nbest_size || contenders.empty()) {
break;
}
//cerr << "bestInd=" << bestInd << endl;
TrellisPath *path = contenders.Get();
bool ok = false;
if (system.options.nbest.only_distinct) {
string tgtPhrase = path->OutputTargetPhrase(system);
//cerr << "tgtPhrase=" << tgtPhrase << endl;
boost::hash<std::string> string_hash;
size_t hash = string_hash(tgtPhrase);
if (distinctHypos.insert(hash).second) {
ok = true;
}
}
else {
ok = true;
}
if (ok) {
++bestInd;
out << transId << " ||| ";
path->OutputToStream(out, system);
out << "\n";
}
// create next paths
path->CreateDeviantPaths(contenders, arcLists, GetPool(), system);
delete path;
}
return out.str();
}
std::string Manager::OutputTransOpt()
{
return "";
}
}


@@ -1,167 +0,0 @@
/*
* SearchNormal.cpp
*
* Created on: 25 Oct 2015
* Author: hieu
*/
#include "Search.h"
#include <algorithm>
#include <boost/foreach.hpp>
#include "Stack.h"
#include "../Manager.h"
#include "../TrellisPath.h"
#include "../Sentence.h"
#include "../../TrellisPaths.h"
#include "../../InputPathsBase.h"
#include "../../Phrase.h"
#include "../../System.h"
#include "../../PhraseBased/TargetPhrases.h"
using namespace std;
namespace Moses2
{
namespace NSNormal
{
Search::Search(Manager &mgr)
:Moses2::Search(mgr)
, m_stacks(mgr)
{
// TODO Auto-generated constructor stub
}
Search::~Search()
{
// TODO Auto-generated destructor stub
}
void Search::Decode()
{
// init stacks
const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
m_stacks.Init(mgr, sentence.GetSize() + 1);
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(),
initBitmap);
initHypo->EmptyHypothesisState(mgr.GetInput());
m_stacks.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists);
for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
Decode(stackInd);
//cerr << m_stacks << endl;
// delete stack to save mem
if (stackInd < m_stacks.GetSize() - 1) {
m_stacks.Delete(stackInd);
}
//cerr << m_stacks << endl;
}
}
void Search::Decode(size_t stackInd)
{
//cerr << "stackInd=" << stackInd << endl;
Stack &stack = m_stacks[stackInd];
if (&stack == &m_stacks.Back()) {
// last stack. don't do anything
return;
}
const Hypotheses &hypos = stack.GetSortedAndPruneHypos(mgr, mgr.arcLists);
//cerr << "hypos=" << hypos.size() << endl;
const InputPaths &paths = mgr.GetInputPaths();
BOOST_FOREACH(const InputPathBase *path, paths){
BOOST_FOREACH(const HypothesisBase *hypo, hypos) {
Extend(*static_cast<const Hypothesis*>(hypo), *static_cast<const InputPath*>(path));
}
}
}
void Search::Extend(const Hypothesis &hypo, const InputPath &path)
{
const Bitmap &hypoBitmap = hypo.GetBitmap();
const Range &hypoRange = hypo.GetInputPath().range;
const Range &pathRange = path.range;
if (!CanExtend(hypoBitmap, hypoRange.GetEndPos(), pathRange)) {
return;
}
const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint();
if (!reorderingConstraint.Check(hypoBitmap, pathRange.GetStartPos(), pathRange.GetEndPos())) {
return;
}
// extend this hypo
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
//SCORE estimatedScore = mgr.GetEstimatedScores().CalcFutureScore2(bitmap, pathRange.GetStartPos(), pathRange.GetEndPos());
SCORE estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
size_t numPt = mgr.system.mappings.size();
const TargetPhrases **tpsAllPt = path.targetPhrases;
for (size_t i = 0; i < numPt; ++i) {
const TargetPhrases *tps = tpsAllPt[i];
if (tps) {
Extend(hypo, *tps, path, newBitmap, estimatedScore);
}
}
}
void Search::Extend(const Hypothesis &hypo, const TargetPhrases &tps,
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
{
BOOST_FOREACH(const TargetPhraseImpl *tp, tps){
Extend(hypo, *tp, path, newBitmap, estimatedScore);
}
}
void Search::Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp,
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
{
Hypothesis *newHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
newHypo->Init(mgr, hypo, path, tp, newBitmap, estimatedScore);
newHypo->EvaluateWhenApplied();
m_stacks.Add(newHypo, mgr.GetHypoRecycle(), mgr.arcLists);
//m_arcLists.AddArc(stackAdded.added, newHypo, stackAdded.other);
//stack.Prune(mgr.GetHypoRecycle(), mgr.system.stackSize, mgr.system.stackSize * 2);
}
const Hypothesis *Search::GetBestHypo() const
{
const Stack &lastStack = m_stacks.Back();
const Hypotheses &sortedHypos = lastStack.GetSortedAndPruneHypos(mgr,
mgr.arcLists);
const Hypothesis *best = NULL;
if (sortedHypos.size()) {
best = static_cast<const Hypothesis*>(sortedHypos[0]);
}
return best;
}
void Search::AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const
{
const Stack &lastStack = m_stacks.Back();
const Hypotheses &hypos = lastStack.GetSortedAndPruneHypos(mgr, mgr.arcLists);
BOOST_FOREACH(const HypothesisBase *hypoBase, hypos){
const Hypothesis *hypo = static_cast<const Hypothesis*>(hypoBase);
TrellisPath *path = new TrellisPath(hypo, mgr.arcLists);
paths.Add(path);
}
}
} // namespace
}


@@ -1,15 +0,0 @@
/*
* PhraseImpl.cpp
*
* Created on: 19 Feb 2016
* Author: hieu
*/
#include "PhraseImpl.h"
using namespace std;
namespace Moses2
{
}


@@ -1,31 +0,0 @@
#pragma once
#include "../PhraseImplTemplate.h"
#include "../SubPhrase.h"
namespace Moses2
{
class PhraseImpl: public PhraseImplTemplate<Word>
{
public:
static PhraseImpl *CreateFromString(MemPool &pool, FactorCollection &vocab,
const System &system, const std::string &str)
{
std::vector<std::string> toks = Moses2::Tokenize(str);
size_t size = toks.size();
PhraseImpl *ret;
ret = new (pool.Allocate<PhraseImpl>()) PhraseImpl(pool, size);
ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks);
return ret;
}
PhraseImpl(MemPool &pool, size_t size) :
PhraseImplTemplate<Word>(pool, size)
{
}
};
}


@@ -1,174 +0,0 @@
/*
* Sentence.cpp
*
* Created on: 14 Dec 2015
* Author: hieu
*/
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/xml_parser.hpp>
#include "Sentence.h"
#include "../System.h"
#include "../parameters/AllOptions.h"
#include "../legacy/Util2.h"
using namespace std;
namespace Moses2
{
Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab,
const System &system, const std::string &str)
{
Sentence *ret;
if (system.options.input.xml_policy) {
// xml
ret = CreateFromStringXML(pool, vocab, system, str);
}
else {
// no xml
//cerr << "PB Sentence" << endl;
std::vector<std::string> toks = Tokenize(str);
size_t size = toks.size();
ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks, false);
}
//cerr << "REORDERING CONSTRAINTS:" << ret->GetReorderingConstraint() << endl;
//cerr << "ret=" << ret->Debug(system) << endl;
return ret;
}
Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
const System &system, const std::string &str)
{
Sentence *ret;
vector<XMLOption*> xmlOptions;
pugi::xml_document doc;
string str2 = "<xml>" + str + "</xml>";
pugi::xml_parse_result result = doc.load(str2.c_str(),
pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
pugi::xml_node topNode = doc.child("xml");
std::vector<std::string> toks;
XMLParse(pool, system, 0, topNode, toks, xmlOptions);
// debug
/*
cerr << "xmloptions:" << endl;
for (size_t i = 0; i < xmlOptions.size(); ++i) {
cerr << xmlOptions[i]->Debug(system) << endl;
}
*/
// create words
size_t size = toks.size();
ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks, false);
// xml
ret->Init(system, size, system.options.reordering.max_distortion);
ReorderingConstraint &reorderingConstraint = ret->GetReorderingConstraint();
// set reordering walls, if "-monotone-at-punctuation" is set
if (system.options.reordering.monotone_at_punct && ret->GetSize()) {
reorderingConstraint.SetMonotoneAtPunctuation(*ret);
}
// set walls obtained from xml
for(size_t i=0; i<xmlOptions.size(); i++) {
const XMLOption *xmlOption = xmlOptions[i];
if(strcmp(xmlOption->GetNodeName(), "wall") == 0) {
UTIL_THROW_IF2(xmlOption->startPos >= ret->GetSize(), "wall is beyond the sentence"); // no buggy walls, please
reorderingConstraint.SetWall(xmlOption->startPos - 1, true);
}
else if (strcmp(xmlOption->GetNodeName(), "zone") == 0) {
reorderingConstraint.SetZone( xmlOption->startPos, xmlOption->startPos + xmlOption->phraseSize -1 );
}
else if (strcmp(xmlOption->GetNodeName(), "ne") == 0) {
FactorType placeholderFactor = system.options.input.placeholder_factor;
UTIL_THROW_IF2(placeholderFactor == NOT_FOUND,
"Placeholder XML in input. Must have argument -placeholder-factor [NUM]");
UTIL_THROW_IF2(xmlOption->phraseSize != 1,
"Placeholder must only cover 1 word");
const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false);
(*ret)[xmlOption->startPos][placeholderFactor] = factor;
}
else {
// default - forced translation. Add to class variable
ret->AddXMLOption(system, xmlOption);
}
}
reorderingConstraint.FinalizeWalls();
return ret;
}
void Sentence::XMLParse(
MemPool &pool,
const System &system,
size_t depth,
const pugi::xml_node &parentNode,
std::vector<std::string> &toks,
vector<XMLOption*> &xmlOptions)
{ // pugixml
for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
string nodeName = childNode.name();
//cerr << depth << " nodeName=" << nodeName << endl;
int startPos = toks.size();
string value = childNode.value();
if (!value.empty()) {
//cerr << depth << "childNode text=" << value << endl;
std::vector<std::string> subPhraseToks = Tokenize(value);
for (size_t i = 0; i < subPhraseToks.size(); ++i) {
toks.push_back(subPhraseToks[i]);
}
}
if (!nodeName.empty()) {
XMLOption *xmlOption = new (pool.Allocate<XMLOption>()) XMLOption(pool, nodeName, startPos);
pugi::xml_attribute attr;
attr = childNode.attribute("translation");
if (!attr.empty()) {
xmlOption->SetTranslation(pool, attr.as_string());
}
attr = childNode.attribute("entity");
if (!attr.empty()) {
xmlOption->SetEntity(pool, attr.as_string());
}
attr = childNode.attribute("prob");
if (!attr.empty()) {
xmlOption->prob = attr.as_float();
}
xmlOptions.push_back(xmlOption);
// recursively call this function. For proper recursive trees
XMLParse(pool, system, depth + 1, childNode, toks, xmlOptions);
size_t endPos = toks.size();
xmlOption->phraseSize = endPos - startPos;
/*
cerr << "xmlOptions=";
xmlOption->Debug(cerr, system);
cerr << endl;
*/
}
}
}
} /* namespace Moses2 */


@@ -1,155 +0,0 @@
/*
* Sentence.cpp
*
* Created on: 14 Dec 2015
* Author: hieu
*/
#include "Sentence.h"
#include "../System.h"
using namespace std;
namespace Moses2
{
namespace SCFG
{
Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab,
const System &system, const std::string &str, long translationId)
{
//cerr << "SCFG Sentence" << endl;
Sentence *ret;
if (system.options.input.xml_policy) {
// xml
ret = CreateFromStringXML(pool, vocab, system, str);
//cerr << "ret=" << ret->Debug(system) << endl;
}
else {
std::vector<std::string> toks = Tokenize(str);
size_t size = toks.size() + 2;
ret = new (pool.Allocate<SCFG::Sentence>()) Sentence(pool, size);
ret->PhraseImplTemplate<SCFG::Word>::CreateFromString(vocab, system, toks, true);
}
return ret;
}
Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
const System &system, const std::string &str)
{
Sentence *ret;
vector<XMLOption*> xmlOptions;
pugi::xml_document doc;
string str2 = "<xml>" + str + "</xml>";
pugi::xml_parse_result result = doc.load(str2.c_str(),
pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
pugi::xml_node topNode = doc.child("xml");
std::vector<std::string> toks;
XMLParse(pool, system, 0, topNode, toks, xmlOptions);
// debug
/*
cerr << "xmloptions:" << endl;
for (size_t i = 0; i < xmlOptions.size(); ++i) {
cerr << xmlOptions[i]->Debug(system) << endl;
}
*/
// create words
size_t size = toks.size() + 2;
ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
ret->PhraseImplTemplate<SCFG::Word>::CreateFromString(vocab, system, toks, true);
// xml
for(size_t i=0; i<xmlOptions.size(); i++) {
const XMLOption *xmlOption = xmlOptions[i];
if (strcmp(xmlOption->GetNodeName(), "ne") == 0) {
FactorType placeholderFactor = system.options.input.placeholder_factor;
UTIL_THROW_IF2(placeholderFactor == NOT_FOUND,
"Placeholder XML in input. Must have argument -placeholder-factor [NUM]");
UTIL_THROW_IF2(xmlOption->phraseSize != 1,
"Placeholder must only cover 1 word");
const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false);
(*ret)[xmlOption->startPos + 1][placeholderFactor] = factor;
}
else {
// default - forced translation. Add to class variable
ret->AddXMLOption(system, xmlOption);
}
}
//cerr << "ret=" << ret->Debug(system) << endl;
return ret;
}
void Sentence::XMLParse(
MemPool &pool,
const System &system,
size_t depth,
const pugi::xml_node &parentNode,
std::vector<std::string> &toks,
vector<XMLOption*> &xmlOptions)
{ // pugixml
for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
string nodeName = childNode.name();
//cerr << depth << " nodeName=" << nodeName << endl;
int startPos = toks.size();
string value = childNode.value();
if (!value.empty()) {
//cerr << depth << "childNode text=" << value << endl;
std::vector<std::string> subPhraseToks = Tokenize(value);
for (size_t i = 0; i < subPhraseToks.size(); ++i) {
toks.push_back(subPhraseToks[i]);
}
}
if (!nodeName.empty()) {
XMLOption *xmlOption = new (pool.Allocate<XMLOption>()) XMLOption(pool, nodeName, startPos);
pugi::xml_attribute attr;
attr = childNode.attribute("translation");
if (!attr.empty()) {
xmlOption->SetTranslation(pool, attr.as_string());
}
attr = childNode.attribute("entity");
if (!attr.empty()) {
xmlOption->SetEntity(pool, attr.as_string());
}
attr = childNode.attribute("prob");
if (!attr.empty()) {
xmlOption->prob = attr.as_float();
}
xmlOptions.push_back(xmlOption);
// recursively call this function so that nested XML markup is handled properly
XMLParse(pool, system, depth + 1, childNode, toks, xmlOptions);
size_t endPos = toks.size();
xmlOption->phraseSize = endPos - startPos;
/*
cerr << "xmlOptions=";
xmlOption->Debug(cerr, system);
cerr << endl;
*/
}
}
}
}
} /* namespace Moses2 */
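A minimal standalone sketch of the XML-markup handling in CreateFromStringXML/XMLParse above, kept outside the original file: it wraps an input line in a dummy <xml> element and walks the children with the same pugixml calls (plain text nodes contribute tokens, element nodes carry translation/entity/prob attributes). The sentence and attribute values are invented, and load_string is used in place of the deprecated doc.load overload.

#include <iostream>
#include <string>
#include <pugixml.hpp>

int main() {
  // The input line is not a rooted XML document, so wrap it first (same trick as above).
  std::string line = "das ist <ne entity=\"Haus\" translation=\"house\" prob=\"0.9\">Haus</ne> klein";
  std::string wrapped = "<xml>" + line + "</xml>";

  pugi::xml_document doc;
  pugi::xml_parse_result result = doc.load_string(wrapped.c_str(),
      pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
  if (!result) {
    std::cerr << "parse error: " << result.description() << std::endl;
    return 1;
  }

  // One level of the walk done by XMLParse(): pcdata children are plain tokens,
  // element children become XMLOptions (the real code recurses into them).
  pugi::xml_node top = doc.child("xml");
  for (pugi::xml_node child = top.first_child(); child; child = child.next_sibling()) {
    std::string name = child.name();    // empty for plain text nodes
    std::string value = child.value();  // text content for plain text nodes
    if (!value.empty()) {
      std::cout << "text tokens: '" << value << "'" << std::endl;
    }
    if (!name.empty()) {
      std::cout << "xml option <" << name << ">"
                << " translation=" << child.attribute("translation").as_string()
                << " entity=" << child.attribute("entity").as_string()
                << " prob=" << child.attribute("prob").as_float() << std::endl;
    }
  }
  return 0;
}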

View File

@ -1,74 +0,0 @@
/*
* KBestExtractor.cpp
*
* Created on: 2 Aug 2016
* Author: hieu
*/
#include <boost/foreach.hpp>
#include <sstream>
#include "KBestExtractor.h"
#include "../Manager.h"
#include "../Hypothesis.h"
#include "../Stacks.h"
#include "../Stack.h"
#include "../Sentence.h"
#include "../../System.h"
#include "../../Scores.h"
#include "../../legacy/Util2.h"
using namespace std;
namespace Moses2
{
//bool g_debug = false;
namespace SCFG
{
/////////////////////////////////////////////////////////////
KBestExtractor::KBestExtractor(const SCFG::Manager &mgr)
:m_mgr(mgr)
{
}
KBestExtractor::~KBestExtractor()
{
}
void KBestExtractor::OutputToStream(std::stringstream &strm)
{
//cerr << "1" << flush;
const Stack &lastStack = m_mgr.GetStacks().GetLastStack();
UTIL_THROW_IF2(lastStack.GetColl().size() != 1, "Only supposed to be 1 hypo coll in last stack");
UTIL_THROW_IF2(lastStack.GetColl().begin()->second == NULL, "NULL hypo collection");
const Hypotheses &hypos = lastStack.GetColl().begin()->second->GetSortedAndPrunedHypos();
UTIL_THROW_IF2(hypos.size() != 1, "Only supposed to be 1 hypo in collection");
const HypothesisBase *hypo = hypos[0];
const ArcLists &arcLists = m_mgr.arcLists;
const ArcList &arcList = arcLists.GetArcList(hypo);
NBests &nbests = m_nbestColl.GetOrCreateNBests(m_mgr, arcList);
size_t ind = 0;
while (nbests.Extend(m_mgr, m_nbestColl, ind)) {
const NBest &deriv = nbests.Get(ind);
strm << m_mgr.GetTranslationId() << " ||| ";
//cerr << "1" << flush;
strm << deriv.GetStringExclSentenceMarkers();
//cerr << "2" << flush;
strm << " ||| ";
deriv.GetScores().OutputBreakdown(strm, m_mgr.system);
//cerr << "3" << flush;
strm << "||| ";
strm << deriv.GetScores().GetTotalScore();
//cerr << "4" << flush;
strm << endl;
++ind;
}
}
}
} /* namespace Moses2 */

View File

@ -1,194 +0,0 @@
/*
* NBest.cpp
*
* Created on: 24 Aug 2016
* Author: hieu
*/
#include <sstream>
#include <boost/foreach.hpp>
#include "util/exception.hh"
#include "NBest.h"
#include "NBests.h"
#include "NBestColl.h"
#include "../Manager.h"
#include "../TargetPhraseImpl.h"
#include "../../System.h"
using namespace std;
namespace Moses2
{
namespace SCFG
{
NBest::NBest(
const SCFG::Manager &mgr,
const ArcList &varcList,
size_t vind,
NBestColl &nbestColl)
:arcList(&varcList)
,arcInd(vind)
{
const SCFG::Hypothesis &hypo = GetHypo();
// copy scores from best hypo
MemPool &pool = mgr.GetPool();
m_scores = new (pool.Allocate<Scores>())
Scores(mgr.system, pool, mgr.system.featureFunctions.GetNumScores(), hypo.GetScores());
// children
const ArcLists &arcLists = mgr.arcLists;
//const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase();
const Vector<const Hypothesis*> &prevHypos = hypo.GetPrevHypos();
for (size_t i = 0; i < prevHypos.size(); ++i) {
const SCFG::Hypothesis *prevHypo = prevHypos[i];
const ArcList &childArc = arcLists.GetArcList(prevHypo);
NBests &childNBests = nbestColl.GetOrCreateNBests(mgr, childArc);
Child child(&childNBests, 0);
children.push_back(child);
}
stringstream strm;
OutputToStream(mgr, strm, nbestColl);
m_str = strm.str();
}
NBest::NBest(const SCFG::Manager &mgr,
const NBest &orig,
size_t childInd,
NBestColl &nbestColl)
:arcList(orig.arcList)
,arcInd(orig.arcInd)
,children(orig.children)
{
Child &child = children[childInd];
size_t &ind = child.second;
++ind;
UTIL_THROW_IF2(ind >= child.first->GetSize(),
"out of bound:" << ind << ">=" << child.first->GetSize());
// scores
MemPool &pool = mgr.GetPool();
m_scores = new (pool.Allocate<Scores>())
Scores(mgr.system,
pool,
mgr.system.featureFunctions.GetNumScores(),
orig.GetScores());
const Scores &origScores = orig.GetChild(childInd).GetScores();
const Scores &newScores = GetChild(childInd).GetScores();
m_scores->MinusEquals(mgr.system, origScores);
m_scores->PlusEquals(mgr.system, newScores);
stringstream strm;
OutputToStream(mgr, strm, nbestColl);
m_str = strm.str();
}
const SCFG::Hypothesis &NBest::GetHypo() const
{
const HypothesisBase *hypoBase = (*arcList)[arcInd];
const SCFG::Hypothesis &hypo = *static_cast<const SCFG::Hypothesis*>(hypoBase);
return hypo;
}
const NBest &NBest::GetChild(size_t ind) const
{
const Child &child = children[ind];
const NBests &nbests = *child.first;
const NBest &nbest = nbests.Get(child.second);
return nbest;
}
void NBest::CreateDeviants(
const SCFG::Manager &mgr,
NBestColl &nbestColl,
Contenders &contenders) const
{
if (arcInd + 1 < arcList->size()) {
// to use the next arc in the list, all children must be in 1st position. Not sure if this is correct
bool ok = true;
BOOST_FOREACH(const Child &child, children) {
if (child.second) {
ok = false;
break;
}
}
if (ok) {
NBest *next = new NBest(mgr, *arcList, arcInd + 1, nbestColl);
contenders.push(next);
}
}
for (size_t childInd = 0; childInd < children.size(); ++childInd) {
const Child &child = children[childInd];
NBests &childNBests = *child.first;
bool extended = childNBests.Extend(mgr, nbestColl, child.second + 1);
if (extended) {
//cerr << "HH1 " << childInd << endl;
NBest *next = new NBest(mgr, *this, childInd, nbestColl);
//cerr << "HH2 " << childInd << endl;
contenders.push(next);
//cerr << "HH3 " << childInd << endl;
}
}
}
void NBest::OutputToStream(
const SCFG::Manager &mgr,
std::stringstream &strm,
const NBestColl &nbestColl) const
{
const SCFG::Hypothesis &hypo = GetHypo();
//strm << &hypo << " ";
const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase();
for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
const SCFG::Word &word = tp[targetPos];
//cerr << "word " << pos << "=" << word << endl;
if (word.isNonTerminal) {
//cerr << "is nt" << endl;
// non-term. fill out with prev hypo
size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[targetPos];
UTIL_THROW_IF2(nonTermInd >= children.size(), "Out of bounds:" << nonTermInd << ">=" << children.size());
const NBest &nbest = GetChild(nonTermInd);
strm << nbest.GetString();
}
else {
//cerr << "not nt" << endl;
word.OutputToStream(hypo.GetManager(), targetPos, hypo, strm);
strm << " ";
}
}
}
std::string NBest::Debug(const System &system) const
{
stringstream strm;
strm << GetScores().GetTotalScore() << " "
<< arcList << "("
<< arcList->size() << ")["
<< arcInd << "] ";
for (size_t i = 0; i < children.size(); ++i) {
const Child &child = children[i];
const NBest &childNBest = child.first->Get(child.second);
strm << child.first << "("
<< child.first->GetSize() << ")["
<< child.second << "]";
strm << childNBest.GetScores().GetTotalScore() << " ";
}
return strm.str();
}
}
}

View File

@ -1,100 +0,0 @@
/*
* NBest.h
*
* Created on: 24 Aug 2016
* Author: hieu
*/
#pragma once
#include <queue>
#include <vector>
#include <string>
#include <stdlib.h>
#include "../../Scores.h"
#include "../../ArcLists.h"
namespace Moses2
{
class Scores;
class System;
namespace SCFG
{
class NBest;
class NBests;
class NBestScoreOrderer;
class Manager;
class NBestColl;
class Hypothesis;
/////////////////////////////////////////////////////////////
typedef std::priority_queue<NBest*, std::vector<NBest*>, NBestScoreOrderer> Contenders;
/////////////////////////////////////////////////////////////
class NBest
{
public:
const ArcList *arcList;
size_t arcInd;
typedef std::pair<NBests*, size_t> Child; // key to another NBest
typedef std::vector<Child> Children;
Children children;
NBest(const SCFG::Manager &mgr,
const ArcList &varcList,
size_t vind,
NBestColl &nbestColl);
NBest(const SCFG::Manager &mgr,
const NBest &orig,
size_t childInd,
NBestColl &nbestColl);
void CreateDeviants(
const SCFG::Manager &mgr,
NBestColl &nbestColl,
Contenders &contenders) const;
const Scores &GetScores() const
{ return *m_scores; }
const NBest &GetChild(size_t ind) const;
const std::string &GetString() const
{ return m_str; }
std::string GetStringExclSentenceMarkers() const
{
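// m_str looks like "<s> w1 w2 ... wN </s> ": drop the 4 leading chars ("<s> ") and the 6 trailing chars (" </s> ")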
std::string ret = m_str.substr(4, m_str.size() - 10);
return ret;
}
std::string Debug(const System &system) const;
protected:
Scores *m_scores;
std::string m_str;
const SCFG::Hypothesis &GetHypo() const;
void OutputToStream(
const SCFG::Manager &mgr,
std::stringstream &strm,
const NBestColl &nbestColl) const;
};
/////////////////////////////////////////////////////////////
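// Orders by ascending total score; used with std::priority_queue this gives a max-heap,
// so contenders.top() is always the highest-scoring derivation still waiting to be extended.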
class NBestScoreOrderer
{
public:
bool operator()(const NBest* a, const NBest* b) const
{
return a->GetScores().GetTotalScore() < b->GetScores().GetTotalScore();
}
};
}
}

View File

@ -1,111 +0,0 @@
/*
* NBests.cpp
*
* Created on: 24 Aug 2016
* Author: hieu
*/
#include <boost/foreach.hpp>
#include "NBests.h"
#include "../Manager.h"
#include "../../System.h"
using namespace std;
namespace Moses2
{
namespace SCFG
{
NBests::NBests(const SCFG::Manager &mgr,
const ArcList &arcList,
NBestColl &nbestColl)
:indIter(0)
{
// best
NBest *contender = new NBest(mgr, arcList, 0, nbestColl);
contenders.push(contender);
bool extended = Extend(mgr, nbestColl, 0);
assert(extended);
}
NBests::~NBests()
{
BOOST_FOREACH(const NBest *nbest, m_coll) {
delete nbest;
}
// delete bad contenders left in queue
while (!contenders.empty()) {
NBest *contender = contenders.top();
contenders.pop();
delete contender;
}
}
bool NBests::Extend(const SCFG::Manager &mgr,
NBestColl &nbestColl,
size_t ind)
{
if (ind < m_coll.size()) {
// asking for one we've already done
return true;
}
assert(ind == m_coll.size());
// checks
if (ind >= mgr.system.options.nbest.nbest_size) {
return false;
}
size_t maxIter = mgr.system.options.nbest.nbest_size * mgr.system.options.nbest.factor;
// MAIN LOOP: create 1 new derivation.
// The loop repeats until a distinct n-best entry is found
bool ok = false;
while (!ok) {
++indIter;
if (indIter > maxIter) {
return false;
}
if (contenders.empty()) {
return false;
}
NBest *contender = contenders.top();
contenders.pop();
contender->CreateDeviants(mgr, nbestColl, contenders);
if (mgr.system.options.nbest.only_distinct) {
const string &tgtPhrase = contender->GetString();
//cerr << "tgtPhrase=" << tgtPhrase << endl;
boost::hash<std::string> string_hash;
size_t hash = string_hash(tgtPhrase);
if (distinctHypos.insert(hash).second) {
ok = true;
}
}
else {
ok = true;
}
if (ok) {
Add(contender);
//cerr << best->GetScores().GetTotalScore() << " ";
//cerr << best->Debug(mgr.system) << endl;
return true;
}
else {
delete contender;
}
}
return false;
}
}
}
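NBests::Extend() and NBest::CreateDeviants() above implement lazy k-best extraction: keep a max-heap of candidate derivations, pop the best one, record it, and push its neighbouring ("deviant") derivations back onto the heap. A self-contained sketch of just that pattern over plain scored strings, with invented scores and no Moses2 types:

#include <iostream>
#include <queue>
#include <string>
#include <vector>

struct Deriv {
  double score;
  std::string text;
};

// Mirrors NBestScoreOrderer: ascending comparison makes the priority_queue a max-heap.
struct DerivOrderer {
  bool operator()(const Deriv &a, const Deriv &b) const { return a.score < b.score; }
};

int main() {
  std::priority_queue<Deriv, std::vector<Deriv>, DerivOrderer> contenders;
  contenders.push({-1.0, "best derivation"});   // seed with the 1-best, as the NBests constructor does

  std::vector<Deriv> kbest;
  const size_t k = 3;

  while (kbest.size() < k && !contenders.empty()) {
    Deriv best = contenders.top();   // like Extend(): pop the best remaining contender
    contenders.pop();
    kbest.push_back(best);

    // Like CreateDeviants(): push the neighbours of the popped derivation.
    // Here we just fake two slightly worse variants of it.
    contenders.push({best.score - 0.5, best.text + " (variant A)"});
    contenders.push({best.score - 0.7, best.text + " (variant B)"});
  }

  for (size_t i = 0; i < kbest.size(); ++i)
    std::cout << i << " ||| " << kbest[i].text << " ||| " << kbest[i].score << "\n";
  return 0;
}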

View File

@ -1,53 +0,0 @@
/*
* NBests.h
*
* Created on: 24 Aug 2016
* Author: hieu
*/
#pragma once
#include <boost/unordered_set.hpp>
#include "NBest.h"
namespace Moses2
{
namespace SCFG
{
class NBests
{
public:
Contenders contenders;
boost::unordered_set<size_t> distinctHypos;
NBests(const SCFG::Manager &mgr,
const ArcList &arcList,
NBestColl &nbestColl);
virtual ~NBests();
size_t GetSize() const
{ return m_coll.size(); }
const NBest &Get(size_t ind) const
{ return *m_coll[ind]; }
bool Extend(const SCFG::Manager &mgr,
NBestColl &nbestColl,
size_t ind);
protected:
std::vector<const NBest*> m_coll;
size_t indIter;
void Add(const NBest *nbest)
{
m_coll.push_back(nbest);
}
};
}
}

View File

@ -1,466 +0,0 @@
// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <deque>
#include "PhraseDecoder.h"
#include "../../System.h"
#include "../../SubPhrase.h"
using namespace std;
namespace Moses2
{
PhraseDecoder::PhraseDecoder(
PhraseTableCompact &phraseDictionary,
const std::vector<FactorType>* input,
const std::vector<FactorType>* output,
size_t numScoreComponent
// , const std::vector<float>* weight
)
: m_coding(None), m_numScoreComponent(numScoreComponent),
m_containsAlignmentInfo(true), m_maxRank(0),
m_symbolTree(0), m_multipleScoreTrees(false),
m_scoreTrees(1), m_alignTree(0),
m_phraseDictionary(phraseDictionary), m_input(input), m_output(output),
// m_weight(weight),
m_separator(" ||| ")
{ }
PhraseDecoder::~PhraseDecoder()
{
if(m_symbolTree)
delete m_symbolTree;
for(size_t i = 0; i < m_scoreTrees.size(); i++)
if(m_scoreTrees[i])
delete m_scoreTrees[i];
if(m_alignTree)
delete m_alignTree;
}
inline unsigned PhraseDecoder::GetSourceSymbolId(std::string& symbol)
{
boost::unordered_map<std::string, unsigned>::iterator it
= m_sourceSymbolsMap.find(symbol);
if(it != m_sourceSymbolsMap.end())
return it->second;
size_t idx = m_sourceSymbols.find(symbol);
m_sourceSymbolsMap[symbol] = idx;
return idx;
}
inline std::string PhraseDecoder::GetTargetSymbol(unsigned idx) const
{
if(idx < m_targetSymbols.size())
return m_targetSymbols[idx];
return std::string("##ERROR##");
}
inline size_t PhraseDecoder::GetREncType(unsigned encodedSymbol)
{
return (encodedSymbol >> 30) + 1;
}
inline size_t PhraseDecoder::GetPREncType(unsigned encodedSymbol)
{
return (encodedSymbol >> 31) + 1;
}
inline unsigned PhraseDecoder::GetTranslation(unsigned srcIdx, size_t rank)
{
size_t srcTrgIdx = m_lexicalTableIndex[srcIdx];
return m_lexicalTable[srcTrgIdx + rank].second;
}
size_t PhraseDecoder::GetMaxSourcePhraseLength()
{
return m_maxPhraseLength;
}
inline unsigned PhraseDecoder::DecodeREncSymbol1(unsigned encodedSymbol)
{
return encodedSymbol &= ~(3 << 30);
}
inline unsigned PhraseDecoder::DecodeREncSymbol2Rank(unsigned encodedSymbol)
{
return encodedSymbol &= ~(255 << 24);
}
inline unsigned PhraseDecoder::DecodeREncSymbol2Position(unsigned encodedSymbol)
{
encodedSymbol &= ~(3 << 30);
encodedSymbol >>= 24;
return encodedSymbol;
}
inline unsigned PhraseDecoder::DecodeREncSymbol3(unsigned encodedSymbol)
{
return encodedSymbol &= ~(3 << 30);
}
inline unsigned PhraseDecoder::DecodePREncSymbol1(unsigned encodedSymbol)
{
return encodedSymbol &= ~(1 << 31);
}
inline int PhraseDecoder::DecodePREncSymbol2Left(unsigned encodedSymbol)
{
return ((encodedSymbol >> 25) & 63) - 32;
}
inline int PhraseDecoder::DecodePREncSymbol2Right(unsigned encodedSymbol)
{
return ((encodedSymbol >> 19) & 63) - 32;
}
inline unsigned PhraseDecoder::DecodePREncSymbol2Rank(unsigned encodedSymbol)
{
return (encodedSymbol & 524287);
}
size_t PhraseDecoder::Load(std::FILE* in)
{
size_t start = std::ftell(in);
size_t read = 0;
read += std::fread(&m_coding, sizeof(m_coding), 1, in);
read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, in);
read += std::fread(&m_containsAlignmentInfo, sizeof(m_containsAlignmentInfo), 1, in);
read += std::fread(&m_maxRank, sizeof(m_maxRank), 1, in);
read += std::fread(&m_maxPhraseLength, sizeof(m_maxPhraseLength), 1, in);
if(m_coding == REnc) {
m_sourceSymbols.load(in);
size_t size;
read += std::fread(&size, sizeof(size_t), 1, in);
m_lexicalTableIndex.resize(size);
read += std::fread(&m_lexicalTableIndex[0], sizeof(size_t), size, in);
read += std::fread(&size, sizeof(size_t), 1, in);
m_lexicalTable.resize(size);
read += std::fread(&m_lexicalTable[0], sizeof(SrcTrg), size, in);
}
m_targetSymbols.load(in);
m_symbolTree = new CanonicalHuffman<unsigned>(in);
read += std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, in);
if(m_multipleScoreTrees) {
m_scoreTrees.resize(m_numScoreComponent);
for(size_t i = 0; i < m_numScoreComponent; i++)
m_scoreTrees[i] = new CanonicalHuffman<float>(in);
} else {
m_scoreTrees.resize(1);
m_scoreTrees[0] = new CanonicalHuffman<float>(in);
}
if(m_containsAlignmentInfo)
m_alignTree = new CanonicalHuffman<AlignPoint>(in);
size_t end = std::ftell(in);
return end - start;
}
std::string PhraseDecoder::MakeSourceKey(std::string &source)
{
return source + m_separator;
}
TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(
const ManagerBase &mgr,
const Phrase<Word> &sourcePhrase,
bool topLevel,
bool eval)
{
// Not using TargetPhraseCollection, to avoid the "new" operator,
// which can introduce heavy locking with multiple threads
TargetPhraseVectorPtr tpv(new TargetPhraseVector());
size_t bitsLeft = 0;
if(m_coding == PREnc) {
std::pair<TargetPhraseVectorPtr, size_t> cachedPhraseColl
= m_decodingCache.Retrieve(sourcePhrase);
// Has been cached and is complete or does not need to be completed
if(cachedPhraseColl.first != NULL && (!topLevel || cachedPhraseColl.second == 0))
return cachedPhraseColl.first;
// Has been cached, but is incomplete
else if(cachedPhraseColl.first != NULL) {
bitsLeft = cachedPhraseColl.second;
tpv->resize(cachedPhraseColl.first->size());
std::copy(cachedPhraseColl.first->begin(),
cachedPhraseColl.first->end(),
tpv->begin());
}
}
// Retrieve source phrase identifier
std::string sourcePhraseString = sourcePhrase.GetString(*m_input);
size_t sourcePhraseId = m_phraseDictionary.m_hash[MakeSourceKey(sourcePhraseString)];
/*
cerr << "sourcePhraseString=" << sourcePhraseString << " "
<< sourcePhraseId
<< endl;
*/
if(sourcePhraseId != m_phraseDictionary.m_hash.GetSize()) {
// Retrieve compressed and encoded target phrase collection
std::string encodedPhraseCollection;
if(m_phraseDictionary.m_inMemory)
encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMemory[sourcePhraseId].str();
else
encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMapped[sourcePhraseId].str();
BitWrapper<> encodedBitStream(encodedPhraseCollection);
if(m_coding == PREnc && bitsLeft)
encodedBitStream.SeekFromEnd(bitsLeft);
// Decompress and decode target phrase collection
TargetPhraseVectorPtr decodedPhraseColl =
DecodeCollection(mgr, tpv, encodedBitStream, sourcePhrase, topLevel, eval);
return decodedPhraseColl;
} else
return TargetPhraseVectorPtr();
}
TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
const ManagerBase &mgr,
TargetPhraseVectorPtr tpv,
BitWrapper<> &encodedBitStream,
const Phrase<Word> &sourcePhrase,
bool topLevel,
bool eval)
{
const System &system = mgr.system;
FactorCollection &vocab = system.GetVocab();
bool extending = tpv->size();
size_t bitsLeft = encodedBitStream.TellFromEnd();
std::vector<int> sourceWords;
if(m_coding == REnc) {
for(size_t i = 0; i < sourcePhrase.GetSize(); i++) {
std::string sourceWord
= sourcePhrase[i].GetString(*m_input);
unsigned idx = GetSourceSymbolId(sourceWord);
sourceWords.push_back(idx);
}
}
unsigned phraseStopSymbol = 0;
AlignPoint alignStopSymbol(-1, -1);
std::vector<float> scores;
std::set<AlignPointSizeT> alignment;
enum DecodeState { New, Symbol, Score, Alignment, Add } state = New;
size_t srcSize = sourcePhrase.GetSize();
TPCompact* targetPhrase = NULL;
while(encodedBitStream.TellFromEnd()) {
if(state == New) {
// Creating new TargetPhrase on the heap
tpv->push_back(TPCompact());
targetPhrase = &tpv->back();
alignment.clear();
scores.clear();
state = Symbol;
}
if(state == Symbol) {
unsigned symbol = m_symbolTree->Read(encodedBitStream);
if(symbol == phraseStopSymbol) {
state = Score;
} else {
if(m_coding == REnc) {
std::string wordString;
size_t type = GetREncType(symbol);
if(type == 1) {
unsigned decodedSymbol = DecodeREncSymbol1(symbol);
wordString = GetTargetSymbol(decodedSymbol);
} else if (type == 2) {
size_t rank = DecodeREncSymbol2Rank(symbol);
size_t srcPos = DecodeREncSymbol2Position(symbol);
if(srcPos >= sourceWords.size())
return TargetPhraseVectorPtr();
wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
if(m_phraseDictionary.m_useAlignmentInfo) {
size_t trgPos = targetPhrase->words.size();
alignment.insert(AlignPoint(srcPos, trgPos));
}
} else if(type == 3) {
size_t rank = DecodeREncSymbol3(symbol);
size_t srcPos = targetPhrase->words.size();
if(srcPos >= sourceWords.size())
return TargetPhraseVectorPtr();
wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
if(m_phraseDictionary.m_useAlignmentInfo) {
size_t trgPos = srcPos;
alignment.insert(AlignPoint(srcPos, trgPos));
}
}
Word word;
word.CreateFromString(vocab, system, wordString);
targetPhrase->words.push_back(word);
} else if(m_coding == PREnc) {
// if the symbol is just a word
if(GetPREncType(symbol) == 1) {
unsigned decodedSymbol = DecodePREncSymbol1(symbol);
Word word;
word.CreateFromString(vocab, system, GetTargetSymbol(decodedSymbol));
targetPhrase->words.push_back(word);
}
// if the symbol is a subphrase pointer
else {
int left = DecodePREncSymbol2Left(symbol);
int right = DecodePREncSymbol2Right(symbol);
unsigned rank = DecodePREncSymbol2Rank(symbol);
int srcStart = left + targetPhrase->words.size();
int srcEnd = srcSize - right - 1;
// false positive consistency check
if(0 > srcStart || srcStart > srcEnd || unsigned(srcEnd) >= srcSize)
return TargetPhraseVectorPtr();
// false positive consistency check
if(m_maxRank && rank > m_maxRank)
return TargetPhraseVectorPtr();
// set subphrase by default to itself
TargetPhraseVectorPtr subTpv = tpv;
// if range smaller than source phrase retrieve subphrase
if(unsigned(srcEnd - srcStart + 1) != srcSize) {
SubPhrase<Word> subPhrase = sourcePhrase.GetSubPhrase(srcStart, srcEnd - srcStart + 1);
subTpv = CreateTargetPhraseCollection(mgr, subPhrase, false);
} else {
// false positive consistency check
if(rank >= tpv->size()-1)
return TargetPhraseVectorPtr();
}
// false positive consistency check
if(subTpv != NULL && rank < subTpv->size()) {
// insert the subphrase into the main target phrase
TPCompact& subTp = subTpv->at(rank);
if(m_phraseDictionary.m_useAlignmentInfo) {
// reconstruct the alignment data based on the alignment of the subphrase
for(std::set<AlignPointSizeT>::const_iterator it = subTp.alignment.begin();
it != subTp.alignment.end(); it++) {
alignment.insert(AlignPointSizeT(srcStart + it->first,
targetPhrase->words.size() + it->second));
}
}
std::copy(subTp.words.begin(), subTp.words.end(), std::back_inserter(targetPhrase->words));
} else
return TargetPhraseVectorPtr();
}
} else {
Word word;
word.CreateFromString(vocab, system, GetTargetSymbol(symbol));
targetPhrase->words.push_back(word);
}
}
} else if(state == Score) {
size_t idx = m_multipleScoreTrees ? scores.size() : 0;
float score = m_scoreTrees[idx]->Read(encodedBitStream);
scores.push_back(score);
if(scores.size() == m_numScoreComponent) {
targetPhrase->scores = scores;
if(m_containsAlignmentInfo)
state = Alignment;
else
state = Add;
}
} else if(state == Alignment) {
AlignPoint alignPoint = m_alignTree->Read(encodedBitStream);
if(alignPoint == alignStopSymbol) {
state = Add;
} else {
if(m_phraseDictionary.m_useAlignmentInfo)
alignment.insert(AlignPointSizeT(alignPoint));
}
}
if(state == Add) {
if(m_phraseDictionary.m_useAlignmentInfo) {
size_t sourceSize = sourcePhrase.GetSize();
size_t targetSize = targetPhrase->words.size();
for(std::set<AlignPointSizeT>::iterator it = alignment.begin(); it != alignment.end(); it++) {
if(it->first >= sourceSize || it->second >= targetSize)
return TargetPhraseVectorPtr();
}
targetPhrase->alignment = alignment;
}
if(m_coding == PREnc) {
if(!m_maxRank || tpv->size() <= m_maxRank)
bitsLeft = encodedBitStream.TellFromEnd();
if(!topLevel && m_maxRank && tpv->size() >= m_maxRank)
break;
}
if(encodedBitStream.TellFromEnd() <= 8)
break;
state = New;
}
}
if(m_coding == PREnc && !extending) {
bitsLeft = bitsLeft > 8 ? bitsLeft : 0;
m_decodingCache.Cache(sourcePhrase, tpv, bitsLeft, m_maxRank);
}
return tpv;
}
void PhraseDecoder::PruneCache()
{
m_decodingCache.Prune();
}
}
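The Decode*Symbol* helpers above are plain bit-field extraction on a 32-bit symbol: the top two bits hold the REnc type minus one; for a type-2 symbol, bits 24-29 hold the source position and the low 24 bits hold the rank (PREnc symbols instead use one type bit, two 6-bit offsets and a 19-bit rank). A small round-trip sketch using the same masks, with invented values:

#include <cassert>
#include <cstdint>
#include <iostream>

int main() {
  // Invented values for illustration.
  const uint32_t type = 2;      // REnc symbol type (1..3), stored as type-1 in the top 2 bits
  const uint32_t srcPos = 5;    // source position, bits 24..29
  const uint32_t rank = 1234;   // translation rank, low 24 bits

  // Pack, mirroring what the encoder side must have done.
  uint32_t symbol = ((type - 1) << 30) | (srcPos << 24) | rank;

  // Unpack with the same masks as PhraseDecoder.
  uint32_t decodedType = (symbol >> 30) + 1;             // GetREncType
  uint32_t decodedPos  = (symbol & ~(3u << 30)) >> 24;   // DecodeREncSymbol2Position
  uint32_t decodedRank = symbol & ~(255u << 24);         // DecodeREncSymbol2Rank

  assert(decodedType == type && decodedPos == srcPos && decodedRank == rank);
  std::cout << "type=" << decodedType << " srcPos=" << decodedPos
            << " rank=" << decodedRank << std::endl;
  return 0;
}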

View File

@ -1,142 +0,0 @@
// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <sstream>
#include <vector>
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <string>
#include <iterator>
#include <algorithm>
#include <sys/stat.h>
#include "PhraseTableCompact.h"
#include "StringVector.h"
#include "CanonicalHuffman.h"
#include "TargetPhraseCollectionCache.h"
#include "../../Phrase.h"
#include "../../ManagerBase.h"
namespace Moses2
{
class PhraseTableCompact;
class PhraseDecoder
{
protected:
friend class PhraseTableCompact;
typedef std::pair<unsigned char, unsigned char> AlignPoint;
typedef std::pair<unsigned, unsigned> SrcTrg;
enum Coding { None, REnc, PREnc } m_coding;
size_t m_numScoreComponent;
bool m_containsAlignmentInfo;
size_t m_maxRank;
size_t m_maxPhraseLength;
boost::unordered_map<std::string, unsigned> m_sourceSymbolsMap;
StringVector<unsigned char, unsigned, std::allocator> m_sourceSymbols;
StringVector<unsigned char, unsigned, std::allocator> m_targetSymbols;
std::vector<size_t> m_lexicalTableIndex;
std::vector<SrcTrg> m_lexicalTable;
CanonicalHuffman<unsigned>* m_symbolTree;
bool m_multipleScoreTrees;
std::vector<CanonicalHuffman<float>*> m_scoreTrees;
CanonicalHuffman<AlignPoint>* m_alignTree;
TargetPhraseCollectionCache m_decodingCache;
PhraseTableCompact& m_phraseDictionary;
// ***********************************************
const std::vector<FactorType>* m_input;
const std::vector<FactorType>* m_output;
std::string m_separator;
// ***********************************************
unsigned GetSourceSymbolId(std::string& s);
std::string GetTargetSymbol(unsigned id) const;
size_t GetREncType(unsigned encodedSymbol);
size_t GetPREncType(unsigned encodedSymbol);
unsigned GetTranslation(unsigned srcIdx, size_t rank);
size_t GetMaxSourcePhraseLength();
unsigned DecodeREncSymbol1(unsigned encodedSymbol);
unsigned DecodeREncSymbol2Rank(unsigned encodedSymbol);
unsigned DecodeREncSymbol2Position(unsigned encodedSymbol);
unsigned DecodeREncSymbol3(unsigned encodedSymbol);
unsigned DecodePREncSymbol1(unsigned encodedSymbol);
int DecodePREncSymbol2Left(unsigned encodedSymbol);
int DecodePREncSymbol2Right(unsigned encodedSymbol);
unsigned DecodePREncSymbol2Rank(unsigned encodedSymbol);
std::string MakeSourceKey(std::string &);
public:
PhraseDecoder(
PhraseTableCompact &phraseDictionary,
const std::vector<FactorType>* input,
const std::vector<FactorType>* output,
size_t numScoreComponent
);
~PhraseDecoder();
size_t Load(std::FILE* in);
TargetPhraseVectorPtr CreateTargetPhraseCollection(
const ManagerBase &mgr,
const Phrase<Word> &sourcePhrase,
bool topLevel = false,
bool eval = true);
TargetPhraseVectorPtr DecodeCollection(
const ManagerBase &mgr,
TargetPhraseVectorPtr tpv,
BitWrapper<> &encodedBitStream,
const Phrase<Word> &sourcePhrase,
bool topLevel,
bool eval);
void PruneCache();
};
}

View File

@ -1,222 +0,0 @@
#include <boost/algorithm/string/predicate.hpp>
#include <boost/thread/tss.hpp>
#include "PhraseTableCompact.h"
#include "PhraseDecoder.h"
#include "../../PhraseBased/InputPath.h"
#include "../../PhraseBased/Manager.h"
#include "../../PhraseBased/TargetPhrases.h"
#include "../../PhraseBased/TargetPhraseImpl.h"
#include "../../PhraseBased/Sentence.h"
using namespace std;
using namespace boost::algorithm;
namespace Moses2
{
bool PhraseTableCompact::s_inMemoryByDefault = false;
PhraseTableCompact::PhraseTableCompact(size_t startInd, const std::string &line)
:PhraseTable(startInd, line)
,m_inMemory(s_inMemoryByDefault)
,m_useAlignmentInfo(true)
,m_hash(10, 16)
,m_phraseDecoder(0)
{
ReadParameters();
}
PhraseTableCompact::~PhraseTableCompact()
{
}
void PhraseTableCompact::Load(System &system)
{
std::string tFilePath = m_path;
std::string suffix = ".minphr";
if (!ends_with(tFilePath, suffix)) tFilePath += suffix;
if (!FileExists(tFilePath))
throw runtime_error("Error: File " + tFilePath + " does not exist.");
m_phraseDecoder
= new PhraseDecoder(*this, &m_input, &m_output, GetNumScores());
std::FILE* pFile = std::fopen(tFilePath.c_str() , "r");
size_t indexSize;
//if(m_inMemory)
// Load source phrase index into memory
indexSize = m_hash.Load(pFile);
// else
// Keep source phrase index on disk
//indexSize = m_hash.LoadIndex(pFile);
size_t coderSize = m_phraseDecoder->Load(pFile);
size_t phraseSize;
if(m_inMemory) {
// Load target phrase collections into memory
phraseSize = m_targetPhrasesMemory.load(pFile, false);
}
else {
// Keep target phrase collections on disk
phraseSize = m_targetPhrasesMapped.load(pFile, true);
}
UTIL_THROW_IF2(indexSize == 0 || coderSize == 0 || phraseSize == 0,
"Not successfully loaded");
}
void PhraseTableCompact::SetParameter(const std::string& key, const std::string& value)
{
if (key == "blah") {
}
else {
PhraseTable::SetParameter(key, value);
}
}
void PhraseTableCompact::CleanUpAfterSentenceProcessing() const
{
//if(!m_sentenceCache.get())
// m_sentenceCache.reset(new PhraseCache());
m_phraseDecoder->PruneCache();
//m_sentenceCache->clear();
}
// pb
void PhraseTableCompact::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const
{
size_t inputSize = static_cast<const Sentence&>(mgr.GetInput()).GetSize();
InputPaths &inputPathsCast = static_cast<InputPaths&>(inputPaths);
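// i is the span length minus one: for each length, try every start position and look up the sub-phrase [startPos, startPos + i]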
for (size_t i = 0; i < inputSize; ++i) {
for (size_t startPos = 0; startPos < inputSize; ++startPos) {
size_t endPos = startPos + i;
if (endPos >= inputSize) {
break;
}
InputPath *path = inputPathsCast.GetMatrix().GetValue(startPos, i);
//cerr << "path=" << path->Debug(mgr.system) << endl;
TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path);
path->AddTargetPhrases(*this, tps);
}
}
}
TargetPhrases *PhraseTableCompact::Lookup(const Manager &mgr, MemPool &pool,
InputPath &inputPath) const
{
TargetPhrases *ret = NULL;
const Phrase<Word> &sourcePhrase = inputPath.subPhrase;
//cerr << "sourcePhrase=" << sourcePhrase.Debug(mgr.system) << endl;
// There is no such source phrase if the source phrase is longer than the longest
// source phrase observed during compilation
if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
return ret;
// Retrieve target phrase collection from phrase table
TargetPhraseVectorPtr decodedPhraseColl
= m_phraseDecoder->CreateTargetPhraseCollection(mgr, sourcePhrase, true, true);
if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
//TargetPhraseCollection::shared_ptr phraseColl(new TargetPhraseCollection);
ret = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, decodedPhraseColl->size());
for (size_t i = 0; i < decodedPhraseColl->size(); ++i) {
const TPCompact &tpCompact = decodedPhraseColl->at(i);
const TargetPhraseImpl *tp = CreateTargetPhrase(mgr, tpCompact, sourcePhrase);
ret->AddTargetPhrase(*tp);
}
ret->SortAndPrune(m_tableLimit);
mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *ret, sourcePhrase);
//cerr << "RET2=" << ret->Debug(mgr.system) << endl;
/*
// Cache phrase pair for clean-up or retrieval with PREnc
const_cast<PhraseDictionaryCompact*>(this)->CacheForCleanup(phraseColl);
return phraseColl;
*/
}
return ret;
}
const TargetPhraseImpl *PhraseTableCompact::CreateTargetPhrase(
const Manager &mgr,
const TPCompact &tpCompact,
const Phrase<Word> &sourcePhrase) const
{
MemPool &pool = mgr.GetPool();
size_t size = tpCompact.words.size();
TargetPhraseImpl *ret = new TargetPhraseImpl(pool, *this, mgr.system, size);
// words
for (size_t i = 0; i < size; ++i) {
const Word &compactWord = tpCompact.words[i];
Word &tpWord = (*ret)[i];
tpWord = compactWord;
}
// scores
Scores &scores = ret->GetScores();
scores.Assign(mgr.system, *this, tpCompact.scores);
// align
ret->SetAlignTerm(tpCompact.alignment);
// score
mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, sourcePhrase, *ret);
// Cache phrase pair for clean-up or retrieval with PREnc
//const_cast<PhraseDictionaryCompact*>(this)->CacheForCleanup(phraseColl);
//cerr << "ret=" << ret->Debug(mgr.system) << endl;
return ret;
}
// scfg
void PhraseTableCompact::InitActiveChart(
MemPool &pool,
const SCFG::Manager &mgr,
SCFG::InputPath &path) const
{
UTIL_THROW2("Not implemented");
}
void PhraseTableCompact::Lookup(
MemPool &pool,
const SCFG::Manager &mgr,
size_t maxChartSpan,
const SCFG::Stacks &stacks,
SCFG::InputPath &path) const
{
UTIL_THROW2("Not implemented");
}
void PhraseTableCompact::LookupGivenNode(
MemPool &pool,
const SCFG::Manager &mgr,
const SCFG::ActiveChartEntry &prevEntry,
const SCFG::Word &wordSought,
const Moses2::Hypotheses *hypos,
const Moses2::Range &subPhraseRange,
SCFG::InputPath &outPath) const
{
UTIL_THROW2("Not implemented");
}
}

View File

@ -1,68 +0,0 @@
#pragma once
#include "../PhraseTable.h"
#include "BlockHashIndex.h"
namespace Moses2
{
class PhraseDecoder;
class TPCompact;
class PhraseTableCompact: public PhraseTable
{
public:
PhraseTableCompact(size_t startInd, const std::string &line);
virtual ~PhraseTableCompact();
void Load(System &system);
virtual void SetParameter(const std::string& key, const std::string& value);
virtual void CleanUpAfterSentenceProcessing() const;
virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
InputPath &inputPath) const;
// scfg
virtual void InitActiveChart(
MemPool &pool,
const SCFG::Manager &mgr,
SCFG::InputPath &path) const;
virtual void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
virtual void Lookup(
MemPool &pool,
const SCFG::Manager &mgr,
size_t maxChartSpan,
const SCFG::Stacks &stacks,
SCFG::InputPath &path) const;
protected:
static bool s_inMemoryByDefault;
bool m_inMemory;
bool m_useAlignmentInfo;
BlockHashIndex m_hash;
StringVector<unsigned char, size_t, MmapAllocator> m_targetPhrasesMapped;
StringVector<unsigned char, size_t, std::allocator> m_targetPhrasesMemory;
friend class PhraseDecoder;
PhraseDecoder* m_phraseDecoder;
const TargetPhraseImpl *CreateTargetPhrase(
const Manager &mgr,
const TPCompact &tpCompact,
const Phrase<Word> &sourcePhrase) const;
// SCFG
virtual void LookupGivenNode(
MemPool &pool,
const SCFG::Manager &mgr,
const SCFG::ActiveChartEntry &prevEntry,
const SCFG::Word &wordSought,
const Moses2::Hypotheses *hypos,
const Moses2::Range &subPhraseRange,
SCFG::InputPath &outPath) const;
};
}

View File

@ -1,266 +0,0 @@
/*
* StoreTarget.cpp
*
* Created on: 19 Jan 2016
* Author: hieu
*/
#include <boost/foreach.hpp>
#include "StoreTarget.h"
#include "line_splitter.hh"
#include "probing_hash_utils.hh"
#include "../../legacy/OutputFileStream.h"
#include "../../legacy/Util2.h"
using namespace std;
namespace Moses2
{
StoreTarget::StoreTarget(const std::string &basepath)
:m_basePath(basepath)
,m_vocab(basepath + "/TargetVocab.dat")
{
std::string path = basepath + "/TargetColl.dat";
m_fileTargetColl.open(path.c_str(),
std::ios::out | std::ios::binary | std::ios::ate | std::ios::trunc);
if (!m_fileTargetColl.is_open()) {
throw "can't create file ";
}
}
StoreTarget::~StoreTarget()
{
assert(m_coll.empty());
m_fileTargetColl.close();
// vocab
m_vocab.Save();
}
uint64_t StoreTarget::Save()
{
uint64_t ret = m_fileTargetColl.tellp();
// save to disk
uint64_t numTP = m_coll.size();
m_fileTargetColl.write((char*) &numTP, sizeof(uint64_t));
for (size_t i = 0; i < m_coll.size(); ++i) {
Save(*m_coll[i]);
}
// clear coll
RemoveAllInColl(m_coll);
m_coll.clear();
// starting position of coll
return ret;
}
void StoreTarget::Save(const target_text &rule)
{
// metadata for each tp
TargetPhraseInfo tpInfo;
tpInfo.alignTerm = GetAlignId(rule.word_align_term);
tpInfo.alignNonTerm = GetAlignId(rule.word_align_non_term);
tpInfo.numWords = rule.target_phrase.size();
tpInfo.propLength = rule.property.size();
//cerr << "TPInfo=" << sizeof(TPInfo);
m_fileTargetColl.write((char*) &tpInfo, sizeof(TargetPhraseInfo));
// scores
for (size_t i = 0; i < rule.prob.size(); ++i) {
float prob = rule.prob[i];
m_fileTargetColl.write((char*) &prob, sizeof(prob));
}
// tp
for (size_t i = 0; i < rule.target_phrase.size(); ++i) {
uint32_t vocabId = rule.target_phrase[i];
m_fileTargetColl.write((char*) &vocabId, sizeof(vocabId));
}
// prop TODO
}
void StoreTarget::SaveAlignment()
{
std::string path = m_basePath + "/Alignments.dat";
Moses2::OutputFileStream file(path);
BOOST_FOREACH(Alignments::value_type &valPair, m_aligns) {
file << valPair.second << "\t";
const std::vector<size_t> &aligns = valPair.first;
BOOST_FOREACH(size_t align, aligns) {
file << align << " ";
}
file << endl;
}
}
void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg)
{
target_text *rule = new target_text;
//cerr << "line.target_phrase=" << line.target_phrase << endl;
// target_phrase
vector<bool> nonTerms;
util::TokenIter<util::SingleCharacter> it;
it = util::TokenIter<util::SingleCharacter>(line.target_phrase,
util::SingleCharacter(' '));
while (it) {
StringPiece word = *it;
//cerr << "word=" << word << endl;
bool nonTerm = false;
if (scfg) {
// not really sure how to handle factored SCFG and NT
if (scfg && word[0] == '[' && word[word.size() - 1] == ']') {
//cerr << "NON-TERM=" << tok << " " << nonTerms.size() << endl;
nonTerm = true;
}
nonTerms.push_back(nonTerm);
}
util::TokenIter<util::SingleCharacter> itFactor;
itFactor = util::TokenIter<util::SingleCharacter>(word,
util::SingleCharacter('|'));
while (itFactor) {
StringPiece factor = *itFactor;
string factorStr = factor.as_string();
uint32_t vocabId = m_vocab.GetVocabId(factorStr);
rule->target_phrase.push_back(vocabId);
itFactor++;
}
it++;
}
// probs
it = util::TokenIter<util::SingleCharacter>(line.prob,
util::SingleCharacter(' '));
while (it) {
string tok = it->as_string();
float prob = Scan<float>(tok);
if (log_prob) {
prob = FloorScore(log(prob));
if (prob == 0.0f) prob = 0.0000000001;
}
rule->prob.push_back(prob);
it++;
}
/*
cerr << "nonTerms=";
for (size_t i = 0; i < nonTerms.size(); ++i) {
cerr << nonTerms[i] << " ";
}
cerr << endl;
*/
// alignment
it = util::TokenIter<util::SingleCharacter>(line.word_align,
util::SingleCharacter(' '));
while (it) {
string tokPair = Trim(it->as_string());
if (tokPair.empty()) {
break;
}
vector<size_t> alignPair = Tokenize<size_t>(tokPair, "-");
assert(alignPair.size() == 2);
bool nonTerm = false;
size_t sourcePos = alignPair[0];
size_t targetPos = alignPair[1];
if (scfg) {
nonTerm = nonTerms[targetPos];
}
//cerr << targetPos << "=" << nonTerm << endl;
if (nonTerm) {
rule->word_align_non_term.push_back(sourcePos);
rule->word_align_non_term.push_back(targetPos);
//cerr << (int) rule->word_all1.back() << " ";
}
else {
rule->word_align_term.push_back(sourcePos);
rule->word_align_term.push_back(targetPos);
}
it++;
}
// extra scores
string prop = line.property.as_string();
AppendLexRO(prop, rule->prob, log_prob);
//cerr << "line.property=" << line.property << endl;
//cerr << "prop=" << prop << endl;
// properties
/*
for (size_t i = 0; i < prop.size(); ++i) {
rule->property.push_back(prop[i]);
}
*/
m_coll.push_back(rule);
}
uint32_t StoreTarget::GetAlignId(const std::vector<size_t> &align)
{
boost::unordered_map<std::vector<size_t>, uint32_t>::iterator iter =
m_aligns.find(align);
if (iter == m_aligns.end()) {
uint32_t ind = m_aligns.size();
m_aligns[align] = ind;
return ind;
}
else {
return iter->second;
}
}
void StoreTarget::AppendLexRO(std::string &prop, std::vector<float> &retvector,
bool log_prob) const
{
size_t startPos = prop.find("{{LexRO ");
if (startPos != string::npos) {
size_t endPos = prop.find("}}", startPos + 8);
string lexProb = prop.substr(startPos + 8, endPos - startPos - 8);
//cerr << "lexProb=" << lexProb << endl;
// append lex probs to pt probs
vector<float> scores = Tokenize<float>(lexProb);
if (log_prob) {
for (size_t i = 0; i < scores.size(); ++i) {
scores[i] = FloorScore(log(scores[i]));
if (scores[i] == 0.0f) scores[i] = 0.0000000001;
}
}
for (size_t i = 0; i < scores.size(); ++i) {
retvector.push_back(scores[i]);
}
// exclude LexRO property from property column
prop = prop.substr(0, startPos)
+ prop.substr(endPos + 2, prop.size() - endPos - 2);
//cerr << "line.property_to_be_binarized=" << line.property_to_be_binarized << "AAAA" << endl;
}
}
} /* namespace Moses2 */

View File

@ -1,51 +0,0 @@
/*
* StoreTarget.h
*
* Created on: 19 Jan 2016
* Author: hieu
*/
#pragma once
#include <string>
#include <fstream>
#include <vector>
#include <inttypes.h>
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include "StoreVocab.h"
namespace Moses2
{
class line_text;
class target_text;
class StoreTarget
{
public:
StoreTarget(const std::string &basepath);
virtual ~StoreTarget();
uint64_t Save();
void SaveAlignment();
void Append(const line_text &line, bool log_prob, bool scfg);
protected:
std::string m_basePath;
std::fstream m_fileTargetColl;
StoreVocab<uint32_t> m_vocab;
typedef boost::unordered_map<std::vector<size_t>, uint32_t> Alignments;
Alignments m_aligns;
std::vector<target_text*> m_coll;
uint32_t GetAlignId(const std::vector<size_t> &align);
void Save(const target_text &rule);
void AppendLexRO(std::string &prop, std::vector<float> &retvector,
bool log_prob) const;
};
} /* namespace Moses2 */

View File

@ -1,64 +0,0 @@
/*
* StoreVocab.h
*
* Created on: 15 Jun 2016
* Author: hieu
*/
#pragma once
#include <string>
#include <boost/unordered_map.hpp>
#include "../../legacy/OutputFileStream.h"
#include "../../legacy/Util2.h"
namespace Moses2
{
template<typename VOCABID>
class StoreVocab
{
protected:
std::string m_path;
typedef boost::unordered_map<std::string, VOCABID> Coll;
Coll m_vocab;
public:
StoreVocab(const std::string &path)
:m_path(path)
{}
virtual ~StoreVocab() {}
VOCABID GetVocabId(const std::string &word)
{
typename Coll::iterator iter = m_vocab.find(word);
if (iter == m_vocab.end()) {
VOCABID ind = m_vocab.size() + 1;
m_vocab[word] = ind;
return ind;
}
else {
return iter->second;
}
}
void Insert(VOCABID id, const std::string &word)
{
m_vocab[word] = id;
}
void Save()
{
OutputFileStream strme(m_path);
typename Coll::const_iterator iter;
for (iter = m_vocab.begin(); iter != m_vocab.end(); ++iter) {
strme << iter->first << "\t" << iter->second << std::endl;
}
strme.Close();
}
};
} /* namespace Moses2 */

View File

@ -1,44 +0,0 @@
#include <iostream>
#include "hash.hh"
using namespace std;
namespace Moses2
{
uint64_t getHash(StringPiece text)
{
std::size_t len = text.size();
uint64_t key = util::MurmurHashNative(text.data(), len);
return key;
}
std::vector<uint64_t> getVocabIDs(const StringPiece &textin)
{
//Tokenize
std::vector<uint64_t> output;
util::TokenIter<util::SingleCharacter> itWord(textin, util::SingleCharacter(' '));
while (itWord) {
StringPiece word = *itWord;
uint64_t id = 0;
util::TokenIter<util::SingleCharacter> itFactor(word, util::SingleCharacter('|'));
while (itFactor) {
StringPiece factor = *itFactor;
//cerr << "factor=" << factor << endl;
id += getHash(factor);
itFactor++;
}
output.push_back(id);
itWord++;
}
return output;
}
}
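getVocabIDs() collapses a (possibly factored) word such as "Haus|NN" into one 64-bit id by summing the hashes of its factors. A standalone sketch of the same idea, using std::hash as a stand-in for util::MurmurHashNative and invented example words:

#include <cstdint>
#include <functional>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Stand-in for Moses2::getHash(); the real code hashes with util::MurmurHashNative.
uint64_t factorHash(const std::string &factor) {
  return std::hash<std::string>()(factor);
}

std::vector<uint64_t> vocabIds(const std::string &phrase) {
  std::vector<uint64_t> ids;
  std::istringstream words(phrase);
  std::string word;
  while (words >> word) {                          // split words on spaces
    uint64_t id = 0;
    std::istringstream factors(word);
    std::string factor;
    while (std::getline(factors, factor, '|'))     // split factors on '|'
      id += factorHash(factor);                    // word id = sum of factor hashes
    ids.push_back(id);
  }
  return ids;
}

int main() {
  std::vector<uint64_t> ids = vocabIds("das kleine|JJ Haus|NN");
  for (size_t i = 0; i < ids.size(); ++i)
    std::cout << "word " << i << " -> " << ids[i] << "\n";
  return 0;
}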

View File

@ -1,17 +0,0 @@
#pragma once
#include "util/string_piece.hh"
#include "util/murmur_hash.hh"
#include "util/string_piece.hh" //Tokenization and work with StringPiece
#include "util/tokenize_piece.hh"
#include <vector>
namespace Moses2
{
//Gets the MurmurHash for a given string
uint64_t getHash(StringPiece text);
std::vector<uint64_t> getVocabIDs(const StringPiece &textin);
}

View File

@ -1,59 +0,0 @@
#pragma once
#include "util/string_piece.hh"
#include "util/tokenize_piece.hh"
#include "util/file_piece.hh"
#include <vector>
#include <cstdlib> //atof
#include "util/string_piece.hh" //Tokenization and work with StringPiece
#include "util/tokenize_piece.hh"
#include <vector>
namespace Moses2
{
//Struct for holding processed line
struct line_text
{
StringPiece source_phrase;
StringPiece target_phrase;
StringPiece prob;
StringPiece word_align;
StringPiece counts;
StringPiece sparse_score;
StringPiece property;
std::string property_to_be_binarized;
};
//Struct for holding processed line
struct target_text
{
std::vector<unsigned int> target_phrase;
std::vector<float> prob;
std::vector<size_t> word_align_term;
std::vector<size_t> word_align_non_term;
std::vector<char> counts;
std::vector<char> sparse_score;
std::vector<char> property;
/*
void Reset()
{
target_phrase.clear();
prob.clear();
word_all1.clear();
counts.clear();
sparse_score.clear();
property.clear();
}
*/
};
//Ask if it's better to have it receive a pointer to a line_text struct
line_text splitLine(const StringPiece &textin, bool scfg);
void reformatSCFG(line_text &output);
std::vector<unsigned char> splitWordAll1(const StringPiece &textin);
}
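For reference, splitLine() consumes standard Moses phrase-table lines; judging from the order of the line_text fields above, the " ||| "-separated columns are

  source phrase ||| target phrase ||| probabilities ||| word alignment ||| counts [||| sparse scores ||| properties]

for example (all tokens and numbers invented):

  das Haus ||| the house ||| 0.8 0.1 0.7 0.2 ||| 0-0 1-1 ||| 10 12 8 ||| ||| {{LexRO 0.6 0.2 0.1 0.1}}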

View File

@ -1,50 +0,0 @@
#include "probing_hash_utils.hh"
namespace Moses2
{
//Read table from disk, return memory map location
char * readTable(const char * filename, size_t size)
{
//Memory-map the table read-only from disk; the caller supplies its size
int fd;
char * map;
fd = open(filename, O_RDONLY);
if (fd == -1) {
perror("Error opening file for reading");
exit(EXIT_FAILURE);
}
map = (char *) mmap(0, size, PROT_READ, MAP_SHARED, fd, 0);
if (map == MAP_FAILED) {
close(fd);
perror("Error mmapping the file");
exit(EXIT_FAILURE);
}
return map;
}
void serialize_table(char *mem, size_t size, const std::string &filename)
{
std::ofstream os(filename.c_str(), std::ios::binary);
os.write((const char*) &mem[0], size);
os.close();
}
uint64_t getKey(const uint64_t source_phrase[], size_t size)
{
//TOO SLOW
//uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size());
uint64_t key = 0;
for (size_t i = 0; i < size; i++) {
key += (source_phrase[i] << i);
}
return key;
}
}
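getKey() combines the per-word vocab ids of a source phrase into one probing-table key by shifting each id left by its position and summing, so the key is order-sensitive but cheap to compute. A tiny sketch with invented ids:

#include <cstdint>
#include <iostream>
#include <vector>

// Same combination rule as Moses2::getKey(): sum of (id_i << i).
uint64_t phraseKey(const std::vector<uint64_t> &ids) {
  uint64_t key = 0;
  for (size_t i = 0; i < ids.size(); ++i)
    key += ids[i] << i;
  return key;
}

int main() {
  std::vector<uint64_t> a = {11, 22, 33};   // invented per-word hashes
  std::vector<uint64_t> b = {33, 22, 11};   // same ids, different order
  std::cout << phraseKey(a) << " vs " << phraseKey(b) << std::endl;   // 187 vs 121
  return 0;
}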

View File

@ -1,55 +0,0 @@
#pragma once
#include "util/probing_hash_table.hh"
#include <sys/mman.h>
#include <boost/functional/hash.hpp>
#include <fcntl.h>
#include <fstream>
namespace Moses2
{
#define API_VERSION 15
//Hash table entry
struct Entry
{
typedef uint64_t Key;
Key key;
Key GetKey() const
{
return key;
}
void SetKey(Key to)
{
key = to;
}
uint64_t value;
};
#define NONE std::numeric_limits<uint64_t>::max()
//Define table
typedef util::ProbingHashTable<Entry, boost::hash<uint64_t> > Table;
void serialize_table(char *mem, size_t size, const std::string &filename);
char * readTable(const char * filename, size_t size);
uint64_t getKey(const uint64_t source_phrase[], size_t size);
struct TargetPhraseInfo
{
uint32_t alignTerm;
uint32_t alignNonTerm;
uint16_t numWords;
uint16_t propLength;
uint16_t filler;
};
}

View File

@ -1,65 +0,0 @@
#pragma once
#include <boost/unordered_map.hpp>
#include <sys/stat.h> //For finding size of file
#include "vocabid.hh"
#include <algorithm> //toLower
#include <deque>
#include "probing_hash_utils.hh"
#include "hash.hh" //Includes line splitter
#include "line_splitter.hh"
#include "../../legacy/Util2.h"
namespace Moses2
{
class QueryEngine
{
std::map<uint64_t, std::string> source_vocabids;
typedef std::vector<unsigned char> Alignments;
std::vector<Alignments> alignColl;
Table table;
char *mem; //Memory for the table, necessary so that we can correctly destroy the object
size_t table_filesize;
bool is_reordering;
void read_alignments(const std::string &alignPath);
public:
int num_scores;
int num_lex_scores;
bool logProb;
QueryEngine(const char *);
~QueryEngine();
std::pair<bool, uint64_t> query(uint64_t key);
const std::map<uint64_t, std::string> &getSourceVocab() const
{ return source_vocabids; }
const std::vector<Alignments> &getAlignments() const
{ return alignColl; }
uint64_t getKey(uint64_t source_phrase[], size_t size) const;
template<typename T>
inline bool Get(const boost::unordered_map<std::string, std::string> &keyValue, const std::string &sought, T &found) const
{
boost::unordered_map<std::string, std::string>::const_iterator iter = keyValue.find(sought);
if (iter == keyValue.end()) {
return false;
}
const std::string &foundStr = iter->second;
found = Scan<T>(foundStr);
return true;
}
};
}

View File

@ -1,303 +0,0 @@
#include <sys/stat.h>
#include <boost/foreach.hpp>
#include "line_splitter.hh"
#include "storing.hh"
#include "StoreTarget.h"
#include "StoreVocab.h"
#include "../../legacy/Util2.h"
#include "../../legacy/InputFileStream.h"
using namespace std;
namespace Moses2
{
///////////////////////////////////////////////////////////////////////
void Node::Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos)
{
if (pos < sourcePhrase.size()) {
uint64_t vocabId = sourcePhrase[pos];
Node *child;
Children::iterator iter = m_children.find(vocabId);
if (iter == m_children.end()) {
// New node. Write other children then discard them
BOOST_FOREACH(Children::value_type &valPair, m_children) {
Node &otherChild = valPair.second;
otherChild.Write(table);
}
m_children.clear();
// create new node
child = &m_children[vocabId];
assert(!child->done);
child->key = key + (vocabId << pos);
}
else {
child = &iter->second;
}
child->Add(table, sourcePhrase, pos + 1);
}
else {
// this node was written previously because it has rules
done = true;
}
}
void Node::Write(Table &table)
{
//cerr << "START write " << done << " " << key << endl;
BOOST_FOREACH(Children::value_type &valPair, m_children) {
Node &child = valPair.second;
child.Write(table);
}
if (!done) {
// save
Entry sourceEntry;
sourceEntry.value = NONE;
sourceEntry.key = key;
//Put into table
table.Insert(sourceEntry);
}
}
///////////////////////////////////////////////////////////////////////
void createProbingPT(const std::string &phrasetable_path,
const std::string &basepath, int num_scores, int num_lex_scores,
bool log_prob, int max_cache_size, bool scfg)
{
std::cerr << "Starting..." << std::endl;
//Get basepath and create directory if missing
mkdir(basepath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
StoreTarget storeTarget(basepath);
//Get uniq lines:
unsigned long uniq_entries = countUniqueSource(phrasetable_path);
//Source phrase vocabids
StoreVocab<uint64_t> sourceVocab(basepath + "/source_vocabids");
//Read the file
util::FilePiece filein(phrasetable_path.c_str());
//Init the probing hash table
size_t size = Table::Size(uniq_entries, 1.2);
char * mem = new char[size];
memset(mem, 0, size);
Table sourceEntries(mem, size);
std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> cache;
float totalSourceCount = 0;
//Keep track of the size of each group of target phrases
size_t line_num = 0;
//Read everything and process
std::string prevSource;
Node sourcePhrases;
sourcePhrases.done = true;
sourcePhrases.key = 0;
while (true) {
try {
//Process line read
line_text line;
line = splitLine(filein.ReadLine(), scfg);
//cerr << "line=" << line.source_phrase << endl;
++line_num;
if (line_num % 1000000 == 0) {
std::cerr << line_num << " " << std::flush;
}
//Add source phrases to vocabularyIDs
add_to_map(sourceVocab, line.source_phrase);
if (prevSource.empty()) {
// 1st line
prevSource = line.source_phrase.as_string();
storeTarget.Append(line, log_prob, scfg);
}
else if (prevSource == line.source_phrase) {
//If we still have the same line, just append to it:
storeTarget.Append(line, log_prob, scfg);
}
else {
assert(prevSource != line.source_phrase);
//Different source phrase: create a new entry
// save
uint64_t targetInd = storeTarget.Save();
// next line
storeTarget.Append(line, log_prob, scfg);
//Create an entry for the previous source phrase:
Entry sourceEntry;
sourceEntry.value = targetInd;
//The key is the sum of hashes of individual words bit-shifted by their position in the phrase.
//Probably not entirely correct, but fast and seems to work fine in practice.
std::vector<uint64_t> vocabid_source = getVocabIDs(prevSource);
if (scfg) {
// storing prefixes?
sourcePhrases.Add(sourceEntries, vocabid_source);
}
sourceEntry.key = getKey(vocabid_source);
/*
cerr << "prevSource=" << prevSource << flush
<< " vocabids=" << Debug(vocabid_source) << flush
<< " key=" << sourceEntry.key << endl;
*/
//Put into table
sourceEntries.Insert(sourceEntry);
// update cache - CURRENT source phrase, not prev
if (max_cache_size) {
std::string countStr = line.counts.as_string();
countStr = Trim(countStr);
if (!countStr.empty()) {
std::vector<float> toks = Tokenize<float>(countStr);
//cerr << "CACHE:" << line.source_phrase << " " << countStr << " " << toks[1] << endl;
if (toks.size() >= 2) {
totalSourceCount += toks[1];
// compute key for CURRENT source
std::vector<uint64_t> currVocabidSource = getVocabIDs(line.source_phrase.as_string());
uint64_t currKey = getKey(currVocabidSource);
CacheItem *item = new CacheItem(
Trim(line.source_phrase.as_string()),
currKey,
toks[1]);
cache.push(item);
if (max_cache_size > 0 && cache.size() > max_cache_size) {
cache.pop();
}
}
}
}
//Set prevLine
prevSource = line.source_phrase.as_string();
}
}
catch (util::EndOfFileException e) {
std::cerr
<< "Reading phrase table finished, writing remaining files to disk."
<< std::endl;
//After the final entry is constructed we need to add it to the phrase_table
//Create an entry for the previous source phrase:
uint64_t targetInd = storeTarget.Save();
Entry sourceEntry;
sourceEntry.value = targetInd;
//The key is the sum of hashes of individual words. Probably not entirely correct, but fast
std::vector<uint64_t> vocabid_source = getVocabIDs(prevSource);
sourceEntry.key = getKey(vocabid_source);
//Put into table
sourceEntries.Insert(sourceEntry);
break;
}
}
sourcePhrases.Write(sourceEntries);
storeTarget.SaveAlignment();
serialize_table(mem, size, (basepath + "/probing_hash.dat"));
sourceVocab.Save();
serialize_cache(cache, (basepath + "/cache"), totalSourceCount);
delete[] mem;
//Write configfile
std::ofstream configfile;
configfile.open((basepath + "/config").c_str());
configfile << "API_VERSION\t" << API_VERSION << '\n';
configfile << "uniq_entries\t" << uniq_entries << '\n';
configfile << "num_scores\t" << num_scores << '\n';
configfile << "num_lex_scores\t" << num_lex_scores << '\n';
configfile << "log_prob\t" << log_prob << '\n';
configfile.close();
}
size_t countUniqueSource(const std::string &path)
{
size_t ret = 0;
InputFileStream strme(path);
std::string line, prevSource;
while (std::getline(strme, line)) {
std::vector<std::string> toks = TokenizeMultiCharSeparator(line, "|||");
assert(toks.size() != 0);
if (prevSource != toks[0]) {
prevSource = toks[0];
++ret;
}
}
return ret;
}
void serialize_cache(
std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache,
const std::string &path, float totalSourceCount)
{
std::vector<const CacheItem*> vec(cache.size());
size_t ind = cache.size() - 1;
while (!cache.empty()) {
const CacheItem *item = cache.top();
vec[ind] = item;
cache.pop();
--ind;
}
std::ofstream os(path.c_str());
os << totalSourceCount << std::endl;
for (size_t i = 0; i < vec.size(); ++i) {
const CacheItem *item = vec[i];
os << item->count << "\t" << item->sourceKey << "\t" << item->source << std::endl;
delete item;
}
os.close();
}
uint64_t getKey(const std::vector<uint64_t> &vocabid_source)
{
return Moses2::getKey(vocabid_source.data(), vocabid_source.size());
}
std::vector<uint64_t> CreatePrefix(const std::vector<uint64_t> &vocabid_source, size_t endPos)
{
assert(endPos < vocabid_source.size());
std::vector<uint64_t> ret(endPos + 1);
for (size_t i = 0; i <= endPos; ++i) {
ret[i] = vocabid_source[i];
}
return ret;
}
}
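createProbingPT() ends by writing a small tab-separated <basepath>/config file. Given the writes above and API_VERSION from probing_hash_utils.hh, its contents should look roughly like this (the numeric values other than the version are invented):

  API_VERSION     15
  uniq_entries    8531041
  num_scores      4
  num_lex_scores  0
  log_prob        1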

View File

@ -1,95 +0,0 @@
#pragma once
#include <boost/unordered_set.hpp>
#include <boost/unordered_map.hpp>
#include <cstdio>
#include <sstream>
#include <fstream>
#include <iostream>
#include <string>
#include <queue>
#include <sys/stat.h> //mkdir
#include "hash.hh" //Includes line_splitter
#include "probing_hash_utils.hh"
#include "util/file_piece.hh"
#include "util/file.hh"
#include "vocabid.hh"
namespace Moses2
{
typedef std::vector<uint64_t> SourcePhrase;
class Node
{
typedef boost::unordered_map<uint64_t, Node> Children;
Children m_children;
public:
uint64_t key;
bool done;
Node()
:done(false)
{}
void Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos = 0);
void Write(Table &table);
};
void createProbingPT(const std::string &phrasetable_path,
const std::string &basepath, int num_scores, int num_lex_scores,
bool log_prob, int max_cache_size, bool scfg);
uint64_t getKey(const std::vector<uint64_t> &source_phrase);
std::vector<uint64_t> CreatePrefix(const std::vector<uint64_t> &vocabid_source, size_t endPos);
template<typename T>
std::string Debug(const std::vector<T> &vec)
{
std::stringstream strm;
for (size_t i = 0; i < vec.size(); ++i) {
strm << vec[i] << " ";
}
return strm.str();
}
size_t countUniqueSource(const std::string &path);
class CacheItem
{
public:
std::string source;
uint64_t sourceKey;
float count;
CacheItem(const std::string &vSource, uint64_t vSourceKey, float vCount)
:source(vSource)
,sourceKey(vSourceKey)
,count(vCount)
{
}
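// Inverted comparison: a priority_queue of CacheItem* keeps the lowest-count
// item on top, so the least frequent source phrase is evicted/popped first.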
bool operator<(const CacheItem &other) const
{
return count > other.count;
}
};
class CacheItemOrderer
{
public:
bool operator()(const CacheItem* a, const CacheItem* b) const
{
return (*a) < (*b);
}
};
void serialize_cache(
std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache,
const std::string &path, float totalSourceCount);
}


@ -1,59 +0,0 @@
#include <boost/foreach.hpp>
#include "vocabid.hh"
#include "StoreVocab.h"
#include "../../legacy/Util2.h"
namespace Moses2
{
void add_to_map(StoreVocab<uint64_t> &sourceVocab,
const StringPiece &textin)
{
//Tokenize
util::TokenIter<util::SingleCharacter> itWord(textin, util::SingleCharacter(' '));
while (itWord) {
StringPiece word = *itWord;
util::TokenIter<util::SingleCharacter> itFactor(word, util::SingleCharacter('|'));
while (itFactor) {
StringPiece factor = *itFactor;
sourceVocab.Insert(getHash(factor), factor.as_string());
itFactor++;
}
itWord++;
}
}
void serialize_map(const std::map<uint64_t, std::string> &karta,
const std::string &filename)
{
std::ofstream os(filename.c_str());
std::map<uint64_t, std::string>::const_iterator iter;
for (iter = karta.begin(); iter != karta.end(); ++iter) {
os << iter->first << '\t' << iter->second << std::endl;
}
os.close();
}
void read_map(std::map<uint64_t, std::string> &karta, const char* filename)
{
std::ifstream is(filename);
std::string line;
while (getline(is, line)) {
std::vector<std::string> toks = Tokenize(line, "\t");
assert(toks.size() == 2);
uint64_t ind = Scan<uint64_t>(toks[1]);
karta[ind] = toks[0];
}
//Close the stream after we are done.
is.close();
}
}


@ -1,29 +0,0 @@
//Serialization
#include <boost/serialization/serialization.hpp>
#include <boost/serialization/map.hpp>
#include <boost/archive/text_iarchive.hpp>
#include <boost/archive/text_oarchive.hpp>
#include <fstream>
#include <iostream>
#include <vector>
#include <map> //Container
#include "hash.hh" //Hash of elements
#include "util/string_piece.hh" //Tokenization and work with StringPiece
#include "util/tokenize_piece.hh"
namespace Moses2
{
template<typename VOCABID>
class StoreVocab;
void add_to_map(StoreVocab<uint64_t> &sourceVocab,
const StringPiece &textin);
void serialize_map(const std::map<uint64_t, std::string> &karta,
const std::string &filename);
void read_map(std::map<uint64_t, std::string> &karta, const char* filename);
}


@ -1,91 +0,0 @@
/*
* Transliteration.h
*
* Created on: 28 Oct 2015
* Author: hieu
*/
#pragma once
#include "PhraseTable.h"
namespace Moses2
{
class Sentence;
class InputPaths;
class Range;
class Transliteration: public PhraseTable
{
public:
Transliteration(size_t startInd, const std::string &line);
virtual ~Transliteration();
void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
InputPath &inputPath) const;
virtual void
EvaluateInIsolation(const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void InitActiveChart(
MemPool &pool,
const SCFG::Manager &mgr,
SCFG::InputPath &path) const;
void Lookup(MemPool &pool,
const SCFG::Manager &mgr,
size_t maxChartSpan,
const SCFG::Stacks &stacks,
SCFG::InputPath &path) const;
void LookupUnary(MemPool &pool,
const SCFG::Manager &mgr,
const SCFG::Stacks &stacks,
SCFG::InputPath &path) const;
protected:
virtual void LookupNT(
MemPool &pool,
const SCFG::Manager &mgr,
const Moses2::Range &subPhraseRange,
const SCFG::InputPath &prevPath,
const SCFG::Stacks &stacks,
SCFG::InputPath &outPath) const;
virtual void LookupGivenWord(
MemPool &pool,
const SCFG::Manager &mgr,
const SCFG::InputPath &prevPath,
const SCFG::Word &wordSought,
const Moses2::Hypotheses *hypos,
const Moses2::Range &subPhraseRange,
SCFG::InputPath &outPath) const;
virtual void LookupGivenNode(
MemPool &pool,
const SCFG::Manager &mgr,
const SCFG::ActiveChartEntry &prevEntry,
const SCFG::Word &wordSought,
const Moses2::Hypotheses *hypos,
const Moses2::Range &subPhraseRange,
SCFG::InputPath &outPath) const;
void SetParameter(const std::string& key, const std::string& value);
protected:
std::string m_filePath;
std::string m_mosesDir, m_scriptDir, m_externalDir, m_inputLang, m_outputLang;
std::vector<TargetPhraseImpl*> CreateTargetPhrases(
const Manager &mgr,
MemPool &pool,
const SubPhrase<Moses2::Word> &sourcePhrase,
const std::string &outDir) const;
};
}


@ -1,89 +0,0 @@
/*
* UnknownWordPenalty.h
*
* Created on: 28 Oct 2015
* Author: hieu
*/
#pragma once
#include "PhraseTable.h"
namespace Moses2
{
class Sentence;
class InputPaths;
class Range;
class UnknownWordPenalty: public PhraseTable
{
public:
UnknownWordPenalty(size_t startInd, const std::string &line);
virtual ~UnknownWordPenalty();
virtual void SetParameter(const std::string& key, const std::string& value);
void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
InputPath &inputPath) const;
void ProcessXML(
const Manager &mgr,
MemPool &pool,
const Sentence &sentence,
InputPaths &inputPaths) const;
virtual void
EvaluateInIsolation(const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void InitActiveChart(
MemPool &pool,
const SCFG::Manager &mgr,
SCFG::InputPath &path) const;
void Lookup(MemPool &pool,
const SCFG::Manager &mgr,
size_t maxChartSpan,
const SCFG::Stacks &stacks,
SCFG::InputPath &path) const;
void LookupUnary(MemPool &pool,
const SCFG::Manager &mgr,
const SCFG::Stacks &stacks,
SCFG::InputPath &path) const;
protected:
virtual void LookupNT(
MemPool &pool,
const SCFG::Manager &mgr,
const Moses2::Range &subPhraseRange,
const SCFG::InputPath &prevPath,
const SCFG::Stacks &stacks,
SCFG::InputPath &outPath) const;
virtual void LookupGivenWord(
MemPool &pool,
const SCFG::Manager &mgr,
const SCFG::InputPath &prevPath,
const SCFG::Word &wordSought,
const Moses2::Hypotheses *hypos,
const Moses2::Range &subPhraseRange,
SCFG::InputPath &outPath) const;
virtual void LookupGivenNode(
MemPool &pool,
const SCFG::Manager &mgr,
const SCFG::ActiveChartEntry &prevEntry,
const SCFG::Word &wordSought,
const Moses2::Hypotheses *hypos,
const Moses2::Range &subPhraseRange,
SCFG::InputPath &outPath) const;
protected:
bool m_drop;
std::string m_prefix, m_suffix;
};
}


@ -1,161 +0,0 @@
/*
* CubePruning.cpp
*
* Created on: 27 Nov 2015
* Author: hieu
*/
#include "Misc.h"
#include "Stack.h"
#include "../Manager.h"
#include "../../MemPool.h"
#include "../../System.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningBitmapStack
{
////////////////////////////////////////////////////////////////////////
QueueItem *QueueItem::Create(QueueItem *currItem,
Manager &mgr,
CubeEdge &edge,
size_t hypoIndex,
size_t tpIndex,
std::deque<QueueItem*> &queueItemRecycler)
{
QueueItem *ret;
if (currItem) {
// reuse incoming queue item to create new item
ret = currItem;
ret->Init(mgr, edge, hypoIndex, tpIndex);
}
else if (!queueItemRecycler.empty()) {
// use item from recycle bin
ret = queueItemRecycler.back();
ret->Init(mgr, edge, hypoIndex, tpIndex);
queueItemRecycler.pop_back();
}
else {
// create new item
ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
}
return ret;
}
QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
:edge(&edge)
,hypoIndex(hypoIndex)
,tpIndex(tpIndex)
{
CreateHypothesis(mgr);
}
void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
{
this->edge = &edge;
this->hypoIndex = hypoIndex;
this->tpIndex = tpIndex;
CreateHypothesis(mgr);
}
void QueueItem::CreateHypothesis(Manager &mgr)
{
const Hypothesis *prevHypo = edge->hypos[hypoIndex];
const TargetPhrase &tp = edge->tps[tpIndex];
//cerr << "hypoIndex=" << hypoIndex << endl;
//cerr << "edge.hypos=" << edge.hypos.size() << endl;
//cerr << prevHypo << endl;
//cerr << *prevHypo << endl;
hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
hypo->EvaluateWhenApplied();
}
////////////////////////////////////////////////////////////////////////
CubeEdge::CubeEdge(
Manager &mgr,
const Hypotheses &hypos,
const InputPath &path,
const TargetPhrases &tps,
const Bitmap &newBitmap)
:hypos(hypos)
,path(path)
,tps(tps)
,newBitmap(newBitmap)
{
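// estimated (future) score of the source words still left uncovered by the new bitmap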
estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
}
std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
{
out << obj.newBitmap;
return out;
}
bool
CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
{
//UTIL_THROW_IF2(x >= (1<<17), "Error");
//UTIL_THROW_IF2(y >= (1<<17), "Error");
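// Pack the cube coordinates (x, y) into one integer (assumes both fit in 16 bits);
// insert() returns false if this position has already been queued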
SeenPositionItem val(this, (x<<16) + y);
std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
return pairRet.second;
}
void CubeEdge::CreateFirst(Manager &mgr,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler)
{
assert(hypos.size());
assert(tps.GetSize());
QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
queue.push(item);
bool setSeen = SetSeenPosition(0, 0, seenPositions);
assert(setSeen);
}
void CubeEdge::CreateNext(Manager &mgr,
QueueItem *item,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler)
{
size_t hypoIndex = item->hypoIndex;
size_t tpIndex = item->tpIndex;
if (hypoIndex + 1 < hypos.size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
// reuse incoming queue item to create new item
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
assert(newItem == item);
queue.push(newItem);
item = NULL;
}
if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
queue.push(newItem);
item = NULL;
}
if (item) {
// recycle unused queue item
queueItemRecycler.push_back(item);
}
}
}
}


@ -1,111 +0,0 @@
/*
* CubePruning.h
*
* Created on: 27 Nov 2015
* Author: hieu
*/
#pragma once
#include <boost/pool/pool_alloc.hpp>
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <vector>
#include <queue>
#include "../../legacy/Range.h"
#include "../Hypothesis.h"
#include "../../TypeDef.h"
#include "../../Vector.h"
#include "Stack.h"
namespace Moses2
{
class Manager;
class InputPath;
class TargetPhrases;
class Bitmap;
namespace NSCubePruningBitmapStack
{
class CubeEdge;
///////////////////////////////////////////
class QueueItem
{
~QueueItem(); // NOT IMPLEMENTED. Use MemPool
public:
static QueueItem *Create(QueueItem *currItem,
Manager &mgr,
CubeEdge &edge,
size_t hypoIndex,
size_t tpIndex,
std::deque<QueueItem*> &queueItemRecycler);
QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
CubeEdge *edge;
size_t hypoIndex, tpIndex;
Hypothesis *hypo;
protected:
void CreateHypothesis(Manager &mgr);
};
///////////////////////////////////////////
class QueueItemOrderer
{
public:
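// Inverts the best-first hypothesis ordering so that the priority_queue's
// top() is the queue item whose hypothesis has the best estimated score.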
bool operator()(QueueItem* itemA, QueueItem* itemB) const {
HypothesisFutureScoreOrderer orderer;
return !orderer(itemA->hypo, itemB->hypo);
}
};
///////////////////////////////////////////
class CubeEdge
{
friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
public:
typedef std::priority_queue<QueueItem*,
std::vector<QueueItem*>,
QueueItemOrderer> Queue;
typedef std::pair<const CubeEdge*, int> SeenPositionItem;
typedef boost::unordered_set<SeenPositionItem,
boost::hash<SeenPositionItem>,
std::equal_to<SeenPositionItem> > SeenPositions;
const Hypotheses &hypos;
const InputPath &path;
const TargetPhrases &tps;
const Bitmap &newBitmap;
SCORE estimatedScore;
CubeEdge(Manager &mgr,
const Hypotheses &hypos,
const InputPath &path,
const TargetPhrases &tps,
const Bitmap &newBitmap);
bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
void CreateFirst(Manager &mgr,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler);
void CreateNext(Manager &mgr,
QueueItem *item,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler);
protected:
};
}
}


@ -1,206 +0,0 @@
/*
* Search.cpp
*
* Created on: 16 Nov 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
#include "Search.h"
#include "Stack.h"
#include "../Manager.h"
#include "../Hypothesis.h"
#include "../../InputPaths.h"
#include "../../InputPath.h"
#include "../../System.h"
#include "../../Sentence.h"
#include "../../TranslationTask.h"
#include "../../legacy/Util2.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningBitmapStack
{
////////////////////////////////////////////////////////////////////////
Search::Search(Manager &mgr)
:Moses2::Search(mgr)
,m_stack(mgr)
,m_queue(QueueItemOrderer(), std::vector<QueueItem*>() )
,m_seenPositions()
{
}
Search::~Search()
{
}
void Search::Decode()
{
// init cube edges
m_cubeEdges.resize(mgr.GetInput().GetSize() + 1);
for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
m_cubeEdges[i] = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges();
}
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
initHypo->EmptyHypothesisState(mgr.GetInput());
m_stack.Add(initHypo, mgr.GetHypoRecycle());
PostDecode(0);
for (size_t stackInd = 1; stackInd < mgr.GetInput().GetSize() + 1; ++stackInd) {
//cerr << "stackInd=" << stackInd << endl;
m_stack.Clear();
Decode(stackInd);
PostDecode(stackInd);
//m_stack.DebugCounts();
//cerr << m_stacks << endl;
}
}
void Search::Decode(size_t stackInd)
{
Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
// reuse queue from previous stack. Clear it first
std::vector<QueueItem*> &container = Container(m_queue);
//cerr << "container=" << container.size() << endl;
BOOST_FOREACH(QueueItem *item, container) {
// recycle unused hypos from queue
Hypothesis *hypo = item->hypo;
hypoRecycler.Recycle(hypo);
// recycle queue item
m_queueItemRecycler.push_back(item);
}
container.clear();
m_seenPositions.clear();
// add top hypo from every edge into queue
CubeEdges &edges = *m_cubeEdges[stackInd];
BOOST_FOREACH(CubeEdge *edge, edges) {
//cerr << *edge << " ";
edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
}
/*
cerr << "edges: ";
boost::unordered_set<const Bitmap*> uniqueBM;
BOOST_FOREACH(CubeEdge *edge, edges) {
uniqueBM.insert(&edge->newBitmap);
//cerr << *edge << " ";
}
cerr << edges.size() << " " << uniqueBM.size();
cerr << endl;
*/
size_t pops = 0;
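// Cube pruning: repeatedly pop the best queue item, keep its hypothesis,
// and push its two neighbours (next hypo, next target phrase), up to popLimit pops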
while (!m_queue.empty() && pops < mgr.system.popLimit) {
// get best hypo from queue, add to stack
//cerr << "queue=" << queue.size() << endl;
QueueItem *item = m_queue.top();
m_queue.pop();
CubeEdge *edge = item->edge;
// add hypo to stack
Hypothesis *hypo = item->hypo;
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
m_stack.Add(hypo, hypoRecycler);
edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
++pops;
}
/*
// create hypo from every edge. Increase diversity
while (!m_queue.empty()) {
QueueItem *item = m_queue.top();
m_queue.pop();
if (item->hypoIndex == 0 && item->tpIndex == 0) {
CubeEdge &edge = item->edge;
// add hypo to stack
Hypothesis *hypo = item->hypo;
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
m_stacks.Add(hypo, mgr.GetHypoRecycle());
}
}
*/
}
void Search::PostDecode(size_t stackInd)
{
MemPool &pool = mgr.GetPool();
Stack::SortedHypos sortedHypos = m_stack.GetSortedAndPruneHypos(mgr);
BOOST_FOREACH(const Stack::SortedHypos::value_type &val, sortedHypos) {
const Bitmap &hypoBitmap = *val.first.first;
size_t hypoEndPos = val.first.second;
//cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
// create edges to next hypos from existing hypos
const InputPaths &paths = mgr.GetInputPaths();
BOOST_FOREACH(const InputPath *path, paths) {
const Range &pathRange = path->range;
//cerr << "pathRange=" << pathRange << endl;
if (!path->IsUsed()) {
continue;
}
if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
continue;
}
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
size_t numWords = newBitmap.GetNumWordsCovered();
CubeEdges &edges = *m_cubeEdges[numWords];
// sort hypo for a particular bitmap and hypoEndPos
Hypotheses &sortedHypos = *val.second;
size_t numPt = mgr.system.mappings.size();
for (size_t i = 0; i < numPt; ++i) {
const TargetPhrases *tps = path->targetPhrases[i];
if (tps && tps->GetSize()) {
CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap);
edges.push_back(edge);
}
}
}
}
}
const Hypothesis *Search::GetBestHypo() const
{
std::vector<const Hypothesis*> sortedHypos = m_stack.GetBestHypos(1);
const Hypothesis *best = NULL;
if (sortedHypos.size()) {
best = sortedHypos[0];
}
return best;
}
}
}


@ -1,57 +0,0 @@
/*
* Search.h
*
* Created on: 16 Nov 2015
* Author: hieu
*/
#pragma once
#include <boost/pool/pool_alloc.hpp>
#include "../Search.h"
#include "Misc.h"
#include "Stack.h"
#include "../../legacy/Range.h"
namespace Moses2
{
class Bitmap;
class Hypothesis;
class InputPath;
class TargetPhrases;
namespace NSCubePruningBitmapStack
{
class Search : public Moses2::Search
{
public:
Search(Manager &mgr);
virtual ~Search();
virtual void Decode();
const Hypothesis *GetBestHypo() const;
protected:
Stack m_stack;
CubeEdge::Queue m_queue;
CubeEdge::SeenPositions m_seenPositions;
// CUBE PRUNING VARIABLES
// setup
typedef std::vector<CubeEdge*> CubeEdges;
std::vector<CubeEdges*> m_cubeEdges;
std::deque<QueueItem*> m_queueItemRecycler;
// CUBE PRUNING
// decoding
void Decode(size_t stackInd);
void PostDecode(size_t stackInd);
};
}
}


@ -1,303 +0,0 @@
/*
* Stack.cpp
*
* Created on: 24 Oct 2015
* Author: hieu
*/
#include <algorithm>
#include <boost/foreach.hpp>
#include "Stack.h"
#include "../Hypothesis.h"
#include "../Manager.h"
#include "../../Scores.h"
#include "../../System.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningBitmapStack
{
MiniStack::MiniStack(const Manager &mgr)
:m_coll()
,m_sortedHypos(NULL)
{}
StackAdd MiniStack::Add(const Hypothesis *hypo)
{
std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
// CHECK RECOMBINATION
if (addRet.second) {
// an equivalent hypo doesn't exist yet
return StackAdd(true, NULL);
}
else {
const Hypothesis *hypoExisting = *addRet.first;
if (hypo->GetScores().GetTotalScore() > hypoExisting->GetScores().GetTotalScore()) {
// incoming hypo is better than the one we have
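// Replace the stored pointer in place via const_cast; this is safe because
// both hypotheses share the same recombination key, so the set's hashing is unaffected.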
const Hypothesis *const &hypoExisting1 = *addRet.first;
const Hypothesis *&hypoExisting2 = const_cast<const Hypothesis *&>(hypoExisting1);
hypoExisting2 = hypo;
return StackAdd(true, const_cast<Hypothesis*>(hypoExisting));
}
else {
// already storing the best hypo. discard incoming hypo
return StackAdd(false, const_cast<Hypothesis*>(hypo));
}
}
assert(false);
}
Hypotheses &MiniStack::GetSortedAndPruneHypos(const Manager &mgr) const
{
if (m_sortedHypos == NULL) {
// create sortedHypos first
MemPool &pool = mgr.GetPool();
m_sortedHypos = new (pool.Allocate< Vector<const Hypothesis*> >()) Vector<const Hypothesis*>(pool, m_coll.size());
size_t ind = 0;
BOOST_FOREACH(const Hypothesis *hypo, m_coll) {
(*m_sortedHypos)[ind] = hypo;
++ind;
}
SortAndPruneHypos(mgr);
}
return *m_sortedHypos;
}
void MiniStack::SortAndPruneHypos(const Manager &mgr) const
{
size_t stackSize = mgr.system.stackSize;
Recycler<Hypothesis*> &recycler = mgr.GetHypoRecycle();
/*
cerr << "UNSORTED hypos:" << endl;
for (size_t i = 0; i < hypos.size(); ++i) {
const Hypothesis *hypo = hypos[i];
cerr << *hypo << endl;
}
cerr << endl;
*/
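// keep only the best stackSize hypotheses, ordered by estimated score;
// anything beyond that point is recycled and dropped below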
Hypotheses::iterator iterMiddle;
iterMiddle = (stackSize == 0 || m_sortedHypos->size() < stackSize)
? m_sortedHypos->end()
: m_sortedHypos->begin() + stackSize;
std::partial_sort(m_sortedHypos->begin(), iterMiddle, m_sortedHypos->end(),
HypothesisFutureScoreOrderer());
// prune
if (stackSize && m_sortedHypos->size() > stackSize) {
for (size_t i = stackSize; i < m_sortedHypos->size(); ++i) {
Hypothesis *hypo = const_cast<Hypothesis*>((*m_sortedHypos)[i]);
recycler.Recycle(hypo);
}
m_sortedHypos->resize(stackSize);
}
/*
cerr << "sorted hypos:" << endl;
for (size_t i = 0; i < hypos.size(); ++i) {
const Hypothesis *hypo = hypos[i];
cerr << hypo << " " << *hypo << endl;
}
cerr << endl;
*/
}
void MiniStack::Clear()
{
m_sortedHypos = NULL;
m_coll.clear();
}
///////////////////////////////////////////////////////////////
Stack::Stack(const Manager &mgr)
:m_mgr(mgr)
,m_coll()
,m_miniStackRecycler()
{
}
Stack::~Stack() {
// TODO Auto-generated destructor stub
}
void Stack::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
{
HypoCoverageInternal key = &hypo->GetBitmap();
StackAdd added = GetMiniStack(key).Add(hypo);
if (added.toBeDeleted) {
hypoRecycle.Recycle(added.toBeDeleted);
}
}
std::vector<const Hypothesis*> Stack::GetBestHypos(size_t num) const
{
std::vector<const Hypothesis*> ret;
BOOST_FOREACH(const Coll::value_type &val, m_coll) {
const MiniStack::_HCType &hypos = val.second->GetColl();
ret.insert(ret.end(), hypos.begin(), hypos.end());
}
std::vector<const Hypothesis*>::iterator iterMiddle;
iterMiddle = (num == 0 || ret.size() < num)
? ret.end()
: ret.begin()+num;
std::partial_sort(ret.begin(), iterMiddle, ret.end(),
HypothesisFutureScoreOrderer());
return ret;
}
size_t Stack::GetHypoSize() const
{
size_t ret = 0;
BOOST_FOREACH(const Coll::value_type &val, m_coll) {
const MiniStack::_HCType &hypos = val.second->GetColl();
ret += hypos.size();
}
return ret;
}
MiniStack &Stack::GetMiniStack(const HypoCoverageInternal &key)
{
MiniStack *ret;
Coll::iterator iter = m_coll.find(key);
if (iter == m_coll.end()) {
if (m_miniStackRecycler.empty()) {
ret = new (m_mgr.GetPool().Allocate<MiniStack>()) MiniStack(m_mgr);
}
else {
ret = m_miniStackRecycler.back();
ret->Clear();
m_miniStackRecycler.pop_back();
}
m_coll[key] = ret;
}
else {
ret = iter->second;
}
return *ret;
}
void Stack::Clear()
{
BOOST_FOREACH(const Coll::value_type &val, m_coll) {
MiniStack *miniStack = val.second;
m_miniStackRecycler.push_back(miniStack);
}
m_coll.clear();
}
Stack::SortedHypos Stack::GetSortedAndPruneHypos(const Manager &mgr) const
{
SortedHypos ret;
MemPool &pool = mgr.GetPool();
// prune and sort
Hypotheses *allHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool, GetHypoSize());
size_t i = 0;
BOOST_FOREACH(const Coll::value_type &val, m_coll) {
const MiniStack *miniStack = val.second;
const MiniStack::_HCType &hypos = miniStack->GetColl();
BOOST_FOREACH(const Hypothesis *hypo, hypos) {
(*allHypos)[i++] = hypo;
}
}
SortAndPruneHypos(mgr, *allHypos);
// divide hypos by [bitmap, last end pos]
BOOST_FOREACH(const Hypothesis *hypo, *allHypos) {
HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos());
Hypotheses *hypos;
SortedHypos::iterator iter;
iter = ret.find(key);
if (iter == ret.end()) {
hypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool);
ret[key] = hypos;
}
else {
hypos = iter->second;
}
hypos->push_back(hypo);
}
return ret;
}
void Stack::SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const
{
size_t stackSize = mgr.system.stackSize;
Recycler<Hypothesis*> &recycler = mgr.GetHypoRecycle();
/*
cerr << "UNSORTED hypos:" << endl;
for (size_t i = 0; i < hypos.size(); ++i) {
const Hypothesis *hypo = hypos[i];
cerr << *hypo << endl;
}
cerr << endl;
*/
Hypotheses::iterator iterMiddle;
iterMiddle = (stackSize == 0 || hypos.size() < stackSize)
? hypos.end()
: hypos.begin() + stackSize;
std::partial_sort(hypos.begin(), iterMiddle, hypos.end(),
HypothesisFutureScoreOrderer());
// prune
if (stackSize && hypos.size() > stackSize) {
for (size_t i = stackSize; i < hypos.size(); ++i) {
Hypothesis *hypo = const_cast<Hypothesis*>(hypos[i]);
recycler.Recycle(hypo);
}
hypos.resize(stackSize);
}
/*
cerr << "sorted hypos:" << endl;
for (size_t i = 0; i < hypos.size(); ++i) {
const Hypothesis *hypo = hypos[i];
cerr << hypo << " " << *hypo << endl;
}
cerr << endl;
*/
}
void Stack::DebugCounts()
{
/*
cerr << "counts=";
BOOST_FOREACH(const Coll::value_type &val, GetColl()) {
const NSCubePruning::MiniStack &miniStack = *val.second;
size_t count = miniStack.GetColl().size();
cerr << count << " ";
}
cerr << endl;
*/
}
}
}


@ -1,109 +0,0 @@
/*
* Stack.h
*
* Created on: 24 Oct 2015
* Author: hieu
*/
#pragma once
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <deque>
#include "../Hypothesis.h"
#include "../../TypeDef.h"
#include "../../Vector.h"
#include "../../MemPool.h"
#include "../../Recycler.h"
#include "../../legacy/Util2.h"
namespace Moses2
{
class Manager;
namespace NSCubePruningBitmapStack
{
typedef Vector<const Hypothesis*> Hypotheses;
class MiniStack
{
public:
typedef boost::unordered_set<const Hypothesis*,
UnorderedComparer<Hypothesis>,
UnorderedComparer<Hypothesis>
> _HCType;
MiniStack(const Manager &mgr);
StackAdd Add(const Hypothesis *hypo);
_HCType &GetColl()
{ return m_coll; }
const _HCType &GetColl() const
{ return m_coll; }
void Clear();
Hypotheses &GetSortedAndPruneHypos(const Manager &mgr) const;
protected:
_HCType m_coll;
mutable Hypotheses *m_sortedHypos;
void SortAndPruneHypos(const Manager &mgr) const;
};
/////////////////////////////////////////////
class Stack {
protected:
public:
typedef std::pair<const Bitmap*, size_t> HypoCoverage;
// bitmap and current endPos of hypos
typedef boost::unordered_map<HypoCoverage, Hypotheses*> SortedHypos;
typedef const Bitmap* HypoCoverageInternal;
typedef boost::unordered_map<HypoCoverageInternal, MiniStack*
,boost::hash<HypoCoverageInternal>
,std::equal_to<HypoCoverageInternal>
> Coll;
Stack(const Manager &mgr);
virtual ~Stack();
size_t GetHypoSize() const;
Coll &GetColl()
{ return m_coll; }
const Coll &GetColl() const
{ return m_coll; }
void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
MiniStack &GetMiniStack(const HypoCoverageInternal &key);
std::vector<const Hypothesis*> GetBestHypos(size_t num) const;
void Clear();
SortedHypos GetSortedAndPruneHypos(const Manager &mgr) const;
void SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const;
void DebugCounts();
protected:
const Manager &m_mgr;
Coll m_coll;
std::deque<MiniStack*> m_miniStackRecycler;
};
}
}


@ -1,161 +0,0 @@
/*
* CubePruning.cpp
*
* Created on: 27 Nov 2015
* Author: hieu
*/
#include "Misc.h"
#include "Stack.h"
#include "../Manager.h"
#include "../../MemPool.h"
#include "../../System.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningCardinalStack
{
////////////////////////////////////////////////////////////////////////
QueueItem *QueueItem::Create(QueueItem *currItem,
Manager &mgr,
CubeEdge &edge,
size_t hypoIndex,
size_t tpIndex,
std::deque<QueueItem*> &queueItemRecycler)
{
QueueItem *ret;
if (currItem) {
// reuse incoming queue item to create new item
ret = currItem;
ret->Init(mgr, edge, hypoIndex, tpIndex);
}
else if (!queueItemRecycler.empty()) {
// use item from recycle bin
ret = queueItemRecycler.back();
ret->Init(mgr, edge, hypoIndex, tpIndex);
queueItemRecycler.pop_back();
}
else {
// create new item
ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
}
return ret;
}
QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
:edge(&edge)
,hypoIndex(hypoIndex)
,tpIndex(tpIndex)
{
CreateHypothesis(mgr);
}
void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
{
this->edge = &edge;
this->hypoIndex = hypoIndex;
this->tpIndex = tpIndex;
CreateHypothesis(mgr);
}
void QueueItem::CreateHypothesis(Manager &mgr)
{
const Hypothesis *prevHypo = edge->hypos[hypoIndex];
const TargetPhrase &tp = edge->tps[tpIndex];
//cerr << "hypoIndex=" << hypoIndex << endl;
//cerr << "edge.hypos=" << edge.hypos.size() << endl;
//cerr << prevHypo << endl;
//cerr << *prevHypo << endl;
hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
hypo->EvaluateWhenApplied();
}
////////////////////////////////////////////////////////////////////////
CubeEdge::CubeEdge(
Manager &mgr,
const Hypotheses &hypos,
const InputPath &path,
const TargetPhrases &tps,
const Bitmap &newBitmap)
:hypos(hypos)
,path(path)
,tps(tps)
,newBitmap(newBitmap)
{
estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
}
std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
{
out << obj.newBitmap;
return out;
}
bool
CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
{
//UTIL_THROW_IF2(x >= (1<<17), "Error");
//UTIL_THROW_IF2(y >= (1<<17), "Error");
SeenPositionItem val(this, (x<<16) + y);
std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
return pairRet.second;
}
void CubeEdge::CreateFirst(Manager &mgr,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler)
{
assert(hypos.size());
assert(tps.GetSize());
QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
queue.push(item);
bool setSeen = SetSeenPosition(0, 0, seenPositions);
assert(setSeen);
}
void CubeEdge::CreateNext(Manager &mgr,
QueueItem *item,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler)
{
size_t hypoIndex = item->hypoIndex;
size_t tpIndex = item->tpIndex;
if (hypoIndex + 1 < hypos.size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
// reuse incoming queue item to create new item
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
assert(newItem == item);
queue.push(newItem);
item = NULL;
}
if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
queue.push(newItem);
item = NULL;
}
if (item) {
// recycle unused queue item
queueItemRecycler.push_back(item);
}
}
}
}


@ -1,112 +0,0 @@
/*
* CubePruning.h
*
* Created on: 27 Nov 2015
* Author: hieu
*/
#pragma once
#include <boost/pool/pool_alloc.hpp>
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <vector>
#include <queue>
#include "../../legacy/Range.h"
#include "../Hypothesis.h"
#include "../../TypeDef.h"
#include "../../Vector.h"
#include "Stack.h"
namespace Moses2
{
class Manager;
class InputPath;
class TargetPhrases;
class Bitmap;
namespace NSCubePruningCardinalStack
{
class CubeEdge;
///////////////////////////////////////////
class QueueItem
{
~QueueItem(); // NOT IMPLEMENTED. Use MemPool
public:
static QueueItem *Create(QueueItem *currItem,
Manager &mgr,
CubeEdge &edge,
size_t hypoIndex,
size_t tpIndex,
std::deque<QueueItem*> &queueItemRecycler);
QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
CubeEdge *edge;
size_t hypoIndex, tpIndex;
Hypothesis *hypo;
protected:
void CreateHypothesis(Manager &mgr);
};
///////////////////////////////////////////
class QueueItemOrderer
{
public:
bool operator()(QueueItem* itemA, QueueItem* itemB) const {
HypothesisFutureScoreOrderer orderer;
return !orderer(itemA->hypo, itemB->hypo);
}
};
///////////////////////////////////////////
class CubeEdge
{
friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
public:
typedef std::priority_queue<QueueItem*,
std::vector<QueueItem*>,
QueueItemOrderer> Queue;
typedef std::pair<const CubeEdge*, int> SeenPositionItem;
typedef boost::unordered_set<SeenPositionItem,
boost::hash<SeenPositionItem>,
std::equal_to<SeenPositionItem>
> SeenPositions;
const Hypotheses &hypos;
const InputPath &path;
const TargetPhrases &tps;
const Bitmap &newBitmap;
SCORE estimatedScore;
CubeEdge(Manager &mgr,
const Hypotheses &hypos,
const InputPath &path,
const TargetPhrases &tps,
const Bitmap &newBitmap);
bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
void CreateFirst(Manager &mgr,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler);
void CreateNext(Manager &mgr,
QueueItem *item,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler);
protected:
};
}
}


@ -1,206 +0,0 @@
/*
* Search.cpp
*
* Created on: 16 Nov 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
#include "Search.h"
#include "Stack.h"
#include "../Manager.h"
#include "../Hypothesis.h"
#include "../../InputPaths.h"
#include "../../InputPath.h"
#include "../../System.h"
#include "../../Sentence.h"
#include "../../TranslationTask.h"
#include "../../legacy/Util2.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningCardinalStack
{
////////////////////////////////////////////////////////////////////////
Search::Search(Manager &mgr)
:Moses2::Search(mgr)
,m_stack(mgr)
,m_queue(QueueItemOrderer(), std::vector<QueueItem* >() )
,m_seenPositions()
{
}
Search::~Search()
{
}
void Search::Decode()
{
// init cube edges
m_cubeEdges.resize(mgr.GetInput().GetSize() + 1);
for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
m_cubeEdges[i] = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges();
}
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
initHypo->EmptyHypothesisState(mgr.GetInput());
m_stack.Add(initHypo, mgr.GetHypoRecycle());
PostDecode(0);
for (size_t stackInd = 1; stackInd < mgr.GetInput().GetSize() + 1; ++stackInd) {
//cerr << "stackInd=" << stackInd << endl;
m_stack.Clear();
Decode(stackInd);
PostDecode(stackInd);
//m_stack.DebugCounts();
//cerr << m_stacks << endl;
}
}
void Search::Decode(size_t stackInd)
{
Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
// reuse queue from previous stack. Clear it first
std::vector<QueueItem*> &container = Container(m_queue);
//cerr << "container=" << container.size() << endl;
BOOST_FOREACH(QueueItem *item, container) {
// recycle unused hypos from queue
Hypothesis *hypo = item->hypo;
hypoRecycler.Recycle(hypo);
// recycle queue item
m_queueItemRecycler.push_back(item);
}
container.clear();
m_seenPositions.clear();
// add top hypo from every edge into queue
CubeEdges &edges = *m_cubeEdges[stackInd];
BOOST_FOREACH(CubeEdge *edge, edges) {
//cerr << *edge << " ";
edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
}
/*
cerr << "edges: ";
boost::unordered_set<const Bitmap*> uniqueBM;
BOOST_FOREACH(CubeEdge *edge, edges) {
uniqueBM.insert(&edge->newBitmap);
//cerr << *edge << " ";
}
cerr << edges.size() << " " << uniqueBM.size();
cerr << endl;
*/
size_t pops = 0;
while (!m_queue.empty() && pops < mgr.system.popLimit) {
// get best hypo from queue, add to stack
//cerr << "queue=" << queue.size() << endl;
QueueItem *item = m_queue.top();
m_queue.pop();
CubeEdge *edge = item->edge;
// add hypo to stack
Hypothesis *hypo = item->hypo;
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
m_stack.Add(hypo, hypoRecycler);
edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
++pops;
}
/*
// create hypo from every edge. Increase diversity
while (!m_queue.empty()) {
QueueItem *item = m_queue.top();
m_queue.pop();
if (item->hypoIndex == 0 && item->tpIndex == 0) {
CubeEdge &edge = item->edge;
// add hypo to stack
Hypothesis *hypo = item->hypo;
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
m_stacks.Add(hypo, mgr.GetHypoRecycle());
}
}
*/
}
void Search::PostDecode(size_t stackInd)
{
MemPool &pool = mgr.GetPool();
Stack::SortedHypos sortedHypos = m_stack.GetSortedAndPruneHypos(mgr);
BOOST_FOREACH(const Stack::SortedHypos::value_type &val, sortedHypos) {
const Bitmap &hypoBitmap = *val.first.first;
size_t hypoEndPos = val.first.second;
//cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
// create edges to next hypos from existing hypos
const InputPaths &paths = mgr.GetInputPaths();
BOOST_FOREACH(const InputPath *path, paths) {
const Range &pathRange = path->range;
//cerr << "pathRange=" << pathRange << endl;
if (!path->IsUsed()) {
continue;
}
if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
continue;
}
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
size_t numWords = newBitmap.GetNumWordsCovered();
CubeEdges &edges = *m_cubeEdges[numWords];
// sort hypo for a particular bitmap and hypoEndPos
Hypotheses &sortedHypos = *val.second;
size_t numPt = mgr.system.mappings.size();
for (size_t i = 0; i < numPt; ++i) {
const TargetPhrases *tps = path->targetPhrases[i];
if (tps && tps->GetSize()) {
CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap);
edges.push_back(edge);
}
}
}
}
}
const Hypothesis *Search::GetBestHypo() const
{
std::vector<const Hypothesis*> sortedHypos = m_stack.GetBestHypos(1);
const Hypothesis *best = NULL;
if (sortedHypos.size()) {
best = sortedHypos[0];
}
return best;
}
}
}


@ -1,57 +0,0 @@
/*
* Search.h
*
* Created on: 16 Nov 2015
* Author: hieu
*/
#pragma once
#include <boost/pool/pool_alloc.hpp>
#include "../Search.h"
#include "Misc.h"
#include "Stack.h"
#include "../../legacy/Range.h"
namespace Moses2
{
class Bitmap;
class Hypothesis;
class InputPath;
class TargetPhrases;
namespace NSCubePruningCardinalStack
{
class Search : public Moses2::Search
{
public:
Search(Manager &mgr);
virtual ~Search();
virtual void Decode();
const Hypothesis *GetBestHypo() const;
protected:
Stack m_stack;
CubeEdge::Queue m_queue;
CubeEdge::SeenPositions m_seenPositions;
// CUBE PRUNING VARIABLES
// setup
typedef std::vector<CubeEdge*> CubeEdges;
std::vector<CubeEdges*> m_cubeEdges;
std::deque<QueueItem*> m_queueItemRecycler;
// CUBE PRUNING
// decoding
void Decode(size_t stackInd);
void PostDecode(size_t stackInd);
};
}
}


@ -1,68 +0,0 @@
/*
* Stack.h
*
* Created on: 24 Oct 2015
* Author: hieu
*/
#pragma once
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <deque>
#include "../Hypothesis.h"
#include "../../TypeDef.h"
#include "../../Vector.h"
#include "../../MemPool.h"
#include "../../Recycler.h"
#include "../../legacy/Util2.h"
namespace Moses2
{
class Manager;
namespace NSCubePruningCardinalStack
{
typedef Vector<const Hypothesis*> Hypotheses;
/////////////////////////////////////////////
class Stack {
protected:
typedef boost::unordered_set<const Hypothesis*,
UnorderedComparer<Hypothesis>,
UnorderedComparer<Hypothesis>
> _HCType;
public:
typedef std::pair<const Bitmap*, size_t> HypoCoverage;
typedef boost::unordered_map<HypoCoverage, Hypotheses*> SortedHypos;
Stack(const Manager &mgr);
virtual ~Stack();
size_t GetHypoSize() const;
_HCType &GetColl()
{ return m_coll; }
const _HCType &GetColl() const
{ return m_coll; }
void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
std::vector<const Hypothesis*> GetBestHypos(size_t num) const;
void Clear();
SortedHypos GetSortedAndPruneHypos(const Manager &mgr) const;
void SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const;
protected:
const Manager &m_mgr;
_HCType m_coll;
};
}
}


@ -1,161 +0,0 @@
/*
* CubePruning.cpp
*
* Created on: 27 Nov 2015
* Author: hieu
*/
#include "Misc.h"
#include "../Manager.h"
#include "../../MemPool.h"
#include "../../System.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningPerBitmap
{
////////////////////////////////////////////////////////////////////////
QueueItem *QueueItem::Create(QueueItem *currItem,
Manager &mgr,
CubeEdge &edge,
size_t hypoIndex,
size_t tpIndex,
std::deque<QueueItem*> &queueItemRecycler)
{
QueueItem *ret;
if (currItem) {
// reuse incoming queue item to create new item
ret = currItem;
ret->Init(mgr, edge, hypoIndex, tpIndex);
}
else if (!queueItemRecycler.empty()) {
// use item from recycle bin
ret = queueItemRecycler.back();
ret->Init(mgr, edge, hypoIndex, tpIndex);
queueItemRecycler.pop_back();
}
else {
// create new item
ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
}
return ret;
}
QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
:edge(&edge)
,hypoIndex(hypoIndex)
,tpIndex(tpIndex)
{
CreateHypothesis(mgr);
}
void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
{
this->edge = &edge;
this->hypoIndex = hypoIndex;
this->tpIndex = tpIndex;
CreateHypothesis(mgr);
}
void QueueItem::CreateHypothesis(Manager &mgr)
{
const Hypothesis *prevHypo = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex];
const TargetPhrase &tp = edge->tps[tpIndex];
//cerr << "hypoIndex=" << hypoIndex << endl;
//cerr << "edge.hypos=" << edge.hypos.size() << endl;
//cerr << prevHypo << endl;
//cerr << *prevHypo << endl;
hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
hypo->EvaluateWhenApplied();
}
////////////////////////////////////////////////////////////////////////
CubeEdge::CubeEdge(
Manager &mgr,
const NSCubePruningMiniStack::MiniStack &miniStack,
const InputPath &path,
const TargetPhrases &tps,
const Bitmap &newBitmap)
:miniStack(miniStack)
,path(path)
,tps(tps)
,newBitmap(newBitmap)
{
estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
}
std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
{
out << obj.newBitmap;
return out;
}
bool
CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
{
//UTIL_THROW_IF2(x >= (1<<17), "Error");
//UTIL_THROW_IF2(y >= (1<<17), "Error");
SeenPositionItem val(this, (x<<16) + y);
std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
return pairRet.second;
}
void CubeEdge::CreateFirst(Manager &mgr,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler)
{
if (miniStack.GetSortedAndPruneHypos(mgr).size()) {
assert(tps.GetSize());
QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
queue.push(item);
bool setSeen = SetSeenPosition(0, 0, seenPositions);
assert(setSeen);
}
}
void CubeEdge::CreateNext(Manager &mgr,
QueueItem *item,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler)
{
size_t hypoIndex = item->hypoIndex;
size_t tpIndex = item->tpIndex;
if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
// reuse incoming queue item to create new item
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
assert(newItem == item);
queue.push(newItem);
item = NULL;
}
if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
queue.push(newItem);
item = NULL;
}
if (item) {
// recycle unused queue item
queueItemRecycler.push_back(item);
}
}
}
}


@ -1,113 +0,0 @@
/*
* CubePruning.h
*
* Created on: 27 Nov 2015
* Author: hieu
*/
#pragma once
#include <boost/pool/pool_alloc.hpp>
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <vector>
#include <queue>
#include "../../legacy/Range.h"
#include "../Hypothesis.h"
#include "../../TypeDef.h"
#include "../../Vector.h"
#include "../CubePruningMiniStack/Stack.h"
namespace Moses2
{
class Manager;
class InputPath;
class TargetPhrases;
class Bitmap;
namespace NSCubePruningPerBitmap
{
class CubeEdge;
///////////////////////////////////////////
class QueueItem
{
~QueueItem(); // NOT IMPLEMENTED. Use MemPool
public:
static QueueItem *Create(QueueItem *currItem,
Manager &mgr,
CubeEdge &edge,
size_t hypoIndex,
size_t tpIndex,
std::deque<QueueItem*> &queueItemRecycler);
QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
CubeEdge *edge;
size_t hypoIndex, tpIndex;
Hypothesis *hypo;
protected:
void CreateHypothesis(Manager &mgr);
};
///////////////////////////////////////////
class QueueItemOrderer
{
public:
bool operator()(QueueItem* itemA, QueueItem* itemB) const {
HypothesisFutureScoreOrderer orderer;
return !orderer(itemA->hypo, itemB->hypo);
}
};
///////////////////////////////////////////
class CubeEdge
{
friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
public:
typedef std::priority_queue<QueueItem*,
std::vector<QueueItem*>,
QueueItemOrderer> Queue;
typedef std::pair<const CubeEdge*, int> SeenPositionItem;
typedef boost::unordered_set<SeenPositionItem,
boost::hash<SeenPositionItem>,
std::equal_to<SeenPositionItem>
> SeenPositions;
const NSCubePruningMiniStack::MiniStack &miniStack;
const InputPath &path;
const TargetPhrases &tps;
const Bitmap &newBitmap;
SCORE estimatedScore;
CubeEdge(Manager &mgr,
const NSCubePruningMiniStack::MiniStack &miniStack,
const InputPath &path,
const TargetPhrases &tps,
const Bitmap &newBitmap);
bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
void CreateFirst(Manager &mgr,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler);
void CreateNext(Manager &mgr,
QueueItem *item,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler);
protected:
};
}
}


@ -1,273 +0,0 @@
/*
* Search.cpp
*
* Created on: 16 Nov 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
#include "Search.h"
#include "../Manager.h"
#include "../Hypothesis.h"
#include "../../InputPaths.h"
#include "../../InputPath.h"
#include "../../System.h"
#include "../../Sentence.h"
#include "../../TranslationTask.h"
#include "../../legacy/Util2.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningPerBitmap
{
////////////////////////////////////////////////////////////////////////
Search::Search(Manager &mgr)
:Moses2::Search(mgr)
,m_stacks(mgr)
,m_queue(QueueItemOrderer(),
std::vector<QueueItem*>() )
,m_seenPositions()
{
}
Search::~Search()
{
}
void Search::Decode()
{
// init stacks
m_stacks.Init(mgr.GetInput().GetSize() + 1);
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
initHypo->EmptyHypothesisState(mgr.GetInput());
m_stacks.Add(initHypo, mgr.GetHypoRecycle());
for (size_t stackInd = 0; stackInd < m_stacks.GetSize() - 1; ++stackInd) {
CreateSearchGraph(stackInd);
}
for (size_t stackInd = 1; stackInd < m_stacks.GetSize(); ++stackInd) {
//cerr << "stackInd=" << stackInd << endl;
Decode(stackInd);
//cerr << m_stacks << endl;
}
//DebugCounts();
}
void Search::Decode(size_t stackInd)
{
NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
// FOR EACH BITMAP IN EACH STACK
boost::unordered_map<const Bitmap*, vector<NSCubePruningMiniStack::MiniStack*> > uniqueBM;
BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
const Bitmap *bitmap = val.first.first;
uniqueBM[bitmap].push_back(&miniStack);
}
// decode each bitmap
boost::unordered_map<const Bitmap*, vector<NSCubePruningMiniStack::MiniStack*> >::iterator iter;
for (iter = uniqueBM.begin(); iter != uniqueBM.end(); ++iter) {
const vector<NSCubePruningMiniStack::MiniStack*> &miniStacks = iter->second;
Decode(miniStacks);
}
/*
// FOR EACH STACK
vector<NSCubePruningMiniStack::MiniStack*> miniStacks;
BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
miniStacks.push_back(&miniStack);
}
Decode(miniStacks);
*/
}
void Search::Decode(const vector<NSCubePruningMiniStack::MiniStack*> &miniStacks)
{
Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
// reuse queue from previous stack. Clear it first
std::vector<QueueItem*> &container = Container(m_queue);
//cerr << "container=" << container.size() << endl;
BOOST_FOREACH(QueueItem *item, container) {
// recycle unused hypos from queue
Hypothesis *hypo = item->hypo;
hypoRecycler.Recycle(hypo);
// recycle queue item
m_queueItemRecycler.push_back(item);
}
container.clear();
m_seenPositions.clear();
BOOST_FOREACH(NSCubePruningMiniStack::MiniStack *miniStack, miniStacks) {
// add top hypo from every edge into queue
CubeEdges &edges = *m_cubeEdges[miniStack];
BOOST_FOREACH(CubeEdge *edge, edges) {
//cerr << "edge=" << *edge << endl;
edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
}
}
size_t pops = 0;
while (!m_queue.empty() && pops < mgr.system.popLimit) {
// get best hypo from queue, add to stack
//cerr << "queue=" << queue.size() << endl;
QueueItem *item = m_queue.top();
m_queue.pop();
CubeEdge *edge = item->edge;
// add hypo to stack
Hypothesis *hypo = item->hypo;
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
m_stacks.Add(hypo, hypoRecycler);
edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
++pops;
}
/*
// create hypo from every edge. Increase diversity
while (!m_queue.empty()) {
QueueItem *item = m_queue.top();
m_queue.pop();
if (item->hypoIndex == 0 && item->tpIndex == 0) {
CubeEdge &edge = item->edge;
// add hypo to stack
Hypothesis *hypo = item->hypo;
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
m_stacks.Add(hypo, mgr.GetHypoRecycle());
}
}
*/
}
void Search::CreateSearchGraph(size_t stackInd)
{
NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
MemPool &pool = mgr.GetPool();
BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
const Bitmap &hypoBitmap = *val.first.first;
size_t hypoEndPos = val.first.second;
//cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
// create edges to next hypos from existing hypos
const InputPaths &paths = mgr.GetInputPaths();
BOOST_FOREACH(const InputPath *path, paths) {
const Range &pathRange = path->range;
//cerr << "pathRange=" << pathRange << endl;
if (!path->IsUsed()) {
continue;
}
if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
continue;
}
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
// sort hypo for a particular bitmap and hypoEndPos
const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
// add cube edge
size_t numPt = mgr.system.mappings.size();
for (size_t i = 0; i < numPt; ++i) {
const TargetPhrases *tps = path->targetPhrases[i];
if (tps && tps->GetSize()) {
// create next mini stack
NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange);
CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap);
CubeEdges *edges;
boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*>::iterator iter = m_cubeEdges.find(&nextMiniStack);
if (iter == m_cubeEdges.end()) {
edges = new (pool.Allocate<CubeEdges>()) CubeEdges();
m_cubeEdges[&nextMiniStack] = edges;
}
else {
edges = iter->second;
}
edges->push_back(edge);
}
}
}
}
}
const Hypothesis *Search::GetBestHypo() const
{
const NSCubePruningMiniStack::Stack &lastStack = m_stacks.Back();
std::vector<const Hypothesis*> sortedHypos = lastStack.GetBestHypos(1);
const Hypothesis *best = NULL;
if (sortedHypos.size()) {
best = sortedHypos[0];
}
return best;
}
void Search::DebugCounts()
{
std::map<size_t, size_t> counts;
for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
//cerr << "stackInd=" << stackInd << endl;
const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
size_t count = miniStack.GetColl().size();
if (counts.find(count) == counts.end()) {
counts[count] = 1;
}
else {
++counts[count];
}
}
//cerr << m_stacks << endl;
}
std::map<size_t, size_t>::const_iterator iter;
for (iter = counts.begin(); iter != counts.end(); ++iter) {
cerr << iter->first << "=" << iter->second << " ";
}
cerr << endl;
}
}
}


@ -1,66 +0,0 @@
/*
* Search.h
*
* Created on: 16 Nov 2015
* Author: hieu
*/
#pragma once
#include <boost/pool/pool_alloc.hpp>
#include <boost/unordered_map.hpp>
#include "../Search.h"
#include "Misc.h"
#include "Stacks.h"
#include "../../legacy/Range.h"
namespace Moses2
{
class Bitmap;
class Hypothesis;
class InputPath;
class TargetPhrases;
namespace NSCubePruningMiniStack
{
class MiniStack;
}
namespace NSCubePruningPerBitmap
{
class Search : public Moses2::Search
{
public:
Search(Manager &mgr);
virtual ~Search();
virtual void Decode();
const Hypothesis *GetBestHypo() const;
protected:
Stacks m_stacks;
CubeEdge::Queue m_queue;
CubeEdge::SeenPositions m_seenPositions;
// CUBE PRUNING VARIABLES
// setup
typedef std::vector<CubeEdge*> CubeEdges;
boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*> m_cubeEdges;
std::deque<QueueItem*> m_queueItemRecycler;
// CUBE PRUNING
// decoding
void CreateSearchGraph(size_t stackInd);
void Decode(size_t stackInd);
void Decode(const std::vector<NSCubePruningMiniStack::MiniStack*> &miniStacks);
void DebugCounts();
};
}
}


@ -1,72 +0,0 @@
/*
* Stacks.cpp
*
* Created on: 6 Nov 2015
* Author: hieu
*/
#include "Stacks.h"
#include "../../System.h"
#include "../Manager.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningPerBitmap
{
Stacks::Stacks(const Manager &mgr)
:m_mgr(mgr)
{
}
Stacks::~Stacks()
{
}
void Stacks::Init(size_t numStacks)
{
m_stacks.resize(numStacks);
for (size_t i = 0; i < m_stacks.size(); ++i) {
m_stacks[i] = new (m_mgr.GetPool().Allocate<NSCubePruningMiniStack::Stack>()) NSCubePruningMiniStack::Stack(m_mgr);
}
}
std::ostream& operator<<(std::ostream &out, const Stacks &obj)
{
for (size_t i = 0; i < obj.GetSize(); ++i) {
const NSCubePruningMiniStack::Stack &stack = *obj.m_stacks[i];
out << stack.GetHypoSize() << " ";
}
return out;
}
void Stacks::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
{
size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered();
//cerr << "numWordsCovered=" << numWordsCovered << endl;
NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
stack.Add(hypo, hypoRecycle);
}
NSCubePruningMiniStack::MiniStack &Stacks::GetMiniStack(const Bitmap &newBitmap, const Range &pathRange)
{
size_t numWordsCovered = newBitmap.GetNumWordsCovered();
//cerr << "numWordsCovered=" << numWordsCovered << endl;
NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos());
return stack.GetMiniStack(key);
}
}
}


@ -1,51 +0,0 @@
/*
* Stacks.h
*
* Created on: 6 Nov 2015
* Author: hieu
*/
#pragma once
#include <vector>
#include "../CubePruningMiniStack/Stack.h"
#include "../../Recycler.h"
namespace Moses2
{
class Manager;
namespace NSCubePruningPerBitmap
{
class Stacks {
friend std::ostream& operator<<(std::ostream &, const Stacks &);
public:
Stacks(const Manager &mgr);
virtual ~Stacks();
void Init(size_t numStacks);
size_t GetSize() const
{ return m_stacks.size(); }
const NSCubePruningMiniStack::Stack &Back() const
{ return *m_stacks.back(); }
NSCubePruningMiniStack::Stack &operator[](size_t ind)
{ return *m_stacks[ind]; }
void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange);
protected:
const Manager &m_mgr;
std::vector<NSCubePruningMiniStack::Stack*> m_stacks;
};
}
}


@ -1,161 +0,0 @@
/*
* CubePruning.cpp
*
* Created on: 27 Nov 2015
* Author: hieu
*/
#include "Misc.h"
#include "../Manager.h"
#include "../../MemPool.h"
#include "../../System.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningPerMiniStack
{
////////////////////////////////////////////////////////////////////////
QueueItem *QueueItem::Create(QueueItem *currItem,
Manager &mgr,
CubeEdge &edge,
size_t hypoIndex,
size_t tpIndex,
std::deque<QueueItem*> &queueItemRecycler)
{
QueueItem *ret;
if (currItem) {
// reuse incoming queue item to create new item
ret = currItem;
ret->Init(mgr, edge, hypoIndex, tpIndex);
}
else if (!queueItemRecycler.empty()) {
// use item from recycle bin
ret = queueItemRecycler.back();
ret->Init(mgr, edge, hypoIndex, tpIndex);
queueItemRecycler.pop_back();
}
else {
// create new item
ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
}
return ret;
}
QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
:edge(&edge)
,hypoIndex(hypoIndex)
,tpIndex(tpIndex)
{
CreateHypothesis(mgr);
}
void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
{
this->edge = &edge;
this->hypoIndex = hypoIndex;
this->tpIndex = tpIndex;
CreateHypothesis(mgr);
}
void QueueItem::CreateHypothesis(Manager &mgr)
{
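// pair the hypoIndex-th best hypothesis of the source mini-stack with the tpIndex-th target phrase of this edge, then score the combination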
const Hypothesis *prevHypo = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex];
const TargetPhrase &tp = edge->tps[tpIndex];
//cerr << "hypoIndex=" << hypoIndex << endl;
//cerr << "edge.hypos=" << edge.hypos.size() << endl;
//cerr << prevHypo << endl;
//cerr << *prevHypo << endl;
hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
hypo->EvaluateWhenApplied();
}
////////////////////////////////////////////////////////////////////////
CubeEdge::CubeEdge(
Manager &mgr,
const NSCubePruningMiniStack::MiniStack &miniStack,
const InputPath &path,
const TargetPhrases &tps,
const Bitmap &newBitmap)
:miniStack(miniStack)
,path(path)
,tps(tps)
,newBitmap(newBitmap)
{
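// the future-cost estimate depends only on the new coverage bitmap, so it is computed once per edge rather than once per hypothesis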
estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
}
std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
{
out << obj.newBitmap;
return out;
}
bool
CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
{
//UTIL_THROW_IF2(x >= (1<<17), "Error");
//UTIL_THROW_IF2(y >= (1<<17), "Error");
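// pack the (hypoIndex, tpIndex) cube coordinate into a single int so the unordered_set can record which grid positions have already been queued; this relies on the indices staying small, roughly what the disabled checks above guard against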
SeenPositionItem val(this, (x<<16) + y);
std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
return pairRet.second;
}
void CubeEdge::CreateFirst(Manager &mgr,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler)
{
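// seed the queue with the (0, 0) corner of this edge's grid: the best previous hypothesis combined with the best target phrase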
if (miniStack.GetSortedAndPruneHypos(mgr).size()) {
assert(tps.GetSize());
QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
queue.push(item);
bool setSeen = SetSeenPosition(0, 0, seenPositions);
assert(setSeen);
}
}
void CubeEdge::CreateNext(Manager &mgr,
QueueItem *item,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler)
{
size_t hypoIndex = item->hypoIndex;
size_t tpIndex = item->tpIndex;
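// expand the popped item's two cube neighbours, (hypoIndex+1, tpIndex) and (hypoIndex, tpIndex+1), skipping coordinates that have already been queued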
if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
// reuse incoming queue item to create new item
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
assert(newItem == item);
queue.push(newItem);
item = NULL;
}
if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
queue.push(newItem);
item = NULL;
}
if (item) {
// recycle unused queue item
queueItemRecycler.push_back(item);
}
}
}
}


@ -1,113 +0,0 @@
/*
* CubePruning.h
*
* Created on: 27 Nov 2015
* Author: hieu
*/
#pragma once
#include <boost/pool/pool_alloc.hpp>
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <vector>
#include <queue>
#include "../../legacy/Range.h"
#include "../Hypothesis.h"
#include "../../TypeDef.h"
#include "../../Vector.h"
#include "../CubePruningMiniStack/Stack.h"
namespace Moses2
{
class Manager;
class InputPath;
class TargetPhrases;
class Bitmap;
namespace NSCubePruningPerMiniStack
{
class CubeEdge;
///////////////////////////////////////////
class QueueItem
{
~QueueItem(); // NOT IMPLEMENTED. Use MemPool
public:
static QueueItem *Create(QueueItem *currItem,
Manager &mgr,
CubeEdge &edge,
size_t hypoIndex,
size_t tpIndex,
std::deque<QueueItem*> &queueItemRecycler);
QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
CubeEdge *edge;
size_t hypoIndex, tpIndex;
Hypothesis *hypo;
protected:
void CreateHypothesis(Manager &mgr);
};
///////////////////////////////////////////
class QueueItemOrderer
{
public:
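// inverts HypothesisFutureScoreOrderer so that std::priority_queue, a max-heap, presumably pops the hypothesis with the best future score first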
bool operator()(QueueItem* itemA, QueueItem* itemB) const {
HypothesisFutureScoreOrderer orderer;
return !orderer(itemA->hypo, itemB->hypo);
}
};
///////////////////////////////////////////
class CubeEdge
{
friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
public:
typedef std::priority_queue<QueueItem*,
std::vector<QueueItem*>,
QueueItemOrderer> Queue;
typedef std::pair<const CubeEdge*, int> SeenPositionItem;
typedef boost::unordered_set<SeenPositionItem,
boost::hash<SeenPositionItem>,
std::equal_to<SeenPositionItem>
> SeenPositions;
const NSCubePruningMiniStack::MiniStack &miniStack;
const InputPath &path;
const TargetPhrases &tps;
const Bitmap &newBitmap;
SCORE estimatedScore;
CubeEdge(Manager &mgr,
const NSCubePruningMiniStack::MiniStack &miniStack,
const InputPath &path,
const TargetPhrases &tps,
const Bitmap &newBitmap);
bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
void CreateFirst(Manager &mgr,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler);
void CreateNext(Manager &mgr,
QueueItem *item,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler);
protected:
};
}
}


@ -1,248 +0,0 @@
/*
* Search.cpp
*
* Created on: 16 Nov 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
#include "Search.h"
#include "../Manager.h"
#include "../Hypothesis.h"
#include "../../InputPaths.h"
#include "../../InputPath.h"
#include "../../System.h"
#include "../../Sentence.h"
#include "../../TranslationTask.h"
#include "../../legacy/Util2.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningPerMiniStack
{
////////////////////////////////////////////////////////////////////////
Search::Search(Manager &mgr)
:Moses2::Search(mgr)
,m_stacks(mgr)
,m_queue(QueueItemOrderer(),
std::vector<QueueItem*>() )
,m_seenPositions()
{
}
Search::~Search()
{
}
void Search::Decode()
{
// init stacks
m_stacks.Init(mgr.GetInput().GetSize() + 1);
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
initHypo->EmptyHypothesisState(mgr.GetInput());
m_stacks.Add(initHypo, mgr.GetHypoRecycle());
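// build the cube edges for every stack up front, then decode the stacks in order of number of words covered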
for (size_t stackInd = 0; stackInd < m_stacks.GetSize() - 1; ++stackInd) {
CreateSearchGraph(stackInd);
}
for (size_t stackInd = 1; stackInd < m_stacks.GetSize(); ++stackInd) {
//cerr << "stackInd=" << stackInd << endl;
Decode(stackInd);
//cerr << m_stacks << endl;
}
//DebugCounts();
}
void Search::Decode(size_t stackInd)
{
NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
Decode(miniStack);
}
}
void Search::Decode(NSCubePruningMiniStack::MiniStack &miniStack)
{
Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
// reuse queue from previous stack. Clear it first
std::vector<QueueItem*> &container = Container(m_queue);
//cerr << "container=" << container.size() << endl;
BOOST_FOREACH(QueueItem *item, container) {
// recycle unused hypos from queue
Hypothesis *hypo = item->hypo;
hypoRecycler.Recycle(hypo);
// recycle queue item
m_queueItemRecycler.push_back(item);
}
container.clear();
m_seenPositions.clear();
// add top hypo from every edge into queue
CubeEdges &edges = *m_cubeEdges[&miniStack];
BOOST_FOREACH(CubeEdge *edge, edges) {
//cerr << "edge=" << *edge << endl;
edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
}
size_t pops = 0;
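// each pop commits one hypothesis to its stack and pushes the popped item's cube neighbours; stop once the pop limit is reached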
while (!m_queue.empty() && pops < mgr.system.popLimit) {
// get best hypo from queue, add to stack
//cerr << "queue=" << queue.size() << endl;
QueueItem *item = m_queue.top();
m_queue.pop();
CubeEdge *edge = item->edge;
// add hypo to stack
Hypothesis *hypo = item->hypo;
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
m_stacks.Add(hypo, hypoRecycler);
edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
++pops;
}
/*
// create hypo from every edge. Increase diversity
while (!m_queue.empty()) {
QueueItem *item = m_queue.top();
m_queue.pop();
if (item->hypoIndex == 0 && item->tpIndex == 0) {
CubeEdge &edge = item->edge;
// add hypo to stack
Hypothesis *hypo = item->hypo;
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
m_stacks.Add(hypo, mgr.GetHypoRecycle());
}
}
*/
}
void Search::CreateSearchGraph(size_t stackInd)
{
NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
MemPool &pool = mgr.GetPool();
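// for every (coverage bitmap, end position) mini-stack, create a CubeEdge for each extendable input path and phrase table, grouped by the mini-stack the resulting hypotheses will land in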
BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
const Bitmap &hypoBitmap = *val.first.first;
size_t hypoEndPos = val.first.second;
//cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
// create edges to next hypos from existing hypos
const InputPaths &paths = mgr.GetInputPaths();
BOOST_FOREACH(const InputPath *path, paths) {
const Range &pathRange = path->range;
//cerr << "pathRange=" << pathRange << endl;
if (!path->IsUsed()) {
continue;
}
if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
continue;
}
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
// sort hypo for a particular bitmap and hypoEndPos
const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
// add cube edge
size_t numPt = mgr.system.mappings.size();
for (size_t i = 0; i < numPt; ++i) {
const TargetPhrases *tps = path->targetPhrases[i];
if (tps && tps->GetSize()) {
// create next mini stack
NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange);
CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap);
CubeEdges *edges;
boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*>::iterator iter = m_cubeEdges.find(&nextMiniStack);
if (iter == m_cubeEdges.end()) {
edges = new (pool.Allocate<CubeEdges>()) CubeEdges();
m_cubeEdges[&nextMiniStack] = edges;
}
else {
edges = iter->second;
}
edges->push_back(edge);
}
}
}
}
}
const Hypothesis *Search::GetBestHypo() const
{
const NSCubePruningMiniStack::Stack &lastStack = m_stacks.Back();
std::vector<const Hypothesis*> sortedHypos = lastStack.GetBestHypos(1);
const Hypothesis *best = NULL;
if (sortedHypos.size()) {
best = sortedHypos[0];
}
return best;
}
void Search::DebugCounts()
{
std::map<size_t, size_t> counts;
for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
//cerr << "stackInd=" << stackInd << endl;
const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
size_t count = miniStack.GetColl().size();
++counts[count];
}
//cerr << m_stacks << endl;
}
std::map<size_t, size_t>::const_iterator iter;
for (iter = counts.begin(); iter != counts.end(); ++iter) {
cerr << iter->first << "=" << iter->second << " ";
}
cerr << endl;
}
}
}


@ -1,66 +0,0 @@
/*
* Search.h
*
* Created on: 16 Nov 2015
* Author: hieu
*/
#pragma once
#include <boost/pool/pool_alloc.hpp>
#include <boost/unordered_map.hpp>
#include "../Search.h"
#include "Misc.h"
#include "Stacks.h"
#include "../../legacy/Range.h"
namespace Moses2
{
class Bitmap;
class Hypothesis;
class InputPath;
class TargetPhrases;
namespace NSCubePruningMiniStack
{
class MiniStack;
}
namespace NSCubePruningPerMiniStack
{
class Search : public Moses2::Search
{
public:
Search(Manager &mgr);
virtual ~Search();
virtual void Decode();
const Hypothesis *GetBestHypo() const;
protected:
Stacks m_stacks;
CubeEdge::Queue m_queue;
CubeEdge::SeenPositions m_seenPositions;
// CUBE PRUNING VARIABLES
// setup
typedef std::vector<CubeEdge*> CubeEdges;
boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*> m_cubeEdges;
std::deque<QueueItem*> m_queueItemRecycler;
// CUBE PRUNING
// decoding
void CreateSearchGraph(size_t stackInd);
void Decode(size_t stackInd);
void Decode(NSCubePruningMiniStack::MiniStack &miniStack);
void DebugCounts();
};
}
}


@ -1,72 +0,0 @@
/*
* Stacks.cpp
*
* Created on: 6 Nov 2015
* Author: hieu
*/
#include "Stacks.h"
#include "../../System.h"
#include "../Manager.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningPerMiniStack
{
Stacks::Stacks(const Manager &mgr)
:m_mgr(mgr)
{
}
Stacks::~Stacks()
{
}
void Stacks::Init(size_t numStacks)
{
m_stacks.resize(numStacks);
for (size_t i = 0; i < m_stacks.size(); ++i) {
m_stacks[i] = new (m_mgr.GetPool().Allocate<NSCubePruningMiniStack::Stack>()) NSCubePruningMiniStack::Stack(m_mgr);
}
}
std::ostream& operator<<(std::ostream &out, const Stacks &obj)
{
for (size_t i = 0; i < obj.GetSize(); ++i) {
const NSCubePruningMiniStack::Stack &stack = *obj.m_stacks[i];
out << stack.GetHypoSize() << " ";
}
return out;
}
void Stacks::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
{
size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered();
//cerr << "numWordsCovered=" << numWordsCovered << endl;
NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
stack.Add(hypo, hypoRecycle);
}
NSCubePruningMiniStack::MiniStack &Stacks::GetMiniStack(const Bitmap &newBitmap, const Range &pathRange)
{
size_t numWordsCovered = newBitmap.GetNumWordsCovered();
//cerr << "numWordsCovered=" << numWordsCovered << endl;
NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos());
return stack.GetMiniStack(key);
}
}
}


@ -1,51 +0,0 @@
/*
* Stacks.h
*
* Created on: 6 Nov 2015
* Author: hieu
*/
#pragma once
#include <vector>
#include "../CubePruningMiniStack/Stack.h"
#include "../../Recycler.h"
namespace Moses2
{
class Manager;
namespace NSCubePruningPerMiniStack
{
class Stacks {
friend std::ostream& operator<<(std::ostream &, const Stacks &);
public:
Stacks(const Manager &mgr);
virtual ~Stacks();
void Init(size_t numStacks);
size_t GetSize() const
{ return m_stacks.size(); }
const NSCubePruningMiniStack::Stack &Back() const
{ return *m_stacks.back(); }
NSCubePruningMiniStack::Stack &operator[](size_t ind)
{ return *m_stacks[ind]; }
void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange);
protected:
const Manager &m_mgr;
std::vector<NSCubePruningMiniStack::Stack*> m_stacks;
};
}
}


@ -1,244 +0,0 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <algorithm>
#include <limits>
#include <vector>
#include <iostream>
#include <cstring>
#include <cmath>
#include <cstdlib>
#include "Range.h"
#include "../Array.h"
namespace Moses2
{
class MemPool;
typedef unsigned long WordsBitmapID;
/** Vector of boolean to represent whether a word has been translated or not.
*
* Implemented using a vector of char, which is usually the same representation
* for the elements that a C array of bool would use. A vector of bool, or a
* Boost dynamic_bitset, could be much more efficient in theory. Unfortunately
* algorithms like std::find() are not optimized for vector<bool> on gcc or
* clang, and dynamic_bitset lacks all the optimized search operations we want.
* Only benchmarking will tell what works best. Perhaps dynamic_bitset could
* still be a dramatic improvement, if we flip the meaning of the bits around
* so we can use its find_first() and find_next() for the most common searches.
*/
class Bitmap
{
friend std::ostream& operator<<(std::ostream& out, const Bitmap& bitmap);
private:
Array<char> m_bitmap; //! Ticks of words in sentence that have been done.
size_t m_firstGap; //! Cached position of first gap, or NOT_FOUND.
size_t m_numWordsCovered;
Bitmap(); // not implemented
Bitmap& operator=(const Bitmap& other);
/** Update the first gap, when bits are flipped */
void UpdateFirstGap(size_t startPos, size_t endPos, bool value)
{
if (value) {
//may remove gap
if (startPos <= m_firstGap && m_firstGap <= endPos) {
m_firstGap = NOT_FOUND;
for (size_t i = endPos + 1; i < m_bitmap.size(); ++i) {
if (!m_bitmap[i]) {
m_firstGap = i;
break;
}
}
}
}
else {
//setting positions to false, may add new gap
if (startPos < m_firstGap) {
m_firstGap = startPos;
}
}
}
//! set value between 2 positions, inclusive
void
SetValueNonOverlap(Range const& range) {
size_t startPos = range.GetStartPos();
size_t endPos = range.GetEndPos();
for(size_t pos = startPos; pos <= endPos; pos++) {
m_bitmap[pos] = true;
}
m_numWordsCovered += range.GetNumWordsCovered();
UpdateFirstGap(startPos, endPos, true);
}
public:
//! Create Bitmap of length size, and initialise with vector.
explicit Bitmap(MemPool &pool, size_t size);
void Init(const std::vector<bool>& initializer);
void Init(const Bitmap &copy, const Range &range);
//! Count of words translated.
size_t GetNumWordsCovered() const {
return m_numWordsCovered;
}
//! position of 1st word not yet translated, or NOT_FOUND if everything already translated
size_t GetFirstGapPos() const {
return m_firstGap;
}
//! position of last word not yet translated, or NOT_FOUND if everything already translated
size_t GetLastGapPos() const {
for (int pos = int(m_bitmap.size()) - 1; pos >= 0; pos--) {
if (!m_bitmap[pos]) {
return pos;
}
}
// no starting pos
return NOT_FOUND;
}
//! position of last translated word
size_t GetLastPos() const {
for (int pos = int(m_bitmap.size()) - 1; pos >= 0; pos--) {
if (m_bitmap[pos]) {
return pos;
}
}
// no starting pos
return NOT_FOUND;
}
//! whether a word has been translated at a particular position
bool GetValue(size_t pos) const {
return bool(m_bitmap[pos]);
}
//! set value at a particular position
void SetValue( size_t pos, bool value ) {
bool origValue = m_bitmap[pos];
if (origValue == value) {
// do nothing
}
else {
m_bitmap[pos] = value;
UpdateFirstGap(pos, pos, value);
if (value) {
++m_numWordsCovered;
}
else {
--m_numWordsCovered;
}
}
}
//! whether every word has been translated
bool IsComplete() const {
return GetSize() == GetNumWordsCovered();
}
//! whether the wordrange overlaps with any translated word in this bitmap
bool Overlap(const Range &compare) const {
for (size_t pos = compare.GetStartPos(); pos <= compare.GetEndPos(); pos++) {
if (m_bitmap[pos])
return true;
}
return false;
}
//! number of elements
size_t GetSize() const {
return m_bitmap.size();
}
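//! leftmost position of the uncovered run that ends at l (returns l itself if the word to its left is covered)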
inline size_t GetEdgeToTheLeftOf(size_t l) const {
if (l == 0) return l;
while (l && !m_bitmap[l-1]) {
--l;
}
return l;
}
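//! position just before the next covered word to the right of r, or the last position if nothing to the right is covered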
inline size_t GetEdgeToTheRightOf(size_t r) const {
if (r+1 == m_bitmap.size()) return r;
return (
std::find(m_bitmap.begin() + r + 1, m_bitmap.end(), true) -
m_bitmap.begin()
) - 1;
}
//! converts bitmap into an integer ID: it consists of two parts: the first 16 bit are the pattern between the first gap and the last word-1, the second 16 bit are the number of filled positions. enforces a sentence length limit of 65535 and a max distortion of 16
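// e.g. for coverage 1 1 0 1 0: first gap = 2, last covered position = 3, the pattern bits reduce to GetValue(3) = 1, so the ID is 1 + 2 * 2^16 = 131073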
WordsBitmapID GetID() const {
assert(m_bitmap.size() < (1<<16));
size_t start = GetFirstGapPos();
if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left
size_t end = GetLastPos();
if (end == NOT_FOUND) end = 0;// nothing translated yet
assert(end < start || end-start <= 16);
WordsBitmapID id = 0;
for(size_t pos = end; pos > start; pos--) {
id = id*2 + (int) GetValue(pos);
}
return id + (1<<16) * start;
}
//! converts bitmap into an integer ID, with an additional span covered
WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const {
assert(m_bitmap.size() < (1<<16));
size_t start = GetFirstGapPos();
if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left
size_t end = GetLastPos();
if (end == NOT_FOUND) end = 0;// nothing translated yet
if (start == startPos) start = endPos+1;
if (end < endPos) end = endPos;
assert(end < start || end-start <= 16);
WordsBitmapID id = 0;
for(size_t pos = end; pos > start; pos--) {
id = id*2;
if (GetValue(pos) || (startPos<=pos && pos<=endPos))
id++;
}
return id + (1<<16) * start;
}
// for unordered_set in stack
size_t hash() const;
bool operator==(const Bitmap& other) const;
bool operator!=(const Bitmap& other) const {
return !(*this == other);
}
};
}


@ -1,123 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "../legacy/Parameter.h"
#include "AllOptions.h"
namespace Moses2
{
AllOptions::
AllOptions()
: mira(false)
, use_legacy_pt(false)
{ }
AllOptions::
AllOptions(Parameter const& param)
{
init(param);
}
bool
AllOptions::
init(Parameter const& param)
{
if (!search.init(param)) return false;
if (!cube.init(param)) return false;
if (!nbest.init(param)) return false;
if (!reordering.init(param)) return false;
if (!context.init(param)) return false;
if (!input.init(param)) return false;
if (!mbr.init(param)) return false;
if (!lmbr.init(param)) return false;
if (!output.init(param)) return false;
if (!unk.init(param)) return false;
if (!server.init(param)) return false;
if (!syntax.init(param)) return false;
param.SetParameter(mira, "mira", false);
return sanity_check();
}
bool
AllOptions::
sanity_check()
{
using namespace std;
if (lmbr.enabled)
{
if (mbr.enabled)
{
cerr << "Error: Cannot use both n-best mbr and lattice mbr together" << endl;
return false;
}
mbr.enabled = true;
}
if (search.consensus)
{
if (mbr.enabled)
{
cerr << "Error: Cannot use consensus decoding together with mbr"
<< endl;
return false;
}
mbr.enabled = true;
}
// RecoverPath should only be used with confusion net or word lattice input
if (output.RecoverPath && input.input_type == SentenceInput)
{
TRACE_ERR("--recover-input-path should only be used with "
<<"confusion net or word lattice input!\n");
output.RecoverPath = false;
}
// set m_nbest_options.enabled = true if necessary:
nbest.enabled = (nbest.enabled || mira || search.consensus
|| nbest.nbest_size > 0
|| mbr.enabled || lmbr.enabled
|| !output.SearchGraph.empty()
|| !output.SearchGraphExtended.empty()
|| !output.SearchGraphSLF.empty()
|| !output.SearchGraphHG.empty()
|| !output.SearchGraphPB.empty()
|| output.lattice_sample_size != 0);
return true;
}
#ifdef HAVE_XMLRPC_C
bool
AllOptions::
update(std::map<std::string,xmlrpc_c::value>const& param)
{
if (!search.update(param)) return false;
if (!cube.update(param)) return false;
if (!nbest.update(param)) return false;
if (!reordering.update(param)) return false;
if (!context.update(param)) return false;
if (!input.update(param)) return false;
if (!mbr.update(param)) return false;
if (!lmbr.update(param)) return false;
if (!output.update(param)) return false;
if (!unk.update(param)) return false;
if (!server.update(param)) return false;
//if (!syntax.update(param)) return false;
return sanity_check();
}
#endif
bool
AllOptions::
NBestDistinct() const
{
return (nbest.only_distinct
|| mbr.enabled || lmbr.enabled
|| output.lattice_sample_size
|| !output.SearchGraph.empty()
|| !output.SearchGraphExtended.empty()
|| !output.SearchGraphSLF.empty()
|| !output.SearchGraphHG.empty());
}
}


@ -1,51 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include <boost/shared_ptr.hpp>
#include "OptionsBaseClass.h"
#include "SearchOptions.h"
#include "CubePruningOptions.h"
#include "NBestOptions.h"
#include "ReorderingOptions.h"
#include "ContextParameters.h"
#include "InputOptions.h"
#include "MBR_Options.h"
#include "LMBR_Options.h"
#include "ReportingOptions.h"
#include "OOVHandlingOptions.h"
#include "ServerOptions.h"
#include "SyntaxOptions.h"
namespace Moses2
{
struct
AllOptions : public OptionsBaseClass
{
typedef boost::shared_ptr<AllOptions const> ptr;
SearchOptions search;
CubePruningOptions cube;
NBestOptions nbest;
ReorderingOptions reordering;
ContextParameters context;
InputOptions input;
MBR_Options mbr;
LMBR_Options lmbr;
ReportingOptions output;
OOVHandlingOptions unk;
ServerOptions server;
SyntaxOptions syntax;
bool mira;
bool use_legacy_pt;
// StackOptions stack;
// BeamSearchOptions beam;
bool init(Parameter const& param);
bool sanity_check();
AllOptions();
AllOptions(Parameter const& param);
bool update(std::map<std::string,xmlrpc_c::value>const& param);
bool NBestDistinct() const;
};
}


@ -1,15 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "OptionsBaseClass.h"
namespace Moses2
{
struct
BeamSearchOptions : public OptionsBaseClass
{
bool init(Parameter const& param);
BeamSearchOptions(Parameter const& param);
};
}


@ -1,18 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include "OptionsBaseClass.h"
namespace Moses2
{
class Parameter;
struct BookkeepingOptions : public OptionsBaseClass
{
bool need_alignment_info;
bool init(Parameter const& param);
BookkeepingOptions();
};
}


@ -1,80 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "CubePruningOptions.h"
#include "../TypeDef.h"
#include "../legacy/Parameter.h"
namespace Moses2
{
CubePruningOptions::
CubePruningOptions()
: pop_limit(DEFAULT_CUBE_PRUNING_POP_LIMIT)
, diversity(DEFAULT_CUBE_PRUNING_DIVERSITY)
, lazy_scoring(false)
, deterministic_search(false)
{}
bool
CubePruningOptions::
init(Parameter const& param)
{
param.SetParameter(pop_limit, "cube-pruning-pop-limit",
DEFAULT_CUBE_PRUNING_POP_LIMIT);
param.SetParameter(diversity, "cube-pruning-diversity",
DEFAULT_CUBE_PRUNING_DIVERSITY);
param.SetParameter(lazy_scoring, "cube-pruning-lazy-scoring", false);
//param.SetParameter(deterministic_search, "cube-pruning-deterministic-search", false);
return true;
}
#ifdef HAVE_XMLRPC_C
bool
CubePruningOptions::
update(std::map<std::string,xmlrpc_c::value>const& params)
{
typedef std::map<std::string, xmlrpc_c::value> params_t;
params_t::const_iterator si = params.find("cube-pruning-pop-limit");
if (si != params.end()) pop_limit = xmlrpc_c::value_int(si->second);
si = params.find("cube-pruning-diversity");
if (si != params.end()) diversity = xmlrpc_c::value_int(si->second);
si = params.find("cube-pruning-lazy-scoring");
if (si != params.end())
{
std::string spec = xmlrpc_c::value_string(si->second);
if (spec == "true" or spec == "on" or spec == "1")
lazy_scoring = true;
else if (spec == "false" or spec == "off" or spec == "0")
lazy_scoring = false;
else
{
char const* msg
= "Error parsing specification for cube-pruning-lazy-scoring";
xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
}
}
si = params.find("cube-pruning-deterministic-search");
if (si != params.end())
{
std::string spec = xmlrpc_c::value_string(si->second);
if (spec == "true" or spec == "on" or spec == "1")
deterministic_search = true;
else if (spec == "false" or spec == "off" or spec == "0")
deterministic_search = false;
else
{
char const* msg
= "Error parsing specification for cube-pruning-deterministic-search";
xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
}
}
return true;
}
#endif
}


@ -1,25 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "OptionsBaseClass.h"
namespace Moses2
{
struct
CubePruningOptions : public OptionsBaseClass
{
size_t pop_limit;
size_t diversity;
bool lazy_scoring;
bool deterministic_search;
bool init(Parameter const& param);
CubePruningOptions(Parameter const& param);
CubePruningOptions();
bool
update(std::map<std::string,xmlrpc_c::value>const& params);
};
}


@ -1,102 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "InputOptions.h"
#include <vector>
#include <iostream>
// #include "moses/StaticData.h"
#include "moses/TypeDef.h"
#include "../legacy/Parameter.h"
namespace Moses2
{
InputOptions::
InputOptions()
: continue_partial_translation(false)
, input_type(SentenceInput)
, xml_policy(XmlPassThrough)
, placeholder_factor(NOT_FOUND)
{
xml_brackets.first = "<";
xml_brackets.second = ">";
factor_order.assign(1,0);
factor_delimiter = "|";
}
bool
InputOptions::
init(Parameter const& param)
{
param.SetParameter(input_type, "inputtype", SentenceInput);
#if 0
if (input_type == SentenceInput)
{ VERBOSE(2, "input type is: text input"); }
else if (input_type == ConfusionNetworkInput)
{ VERBOSE(2, "input type is: confusion net"); }
else if (input_type == WordLatticeInput)
{ VERBOSE(2, "input type is: word lattice"); }
else if (input_type == TreeInputType)
{ VERBOSE(2, "input type is: tree"); }
else if (input_type == TabbedSentenceInput)
{ VERBOSE(2, "input type is: tabbed sentence"); }
else if (input_type == ForestInputType)
{ VERBOSE(2, "input type is: forest"); }
#endif
param.SetParameter(continue_partial_translation,
"continue-partial-translation", false);
param.SetParameter<XmlInputType>(xml_policy, "xml-input", XmlPassThrough);
// specify XML tags opening and closing brackets for XML option
// Do we really want this to be configurable???? UG
const PARAM_VEC *pspec;
pspec = param.GetParam("xml-brackets");
if (pspec && pspec->size())
{
std::vector<std::string> brackets = Tokenize(pspec->at(0));
if(brackets.size()!=2)
{
std::cerr << "invalid xml-brackets value, "
<< "must specify exactly 2 blank-delimited strings "
<< "for XML tags opening and closing brackets"
<< std::endl;
exit(1);
}
xml_brackets.first= brackets[0];
xml_brackets.second=brackets[1];
#if 0
VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
<< xml_brackets.first << " and "
<< xml_brackets.second << std::endl);
#endif
}
pspec = param.GetParam("input-factors");
if (pspec) factor_order = Scan<FactorType>(*pspec);
if (factor_order.empty()) factor_order.assign(1,0);
param.SetParameter(placeholder_factor, "placeholder-factor", NOT_FOUND);
param.SetParameter<std::string>(factor_delimiter, "factor-delimiter", "|");
param.SetParameter<std::string>(input_file_path,"input-file","");
return true;
}
#ifdef HAVE_XMLRPC_C
bool
InputOptions::
update(std::map<std::string,xmlrpc_c::value>const& param)
{
typedef std::map<std::string, xmlrpc_c::value> params_t;
params_t::const_iterator si = param.find("xml-input");
if (si != param.end())
xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
return true;
}
#endif
}


@ -1,32 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "OptionsBaseClass.h"
#include "../TypeDef.h"
namespace Moses2
{
struct
InputOptions : public OptionsBaseClass
{
bool continue_partial_translation;
InputTypeEnum input_type;
XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive
std::vector<FactorType> factor_order; // input factor order
std::string factor_delimiter;
FactorType placeholder_factor; // where to store original text for placeholders
std::string input_file_path;
std::pair<std::string,std::string> xml_brackets;
// strings to use as XML tags' opening and closing brackets.
// Default are "<" and ">"
InputOptions();
bool init(Parameter const& param);
bool update(std::map<std::string,xmlrpc_c::value>const& param);
};
}


@ -1,39 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "LMBR_Options.h"
#include "../legacy/Parameter.h"
namespace Moses2
{
LMBR_Options::
LMBR_Options()
: enabled(false)
, use_lattice_hyp_set(false)
, precision(0.8f)
, ratio(0.6f)
, map_weight(0.8f)
, pruning_factor(30)
{ }
bool
LMBR_Options::
init(Parameter const& param)
{
param.SetParameter(enabled, "lminimum-bayes-risk", false);
param.SetParameter(ratio, "lmbr-r", 0.6f);
param.SetParameter(precision, "lmbr-p", 0.8f);
param.SetParameter(map_weight, "lmbr-map-weight", 0.0f);
param.SetParameter(pruning_factor, "lmbr-pruning-factor", size_t(30));
param.SetParameter(use_lattice_hyp_set, "lattice-hypo-set", false);
PARAM_VEC const* params = param.GetParam("lmbr-thetas");
if (params) theta = Scan<float>(*params);
return true;
}
}


@ -1,26 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include <vector>
#include "OptionsBaseClass.h"
namespace Moses2
{
// Options for minimum Bayes risk decoding
struct
LMBR_Options : public OptionsBaseClass
{
bool enabled;
bool use_lattice_hyp_set; //! to use nbest as hypothesis set during lattice MBR
float precision; //! unigram precision theta - see Tromble et al 08 for more details
float ratio; //! decaying factor for ngram thetas - see Tromble et al 08
float map_weight; //! Weight given to the map solution. See Kumar et al 09
size_t pruning_factor; //! average number of nodes per word wanted in pruned lattice
std::vector<float> theta; //! theta(s) for lattice mbr calculation
bool init(Parameter const& param);
LMBR_Options();
};
}


@ -1,26 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "MBR_Options.h"
#include "../legacy/Parameter.h"
namespace Moses2
{
MBR_Options::
MBR_Options()
: enabled(false)
, size(200)
, scale(1.0f)
{}
bool
MBR_Options::
init(Parameter const& param)
{
param.SetParameter(enabled, "minimum-bayes-risk", false);
param.SetParameter<size_t>(size, "mbr-size", 200);
param.SetParameter(scale, "mbr-scale", 1.0f);
return true;
}
}


@ -1,21 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "OptionsBaseClass.h"
namespace Moses2
{
// Options for minimum Bayes risk decoding
struct
MBR_Options : public OptionsBaseClass
{
bool enabled;
size_t size; //! number of translation candidates considered
float scale; /*! scaling factor for computing marginal probability
* of candidate translation */
bool init(Parameter const& param);
MBR_Options();
};
}


@ -1,50 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "OOVHandlingOptions.h"
#include <vector>
#include <iostream>
#include "moses/StaticData.h"
#include "moses/TypeDef.h"
#include "../legacy/Parameter.h"
namespace Moses2
{
OOVHandlingOptions::
OOVHandlingOptions()
{
drop = false;
mark = false;
prefix = "UNK";
suffix = "";
word_deletion_enabled = false;
always_create_direct_transopt = false;
}
bool
OOVHandlingOptions::
init(Parameter const& param)
{
param.SetParameter(drop,"drop-unknown",false);
param.SetParameter(mark,"mark-unknown",false);
param.SetParameter(word_deletion_enabled, "phrase-drop-allowed", false);
param.SetParameter(always_create_direct_transopt, "always-create-direct-transopt", false);
param.SetParameter<std::string>(prefix,"unknown-word-prefix","UNK");
param.SetParameter<std::string>(suffix,"unknown-word-suffix","");
return true;
}
#ifdef HAVE_XMLRPC_C
bool
OOVHandlingOptions::
update(std::map<std::string,xmlrpc_c::value>const& param)
{
typedef std::map<std::string, xmlrpc_c::value> params_t;
// params_t::const_iterator si = param.find("xml-input");
// if (si != param.end())
// xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
return true;
}
#endif
}


@ -1,27 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "OptionsBaseClass.h"
namespace Moses2
{
struct
OOVHandlingOptions : public OptionsBaseClass
{
bool drop;
bool mark;
std::string prefix;
std::string suffix;
bool word_deletion_enabled;
bool always_create_direct_transopt;
OOVHandlingOptions();
bool init(Parameter const& param);
bool update(std::map<std::string,xmlrpc_c::value>const& param);
};
}


@ -1,30 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#include "OptionsBaseClass.h"
#include "moses/Util.h"
#include "../legacy/Parameter.h"
namespace Moses2
{
#ifdef HAVE_XMLRPC_C
bool
OptionsBaseClass::
update(std::map<std::string,xmlrpc_c::value>const& params)
{
return true;
}
#endif
#ifdef HAVE_XMLRPC_C
bool
OptionsBaseClass::
check(std::map<std::string, xmlrpc_c::value> const& param,
std::string const key, bool dfltval)
{
std::map<std::string, xmlrpc_c::value>::const_iterator m;
m = param.find(key);
if (m == param.end()) return dfltval;
return Scan<bool>(xmlrpc_c::value_string(m->second));
}
#endif
}


@ -1,20 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include "moses/xmlrpc-c.h"
#include <string>
#include <map>
namespace Moses2
{
class Parameter;
struct OptionsBaseClass
{
#ifdef HAVE_XMLRPC_C
virtual bool
update(std::map<std::string,xmlrpc_c::value>const& params);
#endif
bool
check(std::map<std::string, xmlrpc_c::value> const& param,
std::string const key, bool dfltval);
};
}


@ -1,31 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "ReorderingOptions.h"
#include "../legacy/Parameter.h"
namespace Moses2
{
ReorderingOptions::
ReorderingOptions()
: max_distortion(-1)
, monotone_at_punct(false)
, use_early_distortion_cost(false)
{}
ReorderingOptions::
ReorderingOptions(Parameter const& param)
{
init(param);
}
bool
ReorderingOptions::
init(Parameter const& param)
{
param.SetParameter(max_distortion, "distortion-limit", -1);
param.SetParameter(monotone_at_punct, "monotone-at-punctuation", false);
param.SetParameter(use_early_distortion_cost, "early-distortion-cost", false);
return true;
}
}


@ -1,20 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "OptionsBaseClass.h"
namespace Moses2
{
struct
ReorderingOptions : public OptionsBaseClass
{
int max_distortion;
bool monotone_at_punct;
bool use_early_distortion_cost;
bool init(Parameter const& param);
ReorderingOptions(Parameter const& param);
ReorderingOptions();
};
}


@ -1,152 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "ReportingOptions.h"
#include "../legacy/Parameter.h"
namespace Moses2
{
using namespace std;
ReportingOptions::
ReportingOptions()
: start_translation_id(0)
, ReportAllFactors(false)
, ReportSegmentation(0)
, PrintAlignmentInfo(false)
, PrintAllDerivations(false)
, PrintTranslationOptions(false)
, WA_SortOrder(NoSort)
, WordGraph(false)
, DontPruneSearchGraph(false)
, RecoverPath(false)
, ReportHypoScore(false)
, PrintID(false)
, PrintPassThrough(false)
, include_lhs_in_search_graph(false)
, lattice_sample_size(0)
{
factor_order.assign(1,0);
factor_delimiter = "|";
}
bool
ReportingOptions::
init(Parameter const& param)
{
param.SetParameter<long>(start_translation_id, "start-translation-id", 0);
// including factors in the output
param.SetParameter(ReportAllFactors, "report-all-factors", false);
// segmentation reporting
ReportSegmentation = (param.GetParam("report-segmentation-enriched")
? 2 : param.GetParam("report-segmentation")
? 1 : 0);
// word alignment reporting
param.SetParameter(PrintAlignmentInfo, "print-alignment-info", false);
param.SetParameter(WA_SortOrder, "sort-word-alignment", NoSort);
std::string e; // hack to save us param.SetParameter<string>(...)
param.SetParameter(AlignmentOutputFile,"alignment-output-file", e);
param.SetParameter(PrintAllDerivations, "print-all-derivations", false);
param.SetParameter(PrintTranslationOptions, "print-translation-option", false);
// output a word graph
PARAM_VEC const* params;
params = param.GetParam("output-word-graph");
WordGraph = (params && params->size() == 2); // what are the two options?
// dump the search graph
param.SetParameter(SearchGraph, "output-search-graph", e);
param.SetParameter(SearchGraphExtended, "output-search-graph-extended", e);
param.SetParameter(SearchGraphSLF,"output-search-graph-slf", e);
param.SetParameter(SearchGraphHG, "output-search-graph-hypergraph", e);
#ifdef HAVE_PROTOBUF
param.SetParameter(SearchGraphPB, "output-search-graph-pb", e);
#endif
param.SetParameter(DontPruneSearchGraph, "unpruned-search-graph", false);
param.SetParameter(include_lhs_in_search_graph,
"include-lhs-in-search-graph", false );
// miscellaneous
param.SetParameter(RecoverPath, "recover-input-path",false);
param.SetParameter(ReportHypoScore, "output-hypo-score",false);
param.SetParameter(PrintID, "print-id",false);
param.SetParameter(PrintPassThrough, "print-passthrough",false);
param.SetParameter(detailed_all_transrep_filepath,
"translation-all-details", e);
param.SetParameter(detailed_transrep_filepath, "translation-details", e);
param.SetParameter(detailed_tree_transrep_filepath,
"tree-translation-details", e);
params = param.GetParam("lattice-samples");
if (params) {
if (params->size() ==2 ) {
lattice_sample_filepath = params->at(0);
lattice_sample_size = Scan<size_t>(params->at(1));
} else {
std::cerr <<"wrong format for switch -lattice-samples file size";
return false;
}
}
if (ReportAllFactors) {
factor_order.clear();
for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
factor_order.push_back(i);
} else {
params= param.GetParam("output-factors");
if (params) factor_order = Scan<FactorType>(*params);
if (factor_order.empty()) factor_order.assign(1,0);
}
param.SetParameter(factor_delimiter, "factor-delimiter", std::string("|"));
param.SetParameter(factor_delimiter, "output-factor-delimiter", factor_delimiter);
return true;
}
#ifdef HAVE_XMLRPC_C
bool
ReportingOptions::
update(std::map<std::string, xmlrpc_c::value>const& param)
{
ReportAllFactors = check(param, "report-all-factors", ReportAllFactors);
std::map<std::string, xmlrpc_c::value>::const_iterator m;
m = param.find("output-factors");
if (m != param.end()) {
factor_order=Tokenize<FactorType>(xmlrpc_c::value_string(m->second),",");
}
if (ReportAllFactors) {
factor_order.clear();
for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
factor_order.push_back(i);
}
m = param.find("align");
if (m != param.end() && Scan<bool>(xmlrpc_c::value_string(m->second)))
ReportSegmentation = 1;
PrintAlignmentInfo = check(param,"word-align",PrintAlignmentInfo);
m = param.find("factor-delimiter");
if (m != param.end()) {
factor_delimiter = Trim(xmlrpc_c::value_string(m->second));
}
m = param.find("output-factor-delimiter");
if (m != param.end()) {
factor_delimiter = Trim(xmlrpc_c::value_string(m->second));
}
return true;
}
#endif
}


@ -1,70 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include <vector>
#include "OptionsBaseClass.h"
#include "../TypeDef.h"
namespace Moses2
{
struct
ReportingOptions : public OptionsBaseClass
{
long start_translation_id;
std::vector<FactorType> factor_order;
std::string factor_delimiter;
bool ReportAllFactors; // m_reportAllFactors;
int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched
bool PrintAlignmentInfo; // m_PrintAlignmentInfo
bool PrintAllDerivations;
bool PrintTranslationOptions;
WordAlignmentSort WA_SortOrder; // 0: no, 1: target order
std::string AlignmentOutputFile;
bool WordGraph;
std::string SearchGraph;
std::string SearchGraphExtended;
std::string SearchGraphSLF;
std::string SearchGraphHG;
std::string SearchGraphPB;
bool DontPruneSearchGraph;
bool RecoverPath; // recover input path?
bool ReportHypoScore;
bool PrintID;
bool PrintPassThrough;
// transrep = translation reporting
std::string detailed_transrep_filepath;
std::string detailed_tree_transrep_filepath;
std::string detailed_all_transrep_filepath;
bool include_lhs_in_search_graph;
std::string lattice_sample_filepath;
size_t lattice_sample_size;
bool init(Parameter const& param);
/// do we need to keep the search graph from decoding?
bool NeedSearchGraph() const {
return !(SearchGraph.empty() && SearchGraphExtended.empty());
}
#ifdef HAVE_XMLRPC_C
bool update(std::map<std::string, xmlrpc_c::value>const& param);
#endif
ReportingOptions();
};
}


@ -1,107 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "SearchOptions.h"
#include "../legacy/Parameter.h"
namespace Moses2
{
SearchOptions::
SearchOptions()
: algo(Normal)
, stack_size(DEFAULT_MAX_HYPOSTACK_SIZE)
, stack_diversity(0)
, disable_discarding(false)
, max_phrase_length(DEFAULT_MAX_PHRASE_LENGTH)
, max_trans_opt_per_cov(DEFAULT_MAX_TRANS_OPT_SIZE)
, max_partial_trans_opt(DEFAULT_MAX_PART_TRANS_OPT_SIZE)
, beam_width(DEFAULT_BEAM_WIDTH)
, timeout(0)
, consensus(false)
, early_discarding_threshold(DEFAULT_EARLY_DISCARDING_THRESHOLD)
, trans_opt_threshold(DEFAULT_TRANSLATION_OPTION_THRESHOLD)
{ }
SearchOptions::
SearchOptions(Parameter const& param)
: stack_diversity(0)
{
init(param);
}
bool
SearchOptions::
init(Parameter const& param)
{
param.SetParameter(algo, "search-algorithm", Normal);
param.SetParameter(stack_size, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);
param.SetParameter(stack_diversity, "stack-diversity", size_t(0));
param.SetParameter(beam_width, "beam-threshold", DEFAULT_BEAM_WIDTH);
param.SetParameter(early_discarding_threshold, "early-discarding-threshold",
DEFAULT_EARLY_DISCARDING_THRESHOLD);
param.SetParameter(timeout, "time-out", 0);
param.SetParameter(max_phrase_length, "max-phrase-length",
DEFAULT_MAX_PHRASE_LENGTH);
param.SetParameter(trans_opt_threshold, "translation-option-threshold",
DEFAULT_TRANSLATION_OPTION_THRESHOLD);
param.SetParameter(max_trans_opt_per_cov, "max-trans-opt-per-coverage",
DEFAULT_MAX_TRANS_OPT_SIZE);
param.SetParameter(max_partial_trans_opt, "max-partial-trans-opt",
DEFAULT_MAX_PART_TRANS_OPT_SIZE);
param.SetParameter(consensus, "consensus-decoding", false);
param.SetParameter(disable_discarding, "disable-discarding", false);
// transformation to log of a few scores
beam_width = TransformScore(beam_width);
trans_opt_threshold = TransformScore(trans_opt_threshold);
early_discarding_threshold = TransformScore(early_discarding_threshold);
return true;
}
bool
is_syntax(SearchAlgorithm algo)
{
return (algo == CYKPlus || algo == ChartIncremental ||
algo == SyntaxS2T || algo == SyntaxT2S ||
algo == SyntaxF2S || algo == SyntaxT2S_SCFG);
}
#ifdef HAVE_XMLRPC_C
bool
SearchOptions::
update(std::map<std::string,xmlrpc_c::value>const& params)
{
typedef std::map<std::string, xmlrpc_c::value> params_t;
params_t::const_iterator si = params.find("search-algorithm");
if (si != params.end())
{
// use named parameters
std::string spec = xmlrpc_c::value_string(si->second);
if (spec == "normal" || spec == "0") algo = Normal;
else if (spec == "cube" || spec == "1") algo = CubePruning;
else throw xmlrpc_c::fault("Unsupported search algorithm",
xmlrpc_c::fault::CODE_PARSE);
}
si = params.find("stack");
if (si != params.end()) stack_size = xmlrpc_c::value_int(si->second);
si = params.find("stack-diversity");
if (si != params.end()) stack_diversity = xmlrpc_c::value_int(si->second);
si = params.find("beam-threshold");
if (si != params.end()) beam_width = xmlrpc_c::value_double(si->second);
si = params.find("time-out");
if (si != params.end()) timeout = xmlrpc_c::value_int(si->second);
si = params.find("max-phrase-length");
if (si != params.end()) max_phrase_length = xmlrpc_c::value_int(si->second);
return true;
}
#endif
}


@ -1,54 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include <limits>
#include "OptionsBaseClass.h"
#include "../TypeDef.h"
namespace Moses2
{
bool is_syntax(SearchAlgorithm algo);
struct
SearchOptions : public OptionsBaseClass
{
SearchAlgorithm algo;
// stack decoding
size_t stack_size; // maxHypoStackSize;
size_t stack_diversity; // minHypoStackDiversity;
bool disable_discarding;
// Disable discarding of bad hypotheses from HypothesisStackNormal
size_t max_phrase_length;
size_t max_trans_opt_per_cov;
size_t max_partial_trans_opt;
// beam search
float beam_width;
int timeout;
bool consensus; //! Use Consensus decoding (DeNero et al 2009)
// reordering options
// bool reorderingConstraint; //! use additional reordering constraints
// bool useEarlyDistortionCost;
float early_discarding_threshold;
float trans_opt_threshold;
bool init(Parameter const& param);
SearchOptions(Parameter const& param);
SearchOptions();
bool
UseEarlyDiscarding() const {
return early_discarding_threshold != -std::numeric_limits<float>::infinity();
}
bool
update(std::map<std::string,xmlrpc_c::value>const& params);
};
}


@ -1,43 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include <map>
#include <stdint.h>
#include <xmlrpc-c/base.hpp>
#include <xmlrpc-c/registry.hpp>
#include <xmlrpc-c/server_abyss.hpp>
namespace Moses2
{
class Parameter;
struct
ServerOptions
{
bool is_serial;
uint32_t numThreads; // might not be used any more, actually
size_t sessionTimeout; // this is related to Moses translation sessions
size_t sessionCacheSize; // this is related to Moses translation sessions
int port; // this is for the abyss server
std::string logfile; // this is for the abyss server
int maxConn; // this is for the abyss server
int maxConnBacklog; // this is for the abyss server
int keepaliveTimeout; // this is for the abyss server
int keepaliveMaxConn; // this is for the abyss server
int timeout; // this is for the abyss server
bool init(Parameter const& param);
ServerOptions(Parameter const& param);
ServerOptions();
bool
update(std::map<std::string,xmlrpc_c::value>const& params)
{
return true;
}
};
}

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -5,36 +5,37 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.602770742" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.602770742" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.602770742" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.602770742." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1436139469" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.622899770" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/CreateOnDiskPt}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1448999623" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.2139008298" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2008193341" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.627728792" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1832148270" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1681469807" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.627728792" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1832148270" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1681469807" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.425758466" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<option id="gnu.cpp.compiler.option.preprocessor.def.425758466" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.285185442" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.587301391" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.2116328611" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.2129089003" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.2116328611" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.2129089003" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<option id="gnu.c.compiler.option.dialect.std.1726327101" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1464765114" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.606542044" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
@ -65,12 +66,14 @@
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="probingpt"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
<option id="gnu.cpp.link.option.paths.815001500" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/probingpt/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
@ -78,6 +81,7 @@
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
<listOptionValue builtIn="false" value="/opt/local/lib"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2077999464" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
@ -97,29 +101,29 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.168814843" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.168814843" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.168814843" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.168814843." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.844577457" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1635721038" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/CreateOnDiskPt}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.361379130" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.799410017" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1404799808" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.696270987" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.1052942304" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.696270987" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.1052942304" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2139553528" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1633770352" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1936692829" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.release.option.debugging.level.2077864052" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1936692829" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.release.option.debugging.level.2077864052" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1045097629" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.455462639" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>