mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-07-14 23:00:29 +03:00
Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
f07e60aece
24
.travis.yml
Normal file
24
.travis.yml
Normal file
@ -0,0 +1,24 @@
|
||||
sudo: false
|
||||
dist: trusty
|
||||
language: c
|
||||
compiler: gcc
|
||||
env:
|
||||
matrix:
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- subversion
|
||||
- automake
|
||||
- libtool
|
||||
- zlib1g-dev
|
||||
- libbz2-dev
|
||||
- liblzma-dev
|
||||
- libboost-all-dev
|
||||
- libgoogle-perftools-dev
|
||||
- libxmlrpc-c++.*-dev
|
||||
- cmake
|
||||
- csh
|
||||
script:
|
||||
- ./bjam -j4
|
10
Jamroot
10
Jamroot
@ -111,7 +111,7 @@ external-lib z ;
|
||||
|
||||
#lib dl : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
|
||||
#requirements += <library>dl ;
|
||||
#requirements += <cxxflags>-std=c++0x ;
|
||||
requirements += <cxxflags>-std=c++0x ;
|
||||
|
||||
# Allow moses to report the git commit hash of the version used for compilation
|
||||
moses_githash = [ _shell "git describe --dirty" ] ;
|
||||
@ -183,7 +183,6 @@ requirements += [ option.get "with-mm" : : <define>MAX_NUM_FACTORS=4 ] ;
|
||||
requirements += [ option.get "unlabelled-source" : : <define>UNLABELLED_SOURCE ] ;
|
||||
|
||||
if [ option.get "with-oxlm" ] {
|
||||
external-lib boost_serialization ;
|
||||
external-lib gomp ;
|
||||
requirements += <library>boost_serialization ;
|
||||
requirements += <library>gomp ;
|
||||
@ -317,7 +316,8 @@ contrib/c++tokenizer//tokenizer
|
||||
contrib/expected-bleu-training//train-expected-bleu
|
||||
contrib/expected-bleu-training//prepare-expected-bleu-training
|
||||
|
||||
contrib/moses2//programs
|
||||
probingpt//programs
|
||||
moses2//programs
|
||||
;
|
||||
|
||||
|
||||
@ -340,6 +340,6 @@ if [ path.exists $(TOP)/dist ] && $(prefix) != dist {
|
||||
#local temp = [ _shell "bash source ./s.sh" ] ;
|
||||
local temp = [ _shell "mkdir -p $(TOP)/bin" ] ;
|
||||
local temp = [ _shell "rm -f $(TOP)/bin/moses_chart" ] ;
|
||||
local temp = [ _shell "cd $(TOP)/bin && ln -s moses moses_chart" ] ;
|
||||
local temp = [ _shell "cd $(TOP)/bin && ln -s CreateProbingPT CreateProbingPT2" ] ;
|
||||
local temp = [ _shell "cd $(TOP)/bin && ln -sf moses moses_chart" ] ;
|
||||
local temp = [ _shell "cd $(TOP)/bin && ln -sf CreateProbingPT CreateProbingPT2" ] ;
|
||||
|
||||
|
@ -91,11 +91,11 @@ $(call safepath,$(IRSTLM_PREFIX)/bin/build-lm.sh):
|
||||
rm -rf ${TMP}
|
||||
|
||||
# boost
|
||||
boost: URL=http://sourceforge.net/projects/boost/files/boost/1.59.0/boost_1_59_0.tar.gz/download
|
||||
boost: URL=http://sourceforge.net/projects/boost/files/boost/1.63.0/boost_1_63_0.tar.gz/download
|
||||
boost: TMP=$(CWD)/build/boost
|
||||
boost: override PREFIX=${BOOST_PREFIX}
|
||||
boost: | $(call safepath,${BOOST_PREFIX}/include/boost)
|
||||
$(call safepath,${BOOST_PREFIX}/include/boost):
|
||||
$(sfget)
|
||||
cd '${TMP}/boost_1_59_0' && ./bootstrap.sh && ./b2 --prefix=${PREFIX} -j${nproc} install
|
||||
cd '${TMP}/boost_1_63_0' && ./bootstrap.sh && ./b2 --prefix=${PREFIX} -j${nproc} install
|
||||
rm -rf ${TMP}
|
||||
|
@ -1,129 +0,0 @@
|
||||
/*
|
||||
* ArcList.cpp
|
||||
*
|
||||
* Created on: 26 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <boost/foreach.hpp>
|
||||
#include "ArcLists.h"
|
||||
#include "HypothesisBase.h"
|
||||
#include "util/exception.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
ArcLists::ArcLists()
|
||||
{
|
||||
// TODO Auto-generated constructor stub
|
||||
|
||||
}
|
||||
|
||||
ArcLists::~ArcLists()
|
||||
{
|
||||
BOOST_FOREACH(const Coll::value_type &collPair, m_coll){
|
||||
const ArcList *arcList = collPair.second;
|
||||
delete arcList;
|
||||
}
|
||||
}
|
||||
|
||||
void ArcLists::AddArc(bool added, const HypothesisBase *currHypo,
|
||||
const HypothesisBase *otherHypo)
|
||||
{
|
||||
//cerr << added << " " << currHypo << " " << otherHypo << endl;
|
||||
ArcList *arcList;
|
||||
if (added) {
|
||||
// we're winners!
|
||||
if (otherHypo) {
|
||||
// there was a existing losing hypo
|
||||
arcList = &GetAndDetachArcList(otherHypo);
|
||||
}
|
||||
else {
|
||||
// there was no existing hypo
|
||||
arcList = new ArcList;
|
||||
}
|
||||
m_coll[currHypo] = arcList;
|
||||
}
|
||||
else {
|
||||
// we're losers!
|
||||
// there should be a winner, we're not doing beam pruning
|
||||
UTIL_THROW_IF2(otherHypo == NULL, "There must have been a winning hypo");
|
||||
arcList = &GetArcList(otherHypo);
|
||||
}
|
||||
|
||||
// in any case, add the curr hypo
|
||||
arcList->push_back(currHypo);
|
||||
}
|
||||
|
||||
ArcList &ArcLists::GetArcList(const HypothesisBase *hypo)
|
||||
{
|
||||
Coll::iterator iter = m_coll.find(hypo);
|
||||
UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
|
||||
ArcList &arcList = *iter->second;
|
||||
return arcList;
|
||||
}
|
||||
|
||||
const ArcList &ArcLists::GetArcList(const HypothesisBase *hypo) const
|
||||
{
|
||||
Coll::const_iterator iter = m_coll.find(hypo);
|
||||
|
||||
if (iter == m_coll.end()) {
|
||||
cerr << "looking for:" << hypo << " have " << m_coll.size() << " :";
|
||||
BOOST_FOREACH(const Coll::value_type &collPair, m_coll){
|
||||
const HypothesisBase *hypo = collPair.first;
|
||||
cerr << hypo << " ";
|
||||
}
|
||||
}
|
||||
|
||||
UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list for " << hypo);
|
||||
ArcList &arcList = *iter->second;
|
||||
return arcList;
|
||||
}
|
||||
|
||||
ArcList &ArcLists::GetAndDetachArcList(const HypothesisBase *hypo)
|
||||
{
|
||||
Coll::iterator iter = m_coll.find(hypo);
|
||||
UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
|
||||
ArcList &arcList = *iter->second;
|
||||
|
||||
m_coll.erase(iter);
|
||||
|
||||
return arcList;
|
||||
}
|
||||
|
||||
void ArcLists::Sort()
|
||||
{
|
||||
BOOST_FOREACH(Coll::value_type &collPair, m_coll){
|
||||
ArcList &list = *collPair.second;
|
||||
std::sort(list.begin(), list.end(), HypothesisFutureScoreOrderer() );
|
||||
}
|
||||
}
|
||||
|
||||
void ArcLists::Delete(const HypothesisBase *hypo)
|
||||
{
|
||||
//cerr << "hypo=" << hypo->Debug() << endl;
|
||||
//cerr << "m_coll=" << m_coll.size() << endl;
|
||||
Coll::iterator iter = m_coll.find(hypo);
|
||||
UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
|
||||
ArcList *arcList = iter->second;
|
||||
|
||||
m_coll.erase(iter);
|
||||
delete arcList;
|
||||
}
|
||||
|
||||
std::string ArcLists::Debug(const System &system) const
|
||||
{
|
||||
stringstream strm;
|
||||
BOOST_FOREACH(const Coll::value_type &collPair, m_coll){
|
||||
const ArcList *arcList = collPair.second;
|
||||
strm << arcList << "(" << arcList->size() << ") ";
|
||||
}
|
||||
return strm.str();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,48 +0,0 @@
|
||||
/*
|
||||
* SkeletonStatefulFF.h
|
||||
*
|
||||
* Created on: 27 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#ifndef SKELETONSTATEFULFF_H_
|
||||
#define SKELETONSTATEFULFF_H_
|
||||
|
||||
#include "StatefulFeatureFunction.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class SkeletonStatefulFF: public StatefulFeatureFunction
|
||||
{
|
||||
public:
|
||||
SkeletonStatefulFF(size_t startInd, const std::string &line);
|
||||
virtual ~SkeletonStatefulFF();
|
||||
|
||||
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
|
||||
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
|
||||
const InputType &input, const Hypothesis &hypo) const;
|
||||
|
||||
virtual void
|
||||
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
|
||||
const TargetPhraseImpl &targetPhrase, Scores &scores,
|
||||
SCORE &estimatedScore) const;
|
||||
|
||||
virtual void
|
||||
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
|
||||
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
|
||||
SCORE &estimatedScore) const;
|
||||
|
||||
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
|
||||
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
|
||||
FFState &state) const;
|
||||
|
||||
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
|
||||
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
|
||||
FFState &state) const;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif /* SKELETONSTATEFULFF_H_ */
|
@ -1,34 +0,0 @@
|
||||
/*
|
||||
* SkeletonStatefulFF.h
|
||||
*
|
||||
* Created on: 27 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "StatelessFeatureFunction.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class SkeletonStatelessFF: public StatelessFeatureFunction
|
||||
{
|
||||
public:
|
||||
SkeletonStatelessFF(size_t startInd, const std::string &line);
|
||||
virtual ~SkeletonStatelessFF();
|
||||
|
||||
virtual void
|
||||
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
|
||||
const TargetPhraseImpl &targetPhrase, Scores &scores,
|
||||
SCORE &estimatedScore) const;
|
||||
|
||||
virtual void
|
||||
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
|
||||
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
|
||||
SCORE &estimatedScore) const;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,189 +0,0 @@
|
||||
/*
|
||||
* HypothesisColl.cpp
|
||||
*
|
||||
* Created on: 26 Feb 2016
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <boost/foreach.hpp>
|
||||
#include "HypothesisColl.h"
|
||||
#include "ManagerBase.h"
|
||||
#include "System.h"
|
||||
#include "MemPoolAllocator.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
HypothesisColl::HypothesisColl(const ManagerBase &mgr) :
|
||||
m_coll(MemPoolAllocator<const HypothesisBase*>(mgr.GetPool())), m_sortedHypos(
|
||||
NULL)
|
||||
{
|
||||
}
|
||||
|
||||
const HypothesisBase *HypothesisColl::GetBestHypo() const
|
||||
{
|
||||
if (GetSize() == 0) {
|
||||
return NULL;
|
||||
}
|
||||
if (m_sortedHypos) {
|
||||
return (*m_sortedHypos)[0];
|
||||
}
|
||||
|
||||
SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
|
||||
const HypothesisBase *bestHypo;
|
||||
BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
|
||||
if (hypo->GetFutureScore() > bestScore) {
|
||||
bestScore = hypo->GetFutureScore();
|
||||
bestHypo = hypo;
|
||||
}
|
||||
}
|
||||
return bestHypo;
|
||||
}
|
||||
|
||||
void HypothesisColl::Add(
|
||||
const System &system,
|
||||
HypothesisBase *hypo,
|
||||
Recycler<HypothesisBase*> &hypoRecycle,
|
||||
ArcLists &arcLists)
|
||||
{
|
||||
StackAdd added = Add(hypo);
|
||||
|
||||
size_t nbestSize = system.options.nbest.nbest_size;
|
||||
if (nbestSize) {
|
||||
arcLists.AddArc(added.added, hypo, added.other);
|
||||
}
|
||||
else {
|
||||
if (!added.added) {
|
||||
hypoRecycle.Recycle(hypo);
|
||||
}
|
||||
else if (added.other) {
|
||||
hypoRecycle.Recycle(added.other);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
StackAdd HypothesisColl::Add(const HypothesisBase *hypo)
|
||||
{
|
||||
std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
|
||||
|
||||
// CHECK RECOMBINATION
|
||||
if (addRet.second) {
|
||||
// equiv hypo doesn't exists
|
||||
return StackAdd(true, NULL);
|
||||
}
|
||||
else {
|
||||
HypothesisBase *hypoExisting = const_cast<HypothesisBase*>(*addRet.first);
|
||||
if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
|
||||
// incoming hypo is better than the one we have
|
||||
const HypothesisBase * const &hypoExisting1 = *addRet.first;
|
||||
const HypothesisBase *&hypoExisting2 =
|
||||
const_cast<const HypothesisBase *&>(hypoExisting1);
|
||||
hypoExisting2 = hypo;
|
||||
|
||||
return StackAdd(true, hypoExisting);
|
||||
}
|
||||
else {
|
||||
// already storing the best hypo. discard incoming hypo
|
||||
return StackAdd(false, hypoExisting);
|
||||
}
|
||||
}
|
||||
|
||||
assert(false);
|
||||
}
|
||||
|
||||
const Hypotheses &HypothesisColl::GetSortedAndPruneHypos(
|
||||
const ManagerBase &mgr,
|
||||
ArcLists &arcLists) const
|
||||
{
|
||||
if (m_sortedHypos == NULL) {
|
||||
// create sortedHypos first
|
||||
MemPool &pool = mgr.GetPool();
|
||||
m_sortedHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool,
|
||||
m_coll.size());
|
||||
|
||||
size_t ind = 0;
|
||||
BOOST_FOREACH(const HypothesisBase *hypo, m_coll){
|
||||
(*m_sortedHypos)[ind] = hypo;
|
||||
++ind;
|
||||
}
|
||||
|
||||
SortAndPruneHypos(mgr, arcLists);
|
||||
}
|
||||
|
||||
return *m_sortedHypos;
|
||||
}
|
||||
|
||||
const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos() const
|
||||
{
|
||||
UTIL_THROW_IF2(m_sortedHypos == NULL, "m_sortedHypos must be sorted beforehand");
|
||||
return *m_sortedHypos;
|
||||
}
|
||||
|
||||
void HypothesisColl::SortAndPruneHypos(const ManagerBase &mgr,
|
||||
ArcLists &arcLists) const
|
||||
{
|
||||
size_t stackSize = mgr.system.options.search.stack_size;
|
||||
Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle();
|
||||
|
||||
/*
|
||||
cerr << "UNSORTED hypos: ";
|
||||
BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
|
||||
cerr << hypo << "(" << hypo->GetFutureScore() << ")" << " ";
|
||||
}
|
||||
cerr << endl;
|
||||
*/
|
||||
Hypotheses::iterator iterMiddle;
|
||||
iterMiddle =
|
||||
(stackSize == 0 || m_sortedHypos->size() < stackSize) ?
|
||||
m_sortedHypos->end() : m_sortedHypos->begin() + stackSize;
|
||||
|
||||
std::partial_sort(m_sortedHypos->begin(), iterMiddle, m_sortedHypos->end(),
|
||||
HypothesisFutureScoreOrderer());
|
||||
|
||||
// prune
|
||||
if (stackSize && m_sortedHypos->size() > stackSize) {
|
||||
for (size_t i = stackSize; i < m_sortedHypos->size(); ++i) {
|
||||
HypothesisBase *hypo = const_cast<HypothesisBase*>((*m_sortedHypos)[i]);
|
||||
recycler.Recycle(hypo);
|
||||
|
||||
// delete from arclist
|
||||
if (mgr.system.options.nbest.nbest_size) {
|
||||
arcLists.Delete(hypo);
|
||||
}
|
||||
}
|
||||
m_sortedHypos->resize(stackSize);
|
||||
}
|
||||
|
||||
/*
|
||||
cerr << "sorted hypos: ";
|
||||
for (size_t i = 0; i < m_sortedHypos->size(); ++i) {
|
||||
const HypothesisBase *hypo = (*m_sortedHypos)[i];
|
||||
cerr << hypo << " ";
|
||||
}
|
||||
cerr << endl;
|
||||
*/
|
||||
}
|
||||
|
||||
void HypothesisColl::Clear()
|
||||
{
|
||||
m_sortedHypos = NULL;
|
||||
m_coll.clear();
|
||||
}
|
||||
|
||||
std::string HypothesisColl::Debug(const System &system) const
|
||||
{
|
||||
stringstream out;
|
||||
BOOST_FOREACH (const HypothesisBase *hypo, m_coll) {
|
||||
out << hypo->Debug(system);
|
||||
out << std::endl << std::endl;
|
||||
}
|
||||
|
||||
return out.str();
|
||||
}
|
||||
|
||||
} /* namespace Moses2 */
|
@ -1,246 +0,0 @@
|
||||
/*
|
||||
* LanguageModelDALM.cpp
|
||||
*
|
||||
* Created on: 5 Dec 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "LanguageModelDALM.h"
|
||||
#include "../TypeDef.h"
|
||||
#include "../System.h"
|
||||
#include "dalm.h"
|
||||
#include "util/exception.hh"
|
||||
#include "../legacy/InputFileStream.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
class Murmur: public DALM::State::HashFunction
|
||||
{
|
||||
public:
|
||||
Murmur(std::size_t seed=0): seed(seed) {
|
||||
}
|
||||
virtual std::size_t operator()(const DALM::VocabId *words, std::size_t size) const {
|
||||
return util::MurmurHashNative(words, sizeof(DALM::VocabId) * size, seed);
|
||||
}
|
||||
private:
|
||||
std::size_t seed;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
class DALMState : public FFState
|
||||
{
|
||||
private:
|
||||
DALM::State state;
|
||||
|
||||
public:
|
||||
DALMState() {
|
||||
}
|
||||
|
||||
DALMState(const DALMState &from) {
|
||||
state = from.state;
|
||||
}
|
||||
|
||||
virtual ~DALMState() {
|
||||
}
|
||||
|
||||
void reset(const DALMState &from) {
|
||||
state = from.state;
|
||||
}
|
||||
|
||||
virtual int Compare(const FFState& other) const {
|
||||
const DALMState &o = static_cast<const DALMState &>(other);
|
||||
if(state.get_count() < o.state.get_count()) return -1;
|
||||
else if(state.get_count() > o.state.get_count()) return 1;
|
||||
else return state.compare(o.state);
|
||||
}
|
||||
|
||||
virtual size_t hash() const {
|
||||
// imitate KenLM
|
||||
return state.hash(Murmur());
|
||||
}
|
||||
|
||||
virtual bool operator==(const FFState& other) const {
|
||||
const DALMState &o = static_cast<const DALMState &>(other);
|
||||
return state.compare(o.state) == 0;
|
||||
}
|
||||
|
||||
DALM::State &get_state() {
|
||||
return state;
|
||||
}
|
||||
|
||||
void refresh() {
|
||||
state.refresh();
|
||||
}
|
||||
|
||||
virtual std::string ToString() const
|
||||
{ return "DALM state"; }
|
||||
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
inline void read_ini(const char *inifile, string &model, string &words, string &wordstxt)
|
||||
{
|
||||
ifstream ifs(inifile);
|
||||
string line;
|
||||
|
||||
getline(ifs, line);
|
||||
while(ifs) {
|
||||
unsigned int pos = line.find("=");
|
||||
string key = line.substr(0, pos);
|
||||
string value = line.substr(pos+1, line.size()-pos);
|
||||
if(key=="MODEL") {
|
||||
model = value;
|
||||
} else if(key=="WORDS") {
|
||||
words = value;
|
||||
} else if(key=="WORDSTXT") {
|
||||
wordstxt = value;
|
||||
}
|
||||
getline(ifs, line);
|
||||
}
|
||||
}
|
||||
/////////////////////////
|
||||
|
||||
LanguageModelDALM::LanguageModelDALM(size_t startInd, const std::string &line)
|
||||
:StatefulFeatureFunction(startInd, line)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
LanguageModelDALM::~LanguageModelDALM() {
|
||||
// TODO Auto-generated destructor stub
|
||||
}
|
||||
|
||||
void LanguageModelDALM::Load(System &system)
|
||||
{
|
||||
/////////////////////
|
||||
// READING INIFILE //
|
||||
/////////////////////
|
||||
string inifile= m_filePath + "/dalm.ini";
|
||||
|
||||
string model; // Path to the double-array file.
|
||||
string words; // Path to the vocabulary file.
|
||||
string wordstxt; //Path to the vocabulary file in text format.
|
||||
read_ini(inifile.c_str(), model, words, wordstxt);
|
||||
|
||||
model = m_filePath + "/" + model;
|
||||
words = m_filePath + "/" + words;
|
||||
wordstxt = m_filePath + "/" + wordstxt;
|
||||
|
||||
UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(),
|
||||
util::FileOpenException,
|
||||
"Failed to read DALM ini file " << m_filePath << ". Probably doesn't exist");
|
||||
|
||||
////////////////
|
||||
// LOADING LM //
|
||||
////////////////
|
||||
|
||||
// Preparing a logger object.
|
||||
m_logger = new DALM::Logger(stderr);
|
||||
m_logger->setLevel(DALM::LOGGER_INFO);
|
||||
|
||||
// Load the vocabulary file.
|
||||
m_vocab = new DALM::Vocabulary(words, *m_logger);
|
||||
|
||||
// Load the language model.
|
||||
m_lm = new DALM::LM(model, *m_vocab, m_nGramOrder, *m_logger);
|
||||
|
||||
wid_start = m_vocab->lookup(BOS_);
|
||||
wid_end = m_vocab->lookup(EOS_);
|
||||
|
||||
// vocab mapping
|
||||
CreateVocabMapping(wordstxt, system);
|
||||
|
||||
m_beginSentenceFactor = system.GetVocab().AddFactor(BOS_, system);
|
||||
}
|
||||
|
||||
void LanguageModelDALM::CreateVocabMapping(const std::string &wordstxt, const System &system)
|
||||
{
|
||||
InputFileStream vocabStrm(wordstxt);
|
||||
|
||||
std::vector< std::pair<std::size_t, DALM::VocabId> > vlist;
|
||||
string line;
|
||||
std::size_t max_fid = 0;
|
||||
while(getline(vocabStrm, line)) {
|
||||
const Factor *factor = system.GetVocab().AddFactor(line, system);
|
||||
std::size_t fid = factor->GetId();
|
||||
DALM::VocabId wid = m_vocab->lookup(line.c_str());
|
||||
|
||||
vlist.push_back(std::pair<std::size_t, DALM::VocabId>(fid, wid));
|
||||
if(max_fid < fid) max_fid = fid;
|
||||
}
|
||||
|
||||
for(std::size_t i = 0; i < m_vocabMap.size(); i++) {
|
||||
m_vocabMap[i] = m_vocab->unk();
|
||||
}
|
||||
|
||||
m_vocabMap.resize(max_fid+1, m_vocab->unk());
|
||||
std::vector< std::pair<std::size_t, DALM::VocabId> >::iterator it = vlist.begin();
|
||||
while(it != vlist.end()) {
|
||||
std::pair<std::size_t, DALM::VocabId> &entry = *it;
|
||||
m_vocabMap[entry.first] = entry.second;
|
||||
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
void LanguageModelDALM::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "factor") {
|
||||
m_factorType = Scan<FactorType>(value);
|
||||
} else if (key == "order") {
|
||||
m_nGramOrder = Scan<size_t>(value);
|
||||
} else if (key == "path") {
|
||||
m_filePath = value;
|
||||
} else {
|
||||
StatefulFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
m_ContextSize = m_nGramOrder-1;
|
||||
}
|
||||
|
||||
FFState* LanguageModelDALM::BlankState(MemPool &pool, const System &sys) const
|
||||
{
|
||||
DALMState *state = new DALMState();
|
||||
return state;
|
||||
}
|
||||
|
||||
void LanguageModelDALM::EmptyHypothesisState(FFState &state,
|
||||
const ManagerBase &mgr,
|
||||
const InputType &input,
|
||||
const Hypothesis &hypo) const
|
||||
{
|
||||
DALMState &dalmState = static_cast<DALMState&>(state);
|
||||
m_lm->init_state(dalmState.get_state());
|
||||
}
|
||||
|
||||
void LanguageModelDALM::EvaluateInIsolation(MemPool &pool,
|
||||
const System &system,
|
||||
const Phrase &source,
|
||||
const TargetPhraseImpl &targetPhrase,
|
||||
Scores &scores,
|
||||
SCORE &estimatedScore) const
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void LanguageModelDALM::EvaluateWhenApplied(const ManagerBase &mgr,
|
||||
const Hypothesis &hypo,
|
||||
const FFState &prevState,
|
||||
Scores &scores,
|
||||
FFState &state) const
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void LanguageModelDALM::EvaluateWhenApplied(const SCFG::Manager &mgr,
|
||||
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
|
||||
FFState &state) const
|
||||
{
|
||||
UTIL_THROW2("Not implemented");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,75 +0,0 @@
|
||||
/*
|
||||
* LanguageModelDALM.h
|
||||
*
|
||||
* Created on: 5 Dec 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "../FF/StatefulFeatureFunction.h"
|
||||
#include "../legacy/Util2.h"
|
||||
#include "../legacy/Factor.h"
|
||||
|
||||
namespace DALM
|
||||
{
|
||||
class Logger;
|
||||
class Vocabulary;
|
||||
class State;
|
||||
class LM;
|
||||
union Fragment;
|
||||
class Gap;
|
||||
|
||||
typedef unsigned int VocabId;
|
||||
}
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class LanguageModelDALM: public StatefulFeatureFunction
|
||||
{
|
||||
public:
|
||||
LanguageModelDALM(size_t startInd, const std::string &line);
|
||||
virtual ~LanguageModelDALM();
|
||||
|
||||
virtual void Load(System &system);
|
||||
virtual void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
|
||||
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
|
||||
const InputType &input, const Hypothesis &hypo) const;
|
||||
|
||||
virtual void
|
||||
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source,
|
||||
const TargetPhraseImpl &targetPhrase, Scores &scores,
|
||||
SCORE &estimatedScore) const;
|
||||
|
||||
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
|
||||
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
|
||||
FFState &state) const;
|
||||
|
||||
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
|
||||
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
|
||||
FFState &state) const;
|
||||
|
||||
protected:
|
||||
FactorType m_factorType;
|
||||
|
||||
std::string m_filePath;
|
||||
size_t m_nGramOrder; //! max n-gram length contained in this LM
|
||||
size_t m_ContextSize;
|
||||
|
||||
DALM::Logger *m_logger;
|
||||
DALM::Vocabulary *m_vocab;
|
||||
DALM::LM *m_lm;
|
||||
DALM::VocabId wid_start, wid_end;
|
||||
|
||||
const Factor *m_beginSentenceFactor;
|
||||
|
||||
mutable std::vector<DALM::VocabId> m_vocabMap;
|
||||
|
||||
void CreateVocabMapping(const std::string &wordstxt, const System &system);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,171 +0,0 @@
|
||||
/*
|
||||
* SearchNormal.cpp
|
||||
*
|
||||
* Created on: 25 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "Search.h"
|
||||
#include <algorithm>
|
||||
#include <boost/foreach.hpp>
|
||||
#include "Stack.h"
|
||||
#include "../Manager.h"
|
||||
#include "../TrellisPath.h"
|
||||
#include "../Sentence.h"
|
||||
#include "../../TrellisPaths.h"
|
||||
#include "../../InputPathsBase.h"
|
||||
#include "../../Phrase.h"
|
||||
#include "../../System.h"
|
||||
#include "../../PhraseBased/TargetPhrases.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
namespace NSBatch
|
||||
{
|
||||
|
||||
Search::Search(Manager &mgr)
|
||||
:Moses2::Search(mgr)
|
||||
, m_stacks(mgr)
|
||||
, m_batch(mgr.system.GetBatch(mgr.GetSystemPool()))
|
||||
{
|
||||
// TODO Auto-generated constructor stub
|
||||
|
||||
}
|
||||
|
||||
Search::~Search()
|
||||
{
|
||||
// TODO Auto-generated destructor stub
|
||||
}
|
||||
|
||||
void Search::Decode()
|
||||
{
|
||||
// init stacks
|
||||
const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
|
||||
m_stacks.Init(mgr, sentence.GetSize() + 1);
|
||||
|
||||
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
|
||||
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(),
|
||||
initBitmap);
|
||||
initHypo->EmptyHypothesisState(mgr.GetInput());
|
||||
|
||||
m_stacks.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists);
|
||||
|
||||
for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
|
||||
Decode(stackInd);
|
||||
//cerr << m_stacks << endl;
|
||||
|
||||
// delete stack to save mem
|
||||
if (stackInd < m_stacks.GetSize() - 1) {
|
||||
m_stacks.Delete(stackInd);
|
||||
}
|
||||
//cerr << m_stacks << endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Search::Decode(size_t stackInd)
|
||||
{
|
||||
Stack &stack = m_stacks[stackInd];
|
||||
if (&stack == &m_stacks.Back()) {
|
||||
// last stack. don't do anythin
|
||||
return;
|
||||
}
|
||||
|
||||
const Hypotheses &hypos = stack.GetSortedAndPruneHypos(mgr, mgr.arcLists);
|
||||
|
||||
const InputPaths &paths = mgr.GetInputPaths();
|
||||
|
||||
BOOST_FOREACH(const InputPathBase *path, paths){
|
||||
BOOST_FOREACH(const HypothesisBase *hypo, hypos) {
|
||||
Extend(*static_cast<const Hypothesis*>(hypo), *static_cast<const InputPath*>(path));
|
||||
}
|
||||
}
|
||||
|
||||
// process batch
|
||||
mgr.system.featureFunctions.EvaluateWhenAppliedBatch(m_batch);
|
||||
|
||||
for (size_t i = 0; i < m_batch.size(); ++i) {
|
||||
Hypothesis *hypo = m_batch[i];
|
||||
m_stacks.Add(hypo, mgr.GetHypoRecycle(), mgr.arcLists);
|
||||
}
|
||||
m_batch.clear();
|
||||
}
|
||||
|
||||
void Search::Extend(const Hypothesis &hypo, const InputPath &path)
|
||||
{
|
||||
const Bitmap &hypoBitmap = hypo.GetBitmap();
|
||||
const Range &hypoRange = hypo.GetInputPath().range;
|
||||
const Range &pathRange = path.range;
|
||||
|
||||
if (!CanExtend(hypoBitmap, hypoRange.GetEndPos(), pathRange)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint();
|
||||
if (!reorderingConstraint.Check(hypoBitmap, pathRange.GetStartPos(), pathRange.GetEndPos())) {
|
||||
return;
|
||||
}
|
||||
//cerr << " YES" << endl;
|
||||
|
||||
// extend this hypo
|
||||
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
|
||||
//SCORE estimatedScore = mgr.GetEstimatedScores().CalcFutureScore2(bitmap, pathRange.GetStartPos(), pathRange.GetEndPos());
|
||||
SCORE estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
|
||||
|
||||
size_t numPt = mgr.system.mappings.size();
|
||||
const TargetPhrases **tpsAllPt = path.targetPhrases;
|
||||
for (size_t i = 0; i < numPt; ++i) {
|
||||
const TargetPhrases *tps = tpsAllPt[i];
|
||||
if (tps) {
|
||||
Extend(hypo, *tps, path, newBitmap, estimatedScore);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Search::Extend(const Hypothesis &hypo, const TargetPhrases &tps,
|
||||
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
|
||||
{
|
||||
BOOST_FOREACH(const TargetPhraseImpl *tp, tps){
|
||||
Extend(hypo, *tp, path, newBitmap, estimatedScore);
|
||||
}
|
||||
}
|
||||
|
||||
void Search::Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp,
|
||||
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
|
||||
{
|
||||
Hypothesis *newHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
newHypo->Init(mgr, hypo, path, tp, newBitmap, estimatedScore);
|
||||
|
||||
m_batch.push_back(newHypo);
|
||||
//newHypo->EvaluateWhenApplied();
|
||||
|
||||
//m_stacks.Add(newHypo, mgr.GetHypoRecycle(), mgr.arcLists);
|
||||
|
||||
//m_arcLists.AddArc(stackAdded.added, newHypo, stackAdded.other);
|
||||
//stack.Prune(mgr.GetHypoRecycle(), mgr.system.stackSize, mgr.system.stackSize * 2);
|
||||
|
||||
}
|
||||
|
||||
const Hypothesis *Search::GetBestHypo() const
|
||||
{
|
||||
const Stack &lastStack = m_stacks.Back();
|
||||
return lastStack.GetBestHypo<Hypothesis>();
|
||||
}
|
||||
|
||||
void Search::AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const
|
||||
{
|
||||
const Stack &lastStack = m_stacks.Back();
|
||||
const Hypotheses &hypos = lastStack.GetSortedAndPruneHypos(mgr, mgr.arcLists);
|
||||
|
||||
BOOST_FOREACH(const HypothesisBase *hypoBase, hypos){
|
||||
const Hypothesis *hypo = static_cast<const Hypothesis*>(hypoBase);
|
||||
TrellisPath *path = new TrellisPath(hypo, mgr.arcLists);
|
||||
paths.Add(path);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
}
|
||||
|
@ -1,53 +0,0 @@
|
||||
/*
|
||||
* SearchNormal.h
|
||||
*
|
||||
* Created on: 25 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "../../legacy/Range.h"
|
||||
#include "../../legacy/Bitmap.h"
|
||||
#include "../../TypeDef.h"
|
||||
#include "../Search.h"
|
||||
#include "Stacks.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
class Hypothesis;
|
||||
class InputPath;
|
||||
class TargetPhrases;
|
||||
class TargetPhraseImpl;
|
||||
|
||||
namespace NSBatch
|
||||
{
|
||||
class Stacks;
|
||||
|
||||
class Search: public Moses2::Search
|
||||
{
|
||||
public:
|
||||
Search(Manager &mgr);
|
||||
virtual ~Search();
|
||||
|
||||
virtual void Decode();
|
||||
const Hypothesis *GetBestHypo() const;
|
||||
|
||||
void AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const;
|
||||
|
||||
protected:
|
||||
Stacks m_stacks;
|
||||
|
||||
Batch &m_batch;
|
||||
|
||||
void Decode(size_t stackInd);
|
||||
void Extend(const Hypothesis &hypo, const InputPath &path);
|
||||
void Extend(const Hypothesis &hypo, const TargetPhrases &tps,
|
||||
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore);
|
||||
void Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp,
|
||||
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
/*
|
||||
* Stack.cpp
|
||||
*
|
||||
* Created on: 24 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <boost/foreach.hpp>
|
||||
#include "Stack.h"
|
||||
#include "../Hypothesis.h"
|
||||
#include "../Manager.h"
|
||||
#include "../../Scores.h"
|
||||
#include "../../HypothesisColl.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSBatch
|
||||
{
|
||||
|
||||
Stack::Stack(const Manager &mgr) :
|
||||
HypothesisColl(mgr)
|
||||
{
|
||||
// TODO Auto-generated constructor stub
|
||||
|
||||
}
|
||||
|
||||
Stack::~Stack()
|
||||
{
|
||||
// TODO Auto-generated destructor stub
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
/*
|
||||
* Stack.h
|
||||
*
|
||||
* Created on: 24 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include <deque>
|
||||
#include "../Hypothesis.h"
|
||||
#include "../../TypeDef.h"
|
||||
#include "../../HypothesisColl.h"
|
||||
#include "../../legacy/Util2.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSBatch
|
||||
{
|
||||
class Stack: public HypothesisColl
|
||||
{
|
||||
public:
|
||||
Stack(const Manager &mgr);
|
||||
virtual ~Stack();
|
||||
|
||||
protected:
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
}
|
@ -1,67 +0,0 @@
|
||||
/*
|
||||
* Stacks.cpp
|
||||
*
|
||||
* Created on: 6 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "Stacks.h"
|
||||
#include "../Manager.h"
|
||||
#include "../../System.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSBatch
|
||||
{
|
||||
|
||||
Stacks::Stacks(const Manager &mgr) :
|
||||
m_mgr(mgr)
|
||||
{
|
||||
// TODO Auto-generated constructor stub
|
||||
|
||||
}
|
||||
|
||||
Stacks::~Stacks()
|
||||
{
|
||||
for (size_t i = 0; i < m_stacks.size(); ++i) {
|
||||
delete m_stacks[i];
|
||||
}
|
||||
}
|
||||
|
||||
void Stacks::Init(const Manager &mgr, size_t numStacks)
|
||||
{
|
||||
m_stacks.resize(numStacks);
|
||||
for (size_t i = 0; i < m_stacks.size(); ++i) {
|
||||
m_stacks[i] = new Stack(mgr);
|
||||
}
|
||||
}
|
||||
|
||||
std::string Stacks::Debug(const System &system) const
|
||||
{
|
||||
stringstream out;
|
||||
for (size_t i = 0; i < GetSize(); ++i) {
|
||||
const Stack *stack = m_stacks[i];
|
||||
if (stack) {
|
||||
out << stack->GetSize() << " ";
|
||||
}
|
||||
else {
|
||||
out << "N ";
|
||||
}
|
||||
}
|
||||
return out.str();
|
||||
}
|
||||
|
||||
void Stacks::Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
|
||||
ArcLists &arcLists)
|
||||
{
|
||||
size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered();
|
||||
//cerr << "numWordsCovered=" << numWordsCovered << endl;
|
||||
Stack &stack = *m_stacks[numWordsCovered];
|
||||
stack.Add(m_mgr.system, hypo, hypoRecycle, arcLists);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@ -1,62 +0,0 @@
|
||||
/*
|
||||
* Stacks.h
|
||||
*
|
||||
* Created on: 6 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "Stack.h"
|
||||
#include "../../Recycler.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
class Manager;
|
||||
class ArcLists;
|
||||
|
||||
namespace NSBatch
|
||||
{
|
||||
|
||||
class Stacks
|
||||
{
|
||||
public:
|
||||
Stacks(const Manager &mgr);
|
||||
virtual ~Stacks();
|
||||
|
||||
void Init(const Manager &mgr, size_t numStacks);
|
||||
|
||||
size_t GetSize() const
|
||||
{
|
||||
return m_stacks.size();
|
||||
}
|
||||
|
||||
const Stack &Back() const
|
||||
{
|
||||
return *m_stacks.back();
|
||||
}
|
||||
|
||||
Stack &operator[](size_t ind)
|
||||
{
|
||||
return *m_stacks[ind];
|
||||
}
|
||||
|
||||
void Delete(size_t ind)
|
||||
{
|
||||
delete m_stacks[ind];
|
||||
m_stacks[ind] = NULL;
|
||||
}
|
||||
|
||||
void Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
|
||||
ArcLists &arcLists);
|
||||
|
||||
std::string Debug(const System &system) const;
|
||||
|
||||
protected:
|
||||
const Manager &m_mgr;
|
||||
std::vector<Stack*> m_stacks;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
@ -1,246 +0,0 @@
|
||||
/*
|
||||
* Search.cpp
|
||||
*
|
||||
* Created on: 16 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <boost/foreach.hpp>
|
||||
#include "Search.h"
|
||||
#include "Stack.h"
|
||||
#include "../Manager.h"
|
||||
#include "../Hypothesis.h"
|
||||
#include "../TrellisPath.h"
|
||||
#include "../Sentence.h"
|
||||
#include "../../TrellisPaths.h"
|
||||
#include "../../InputPathsBase.h"
|
||||
#include "../../InputPathBase.h"
|
||||
#include "../../System.h"
|
||||
#include "../../TranslationTask.h"
|
||||
#include "../../legacy/Util2.h"
|
||||
#include "../../PhraseBased/TargetPhrases.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSCubePruningMiniStack
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
Search::Search(Manager &mgr) :
|
||||
Moses2::Search(mgr), m_stack(mgr), m_cubeEdgeAlloc(mgr.GetPool())
|
||||
|
||||
, m_queue(QueueItemOrderer(),
|
||||
std::vector<QueueItem*, MemPoolAllocator<QueueItem*> >(
|
||||
MemPoolAllocator<QueueItem*>(mgr.GetPool())))
|
||||
|
||||
, m_seenPositions(
|
||||
MemPoolAllocator<CubeEdge::SeenPositionItem>(mgr.GetPool()))
|
||||
|
||||
, m_queueItemRecycler(MemPoolAllocator<QueueItem*>(mgr.GetPool()))
|
||||
|
||||
{
|
||||
}
|
||||
|
||||
Search::~Search()
|
||||
{
|
||||
}
|
||||
|
||||
void Search::Decode()
|
||||
{
|
||||
const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
|
||||
|
||||
// init cue edges
|
||||
m_cubeEdges.resize(sentence.GetSize() + 1);
|
||||
for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
|
||||
m_cubeEdges[i] = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges(
|
||||
m_cubeEdgeAlloc);
|
||||
}
|
||||
|
||||
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
|
||||
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(),
|
||||
initBitmap);
|
||||
initHypo->EmptyHypothesisState(mgr.GetInput());
|
||||
//cerr << "initHypo=" << *initHypo << endl;
|
||||
|
||||
m_stack.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists);
|
||||
PostDecode(0);
|
||||
|
||||
for (size_t stackInd = 1; stackInd < sentence.GetSize() + 1;
|
||||
++stackInd) {
|
||||
//cerr << "stackInd=" << stackInd << endl;
|
||||
m_stack.Clear();
|
||||
Decode(stackInd);
|
||||
PostDecode(stackInd);
|
||||
|
||||
//m_stack.DebugCounts();
|
||||
//cerr << m_stacks << endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Search::Decode(size_t stackInd)
|
||||
{
|
||||
Recycler<HypothesisBase*> &hypoRecycler = mgr.GetHypoRecycle();
|
||||
|
||||
// reuse queue from previous stack. Clear it first
|
||||
std::vector<QueueItem*, MemPoolAllocator<QueueItem*> > &container = Container(
|
||||
m_queue);
|
||||
//cerr << "container=" << container.size() << endl;
|
||||
BOOST_FOREACH(QueueItem *item, container){
|
||||
// recycle unused hypos from queue
|
||||
Hypothesis *hypo = item->hypo;
|
||||
hypoRecycler.Recycle(hypo);
|
||||
|
||||
// recycle queue item
|
||||
m_queueItemRecycler.push_back(item);
|
||||
}
|
||||
container.clear();
|
||||
|
||||
m_seenPositions.clear();
|
||||
|
||||
// add top hypo from every edge into queue
|
||||
CubeEdges &edges = *m_cubeEdges[stackInd];
|
||||
|
||||
BOOST_FOREACH(CubeEdge *edge, edges){
|
||||
//cerr << *edge << " ";
|
||||
edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
|
||||
}
|
||||
|
||||
/*
|
||||
cerr << "edges: ";
|
||||
boost::unordered_set<const Bitmap*> uniqueBM;
|
||||
BOOST_FOREACH(CubeEdge *edge, edges) {
|
||||
uniqueBM.insert(&edge->newBitmap);
|
||||
//cerr << *edge << " ";
|
||||
}
|
||||
cerr << edges.size() << " " << uniqueBM.size();
|
||||
cerr << endl;
|
||||
*/
|
||||
|
||||
size_t pops = 0;
|
||||
while (!m_queue.empty() && pops < mgr.system.options.cube.pop_limit) {
|
||||
// get best hypo from queue, add to stack
|
||||
//cerr << "queue=" << queue.size() << endl;
|
||||
QueueItem *item = m_queue.top();
|
||||
m_queue.pop();
|
||||
|
||||
CubeEdge *edge = item->edge;
|
||||
|
||||
// add hypo to stack
|
||||
Hypothesis *hypo = item->hypo;
|
||||
|
||||
if (mgr.system.options.cube.lazy_scoring) {
|
||||
hypo->EvaluateWhenApplied();
|
||||
}
|
||||
|
||||
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
|
||||
m_stack.Add(hypo, hypoRecycler, mgr.arcLists);
|
||||
|
||||
edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
|
||||
|
||||
++pops;
|
||||
}
|
||||
|
||||
// create hypo from every edge. Increase diversity
|
||||
if (mgr.system.options.cube.diversity) {
|
||||
while (!m_queue.empty()) {
|
||||
QueueItem *item = m_queue.top();
|
||||
m_queue.pop();
|
||||
|
||||
if (item->hypoIndex == 0 && item->tpIndex == 0) {
|
||||
// add hypo to stack
|
||||
Hypothesis *hypo = item->hypo;
|
||||
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
|
||||
m_stack.Add(hypo, hypoRecycler, mgr.arcLists);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Search::PostDecode(size_t stackInd)
|
||||
{
|
||||
MemPool &pool = mgr.GetPool();
|
||||
|
||||
const InputPaths &paths = mgr.GetInputPaths();
|
||||
const Matrix<InputPath*> &pathMatrix = paths.GetMatrix();
|
||||
size_t inputSize = pathMatrix.GetRows();
|
||||
size_t numPaths = pathMatrix.GetCols();
|
||||
|
||||
BOOST_FOREACH(const Stack::Coll::value_type &val, m_stack.GetColl()){
|
||||
const Bitmap &hypoBitmap = *val.first.first;
|
||||
size_t firstGap = hypoBitmap.GetFirstGapPos();
|
||||
size_t hypoEndPos = val.first.second;
|
||||
//cerr << "key=" << hypoBitmap << " " << firstGap << " " << inputSize << endl;
|
||||
|
||||
// create edges to next hypos from existing hypos
|
||||
for (size_t startPos = firstGap; startPos < inputSize; ++startPos) {
|
||||
for (size_t pathInd = 0; pathInd < numPaths; ++pathInd) {
|
||||
const InputPath *path = pathMatrix.GetValue(startPos, pathInd);
|
||||
|
||||
if (path == NULL) {
|
||||
break;
|
||||
}
|
||||
if (path->GetNumRules() == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const Range &pathRange = path->range;
|
||||
//cerr << "pathRange=" << pathRange << endl;
|
||||
if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint();
|
||||
if (!reorderingConstraint.Check(hypoBitmap, startPos, pathRange.GetEndPos())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
|
||||
size_t numWords = newBitmap.GetNumWordsCovered();
|
||||
|
||||
CubeEdges &edges = *m_cubeEdges[numWords];
|
||||
|
||||
// sort hypo for a particular bitmap and hypoEndPos
|
||||
const Hypotheses &sortedHypos = val.second->GetSortedAndPruneHypos(mgr, mgr.arcLists);
|
||||
|
||||
size_t numPt = mgr.system.mappings.size();
|
||||
for (size_t i = 0; i < numPt; ++i) {
|
||||
const TargetPhrases *tps = path->targetPhrases[i];
|
||||
if (tps && tps->GetSize()) {
|
||||
CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap);
|
||||
edges.push_back(edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const Hypothesis *Search::GetBestHypo() const
|
||||
{
|
||||
const Hypothesis *bestHypo = m_stack.GetBestHypo();
|
||||
return bestHypo;
|
||||
}
|
||||
|
||||
void Search::AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const
|
||||
{
|
||||
const Stack::Coll &coll = m_stack.GetColl();
|
||||
BOOST_FOREACH(const Stack::Coll::value_type &val, coll){
|
||||
const Moses2::HypothesisColl &hypos = *val.second;
|
||||
const Hypotheses &sortedHypos = hypos.GetSortedAndPruneHypos(mgr, mgr.arcLists);
|
||||
|
||||
BOOST_FOREACH(const HypothesisBase *hypoBase, sortedHypos) {
|
||||
const Hypothesis *hypo = static_cast<const Hypothesis*>(hypoBase);
|
||||
TrellisPath *path = new TrellisPath(hypo, mgr.arcLists);
|
||||
paths.Add(path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,125 +0,0 @@
|
||||
/*
|
||||
* Stack.cpp
|
||||
*
|
||||
* Created on: 24 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <algorithm>
|
||||
#include <boost/foreach.hpp>
|
||||
#include "Stack.h"
|
||||
#include "../Hypothesis.h"
|
||||
#include "../Manager.h"
|
||||
#include "../../Scores.h"
|
||||
#include "../../System.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSCubePruningMiniStack
|
||||
{
|
||||
Stack::Stack(const Manager &mgr) :
|
||||
m_mgr(mgr), m_coll(
|
||||
MemPoolAllocator<std::pair<HypoCoverage, Moses2::HypothesisColl*> >(
|
||||
mgr.GetPool())), m_miniStackRecycler(
|
||||
MemPoolAllocator<Moses2::HypothesisColl*>(mgr.GetPool()))
|
||||
{
|
||||
}
|
||||
|
||||
Stack::~Stack()
|
||||
{
|
||||
BOOST_FOREACH(const Coll::value_type &val, m_coll){
|
||||
const Moses2::HypothesisColl *miniStack = val.second;
|
||||
delete miniStack;
|
||||
}
|
||||
|
||||
while (!m_miniStackRecycler.empty()) {
|
||||
Moses2::HypothesisColl *miniStack = m_miniStackRecycler.back();
|
||||
m_miniStackRecycler.pop_back();
|
||||
delete miniStack;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void Stack::Add(Hypothesis *hypo, Recycler<HypothesisBase*> &hypoRecycle,
|
||||
ArcLists &arcLists)
|
||||
{
|
||||
HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos());
|
||||
Moses2::HypothesisColl &coll = GetMiniStack(key);
|
||||
coll.Add(m_mgr.system, hypo, hypoRecycle, arcLists);
|
||||
}
|
||||
|
||||
const Hypothesis *Stack::GetBestHypo() const
|
||||
{
|
||||
SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
|
||||
const HypothesisBase *bestHypo = NULL;
|
||||
BOOST_FOREACH(const Coll::value_type &val, m_coll){
|
||||
const Moses2::HypothesisColl &hypos = *val.second;
|
||||
const Moses2::HypothesisBase *hypo = hypos.GetBestHypo();
|
||||
|
||||
if (hypo && hypo->GetFutureScore() > bestScore) {
|
||||
bestScore = hypo->GetFutureScore();
|
||||
bestHypo = hypo;
|
||||
}
|
||||
}
|
||||
return &bestHypo->Cast<Hypothesis>();
|
||||
}
|
||||
|
||||
size_t Stack::GetHypoSize() const
|
||||
{
|
||||
size_t ret = 0;
|
||||
BOOST_FOREACH(const Coll::value_type &val, m_coll){
|
||||
const Moses2::HypothesisColl &hypos = *val.second;
|
||||
ret += hypos.GetSize();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
Moses2::HypothesisColl &Stack::GetMiniStack(const HypoCoverage &key)
|
||||
{
|
||||
Moses2::HypothesisColl *ret;
|
||||
Coll::iterator iter = m_coll.find(key);
|
||||
if (iter == m_coll.end()) {
|
||||
if (m_miniStackRecycler.empty()) {
|
||||
ret = new Moses2::HypothesisColl(m_mgr);
|
||||
}
|
||||
else {
|
||||
ret = m_miniStackRecycler.back();
|
||||
ret->Clear();
|
||||
m_miniStackRecycler.pop_back();
|
||||
}
|
||||
|
||||
m_coll[key] = ret;
|
||||
}
|
||||
else {
|
||||
ret = iter->second;
|
||||
}
|
||||
return *ret;
|
||||
}
|
||||
|
||||
void Stack::Clear()
|
||||
{
|
||||
BOOST_FOREACH(const Coll::value_type &val, m_coll){
|
||||
Moses2::HypothesisColl *miniStack = val.second;
|
||||
m_miniStackRecycler.push_back(miniStack);
|
||||
}
|
||||
|
||||
m_coll.clear();
|
||||
}
|
||||
|
||||
void Stack::DebugCounts()
|
||||
{
|
||||
cerr << "counts=";
|
||||
BOOST_FOREACH(const Coll::value_type &val, GetColl()){
|
||||
const Moses2::HypothesisColl &miniStack = *val.second;
|
||||
size_t count = miniStack.GetSize();
|
||||
cerr << count << " ";
|
||||
}
|
||||
cerr << endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,281 +0,0 @@
|
||||
/*
|
||||
* Manager.cpp
|
||||
*
|
||||
* Created on: 23 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include "Manager.h"
|
||||
#include "TargetPhraseImpl.h"
|
||||
#include "InputPath.h"
|
||||
#include "Sentence.h"
|
||||
|
||||
#include "Normal/Search.h"
|
||||
#include "CubePruningMiniStack/Search.h"
|
||||
#include "Batch/Search.h"
|
||||
|
||||
/*
|
||||
#include "CubePruningPerMiniStack/Search.h"
|
||||
#include "CubePruningPerBitmap/Search.h"
|
||||
#include "CubePruningCardinalStack/Search.h"
|
||||
#include "CubePruningBitmapStack/Search.h"
|
||||
*/
|
||||
#include "../TrellisPaths.h"
|
||||
#include "../System.h"
|
||||
#include "../Phrase.h"
|
||||
#include "../InputPathsBase.h"
|
||||
#include "../TranslationModel/PhraseTable.h"
|
||||
#include "../TranslationModel/UnknownWordPenalty.h"
|
||||
#include "../legacy/Range.h"
|
||||
#include "../PhraseBased/TargetPhrases.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
Manager::Manager(System &sys, const TranslationTask &task,
|
||||
const std::string &inputStr, long translationId) :
|
||||
ManagerBase(sys, task, inputStr, translationId)
|
||||
,m_search(NULL)
|
||||
,m_bitmaps(NULL)
|
||||
{
|
||||
//cerr << translationId << " inputStr=" << inputStr << endl;
|
||||
}
|
||||
|
||||
Manager::~Manager()
|
||||
{
|
||||
//cerr << "Start ~Manager " << this << endl;
|
||||
delete m_search;
|
||||
delete m_bitmaps;
|
||||
//cerr << "Finish ~Manager " << this << endl;
|
||||
}
|
||||
|
||||
void Manager::Init()
|
||||
{
|
||||
// init pools etc
|
||||
InitPools();
|
||||
|
||||
FactorCollection &vocab = system.GetVocab();
|
||||
m_input = Moses2::Sentence::CreateFromString(GetPool(), vocab, system, m_inputStr);
|
||||
|
||||
m_bitmaps = new Bitmaps(GetPool());
|
||||
|
||||
const PhraseTable &firstPt = *system.featureFunctions.m_phraseTables[0];
|
||||
m_initPhrase = new (GetPool().Allocate<TargetPhraseImpl>()) TargetPhraseImpl(
|
||||
GetPool(), firstPt, system, 0);
|
||||
|
||||
const Sentence &sentence = static_cast<const Sentence&>(GetInput());
|
||||
//cerr << "sentence=" << sentence.GetSize() << " " << sentence.Debug(system) << endl;
|
||||
|
||||
m_inputPaths.Init(sentence, *this);
|
||||
|
||||
// xml
|
||||
const UnknownWordPenalty *unkWP = system.featureFunctions.GetUnknownWordPenalty();
|
||||
UTIL_THROW_IF2(unkWP == NULL, "There must be a UnknownWordPenalty FF");
|
||||
unkWP->ProcessXML(*this, GetPool(), sentence, m_inputPaths);
|
||||
|
||||
// lookup with every pt
|
||||
const std::vector<const PhraseTable*> &pts = system.mappings;
|
||||
for (size_t i = 0; i < pts.size(); ++i) {
|
||||
const PhraseTable &pt = *pts[i];
|
||||
//cerr << "Looking up from " << pt.GetName() << endl;
|
||||
pt.Lookup(*this, m_inputPaths);
|
||||
}
|
||||
//m_inputPaths.DeleteUnusedPaths();
|
||||
CalcFutureScore();
|
||||
|
||||
m_bitmaps->Init(sentence.GetSize(), vector<bool>(0));
|
||||
|
||||
switch (system.options.search.algo) {
|
||||
case Normal:
|
||||
m_search = new NSNormal::Search(*this);
|
||||
break;
|
||||
case NormalBatch:
|
||||
m_search = new NSBatch::Search(*this);
|
||||
break;
|
||||
case CubePruning:
|
||||
case CubePruningMiniStack:
|
||||
m_search = new NSCubePruningMiniStack::Search(*this);
|
||||
break;
|
||||
/*
|
||||
case CubePruningPerMiniStack:
|
||||
m_search = new NSCubePruningPerMiniStack::Search(*this);
|
||||
break;
|
||||
case CubePruningPerBitmap:
|
||||
m_search = new NSCubePruningPerBitmap::Search(*this);
|
||||
break;
|
||||
case CubePruningCardinalStack:
|
||||
m_search = new NSCubePruningCardinalStack::Search(*this);
|
||||
break;
|
||||
case CubePruningBitmapStack:
|
||||
m_search = new NSCubePruningBitmapStack::Search(*this);
|
||||
break;
|
||||
*/
|
||||
default:
|
||||
cerr << "Unknown search algorithm" << endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
void Manager::Decode()
|
||||
{
|
||||
//cerr << "Start Decode " << this << endl;
|
||||
|
||||
Init();
|
||||
m_search->Decode();
|
||||
|
||||
//cerr << "Finished Decode " << this << endl;
|
||||
}
|
||||
|
||||
void Manager::CalcFutureScore()
|
||||
{
|
||||
const Sentence &sentence = static_cast<const Sentence&>(GetInput());
|
||||
size_t size = sentence.GetSize();
|
||||
m_estimatedScores =
|
||||
new (GetPool().Allocate<EstimatedScores>()) EstimatedScores(GetPool(),
|
||||
size);
|
||||
m_estimatedScores->InitTriangle(-numeric_limits<SCORE>::infinity());
|
||||
|
||||
// walk all the translation options and record the cheapest option for each span
|
||||
BOOST_FOREACH(const InputPathBase *path, m_inputPaths){
|
||||
const Range &range = path->range;
|
||||
SCORE bestScore = -numeric_limits<SCORE>::infinity();
|
||||
|
||||
size_t numPt = system.mappings.size();
|
||||
for (size_t i = 0; i < numPt; ++i) {
|
||||
const TargetPhrases *tps = static_cast<const InputPath*>(path)->targetPhrases[i];
|
||||
if (tps) {
|
||||
BOOST_FOREACH(const TargetPhraseImpl *tp, *tps) {
|
||||
SCORE score = tp->GetFutureScore();
|
||||
if (score > bestScore) {
|
||||
bestScore = score;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
m_estimatedScores->SetValue(range.GetStartPos(), range.GetEndPos(), bestScore);
|
||||
}
|
||||
|
||||
// now fill all the cells in the strictly upper triangle
|
||||
// there is no way to modify the diagonal now, in the case
|
||||
// where no translation option covers a single-word span,
|
||||
// we leave the +inf in the matrix
|
||||
// like in chart parsing we want each cell to contain the highest score
|
||||
// of the full-span trOpt or the sum of scores of joining two smaller spans
|
||||
|
||||
for (size_t colstart = 1; colstart < size; colstart++) {
|
||||
for (size_t diagshift = 0; diagshift < size - colstart; diagshift++) {
|
||||
size_t sPos = diagshift;
|
||||
size_t ePos = colstart + diagshift;
|
||||
for (size_t joinAt = sPos; joinAt < ePos; joinAt++) {
|
||||
float joinedScore = m_estimatedScores->GetValue(sPos, joinAt)
|
||||
+ m_estimatedScores->GetValue(joinAt + 1, ePos);
|
||||
// uncomment to see the cell filling scheme
|
||||
// TRACE_ERR("[" << sPos << "," << ePos << "] <-? ["
|
||||
// << sPos << "," << joinAt << "]+["
|
||||
// << joinAt+1 << "," << ePos << "] (colstart: "
|
||||
// << colstart << ", diagshift: " << diagshift << ")"
|
||||
// << endl);
|
||||
|
||||
if (joinedScore > m_estimatedScores->GetValue(sPos, ePos)) m_estimatedScores->SetValue(
|
||||
sPos, ePos, joinedScore);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//cerr << "Square matrix:" << endl;
|
||||
//cerr << *m_estimatedScores << endl;
|
||||
}
|
||||
|
||||
std::string Manager::OutputBest() const
|
||||
{
|
||||
stringstream out;
|
||||
Moses2::FixPrecision(out);
|
||||
|
||||
const Hypothesis *bestHypo = m_search->GetBestHypo();
|
||||
if (bestHypo) {
|
||||
if (system.options.output.ReportHypoScore) {
|
||||
out << bestHypo->GetScores().GetTotalScore() << " ";
|
||||
}
|
||||
|
||||
bestHypo->OutputToStream(out);
|
||||
//cerr << "BEST TRANSLATION: " << *bestHypo;
|
||||
}
|
||||
else {
|
||||
if (system.options.output.ReportHypoScore) {
|
||||
out << "0 ";
|
||||
}
|
||||
//cerr << "NO TRANSLATION " << m_input->GetTranslationId() << endl;
|
||||
}
|
||||
|
||||
return out.str();
|
||||
//cerr << endl;
|
||||
}
|
||||
|
||||
std::string Manager::OutputNBest()
|
||||
{
|
||||
arcLists.Sort();
|
||||
|
||||
boost::unordered_set<size_t> distinctHypos;
|
||||
|
||||
TrellisPaths<TrellisPath> contenders;
|
||||
m_search->AddInitialTrellisPaths(contenders);
|
||||
|
||||
long transId = GetTranslationId();
|
||||
|
||||
// MAIN LOOP
|
||||
stringstream out;
|
||||
//Moses2::FixPrecision(out);
|
||||
|
||||
size_t maxIter = system.options.nbest.nbest_size * system.options.nbest.factor;
|
||||
size_t bestInd = 0;
|
||||
for (size_t i = 0; i < maxIter; ++i) {
|
||||
if (bestInd > system.options.nbest.nbest_size || contenders.empty()) {
|
||||
break;
|
||||
}
|
||||
|
||||
//cerr << "bestInd=" << bestInd << endl;
|
||||
TrellisPath *path = contenders.Get();
|
||||
|
||||
bool ok = false;
|
||||
if (system.options.nbest.only_distinct) {
|
||||
string tgtPhrase = path->OutputTargetPhrase(system);
|
||||
//cerr << "tgtPhrase=" << tgtPhrase << endl;
|
||||
boost::hash<std::string> string_hash;
|
||||
size_t hash = string_hash(tgtPhrase);
|
||||
|
||||
if (distinctHypos.insert(hash).second) {
|
||||
ok = true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ok = true;
|
||||
}
|
||||
|
||||
if (ok) {
|
||||
++bestInd;
|
||||
out << transId << " ||| ";
|
||||
path->OutputToStream(out, system);
|
||||
out << "\n";
|
||||
}
|
||||
|
||||
// create next paths
|
||||
path->CreateDeviantPaths(contenders, arcLists, GetPool(), system);
|
||||
|
||||
delete path;
|
||||
}
|
||||
|
||||
return out.str();
|
||||
}
|
||||
|
||||
std::string Manager::OutputTransOpt()
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,167 +0,0 @@
|
||||
/*
|
||||
* SearchNormal.cpp
|
||||
*
|
||||
* Created on: 25 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "Search.h"
|
||||
#include <algorithm>
|
||||
#include <boost/foreach.hpp>
|
||||
#include "Stack.h"
|
||||
#include "../Manager.h"
|
||||
#include "../TrellisPath.h"
|
||||
#include "../Sentence.h"
|
||||
#include "../../TrellisPaths.h"
|
||||
#include "../../InputPathsBase.h"
|
||||
#include "../../Phrase.h"
|
||||
#include "../../System.h"
|
||||
#include "../../PhraseBased/TargetPhrases.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
namespace NSNormal
|
||||
{
|
||||
|
||||
Search::Search(Manager &mgr)
|
||||
:Moses2::Search(mgr)
|
||||
, m_stacks(mgr)
|
||||
{
|
||||
// TODO Auto-generated constructor stub
|
||||
|
||||
}
|
||||
|
||||
Search::~Search()
|
||||
{
|
||||
// TODO Auto-generated destructor stub
|
||||
}
|
||||
|
||||
void Search::Decode()
|
||||
{
|
||||
// init stacks
|
||||
const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
|
||||
m_stacks.Init(mgr, sentence.GetSize() + 1);
|
||||
|
||||
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
|
||||
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(),
|
||||
initBitmap);
|
||||
initHypo->EmptyHypothesisState(mgr.GetInput());
|
||||
|
||||
m_stacks.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists);
|
||||
|
||||
for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
|
||||
Decode(stackInd);
|
||||
//cerr << m_stacks << endl;
|
||||
|
||||
// delete stack to save mem
|
||||
if (stackInd < m_stacks.GetSize() - 1) {
|
||||
m_stacks.Delete(stackInd);
|
||||
}
|
||||
//cerr << m_stacks << endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Search::Decode(size_t stackInd)
|
||||
{
|
||||
//cerr << "stackInd=" << stackInd << endl;
|
||||
Stack &stack = m_stacks[stackInd];
|
||||
if (&stack == &m_stacks.Back()) {
|
||||
// last stack. don't do anythin
|
||||
return;
|
||||
}
|
||||
|
||||
const Hypotheses &hypos = stack.GetSortedAndPruneHypos(mgr, mgr.arcLists);
|
||||
//cerr << "hypos=" << hypos.size() << endl;
|
||||
|
||||
const InputPaths &paths = mgr.GetInputPaths();
|
||||
|
||||
BOOST_FOREACH(const InputPathBase *path, paths){
|
||||
BOOST_FOREACH(const HypothesisBase *hypo, hypos) {
|
||||
Extend(*static_cast<const Hypothesis*>(hypo), *static_cast<const InputPath*>(path));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Search::Extend(const Hypothesis &hypo, const InputPath &path)
|
||||
{
|
||||
const Bitmap &hypoBitmap = hypo.GetBitmap();
|
||||
const Range &hypoRange = hypo.GetInputPath().range;
|
||||
const Range &pathRange = path.range;
|
||||
|
||||
if (!CanExtend(hypoBitmap, hypoRange.GetEndPos(), pathRange)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint();
|
||||
if (!reorderingConstraint.Check(hypoBitmap, pathRange.GetStartPos(), pathRange.GetEndPos())) {
|
||||
return;
|
||||
}
|
||||
|
||||
// extend this hypo
|
||||
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
|
||||
//SCORE estimatedScore = mgr.GetEstimatedScores().CalcFutureScore2(bitmap, pathRange.GetStartPos(), pathRange.GetEndPos());
|
||||
SCORE estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
|
||||
|
||||
size_t numPt = mgr.system.mappings.size();
|
||||
const TargetPhrases **tpsAllPt = path.targetPhrases;
|
||||
for (size_t i = 0; i < numPt; ++i) {
|
||||
const TargetPhrases *tps = tpsAllPt[i];
|
||||
if (tps) {
|
||||
Extend(hypo, *tps, path, newBitmap, estimatedScore);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Search::Extend(const Hypothesis &hypo, const TargetPhrases &tps,
|
||||
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
|
||||
{
|
||||
BOOST_FOREACH(const TargetPhraseImpl *tp, tps){
|
||||
Extend(hypo, *tp, path, newBitmap, estimatedScore);
|
||||
}
|
||||
}
|
||||
|
||||
void Search::Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp,
|
||||
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
|
||||
{
|
||||
Hypothesis *newHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
newHypo->Init(mgr, hypo, path, tp, newBitmap, estimatedScore);
|
||||
newHypo->EvaluateWhenApplied();
|
||||
|
||||
m_stacks.Add(newHypo, mgr.GetHypoRecycle(), mgr.arcLists);
|
||||
|
||||
//m_arcLists.AddArc(stackAdded.added, newHypo, stackAdded.other);
|
||||
//stack.Prune(mgr.GetHypoRecycle(), mgr.system.stackSize, mgr.system.stackSize * 2);
|
||||
|
||||
}
|
||||
|
||||
const Hypothesis *Search::GetBestHypo() const
|
||||
{
|
||||
const Stack &lastStack = m_stacks.Back();
|
||||
const Hypotheses &sortedHypos = lastStack.GetSortedAndPruneHypos(mgr,
|
||||
mgr.arcLists);
|
||||
|
||||
const Hypothesis *best = NULL;
|
||||
if (sortedHypos.size()) {
|
||||
best = static_cast<const Hypothesis*>(sortedHypos[0]);
|
||||
}
|
||||
return best;
|
||||
}
|
||||
|
||||
void Search::AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const
|
||||
{
|
||||
const Stack &lastStack = m_stacks.Back();
|
||||
const Hypotheses &hypos = lastStack.GetSortedAndPruneHypos(mgr, mgr.arcLists);
|
||||
|
||||
BOOST_FOREACH(const HypothesisBase *hypoBase, hypos){
|
||||
const Hypothesis *hypo = static_cast<const Hypothesis*>(hypoBase);
|
||||
TrellisPath *path = new TrellisPath(hypo, mgr.arcLists);
|
||||
paths.Add(path);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
}
|
||||
|
@ -1,15 +0,0 @@
|
||||
/*
|
||||
* PhraseImpl.cpp
|
||||
*
|
||||
* Created on: 19 Feb 2016
|
||||
* Author: hieu
|
||||
*/
|
||||
#include "PhraseImpl.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
}
|
||||
|
@ -1,31 +0,0 @@
|
||||
#pragma once
|
||||
#include "../PhraseImplTemplate.h"
|
||||
#include "../SubPhrase.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class PhraseImpl: public PhraseImplTemplate<Word>
|
||||
{
|
||||
public:
|
||||
static PhraseImpl *CreateFromString(MemPool &pool, FactorCollection &vocab,
|
||||
const System &system, const std::string &str)
|
||||
{
|
||||
std::vector<std::string> toks = Moses2::Tokenize(str);
|
||||
size_t size = toks.size();
|
||||
PhraseImpl *ret;
|
||||
|
||||
ret = new (pool.Allocate<PhraseImpl>()) PhraseImpl(pool, size);
|
||||
|
||||
ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks);
|
||||
return ret;
|
||||
}
|
||||
|
||||
PhraseImpl(MemPool &pool, size_t size) :
|
||||
PhraseImplTemplate<Word>(pool, size)
|
||||
{
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
@ -1,174 +0,0 @@
|
||||
/*
|
||||
* Sentence.cpp
|
||||
*
|
||||
* Created on: 14 Dec 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <boost/property_tree/ptree.hpp>
|
||||
#include <boost/property_tree/xml_parser.hpp>
|
||||
#include "Sentence.h"
|
||||
#include "../System.h"
|
||||
#include "../parameters/AllOptions.h"
|
||||
#include "../legacy/Util2.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab,
|
||||
const System &system, const std::string &str)
|
||||
{
|
||||
Sentence *ret;
|
||||
|
||||
if (system.options.input.xml_policy) {
|
||||
// xml
|
||||
ret = CreateFromStringXML(pool, vocab, system, str);
|
||||
}
|
||||
else {
|
||||
// no xml
|
||||
//cerr << "PB Sentence" << endl;
|
||||
std::vector<std::string> toks = Tokenize(str);
|
||||
|
||||
size_t size = toks.size();
|
||||
ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
|
||||
ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks, false);
|
||||
}
|
||||
|
||||
//cerr << "REORDERING CONSTRAINTS:" << ret->GetReorderingConstraint() << endl;
|
||||
//cerr << "ret=" << ret->Debug(system) << endl;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
|
||||
const System &system, const std::string &str)
|
||||
{
|
||||
Sentence *ret;
|
||||
|
||||
vector<XMLOption*> xmlOptions;
|
||||
pugi::xml_document doc;
|
||||
|
||||
string str2 = "<xml>" + str + "</xml>";
|
||||
pugi::xml_parse_result result = doc.load(str2.c_str(),
|
||||
pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
|
||||
pugi::xml_node topNode = doc.child("xml");
|
||||
|
||||
std::vector<std::string> toks;
|
||||
XMLParse(pool, system, 0, topNode, toks, xmlOptions);
|
||||
|
||||
// debug
|
||||
/*
|
||||
cerr << "xmloptions:" << endl;
|
||||
for (size_t i = 0; i < xmlOptions.size(); ++i) {
|
||||
cerr << xmlOptions[i]->Debug(system) << endl;
|
||||
}
|
||||
*/
|
||||
|
||||
// create words
|
||||
size_t size = toks.size();
|
||||
ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
|
||||
ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks, false);
|
||||
|
||||
// xml
|
||||
ret->Init(system, size, system.options.reordering.max_distortion);
|
||||
|
||||
ReorderingConstraint &reorderingConstraint = ret->GetReorderingConstraint();
|
||||
|
||||
// set reordering walls, if "-monotone-at-punction" is set
|
||||
if (system.options.reordering.monotone_at_punct && ret->GetSize()) {
|
||||
reorderingConstraint.SetMonotoneAtPunctuation(*ret);
|
||||
}
|
||||
|
||||
// set walls obtained from xml
|
||||
for(size_t i=0; i<xmlOptions.size(); i++) {
|
||||
const XMLOption *xmlOption = xmlOptions[i];
|
||||
if(strcmp(xmlOption->GetNodeName(), "wall") == 0) {
|
||||
UTIL_THROW_IF2(xmlOption->startPos >= ret->GetSize(), "wall is beyond the sentence"); // no buggy walls, please
|
||||
reorderingConstraint.SetWall(xmlOption->startPos - 1, true);
|
||||
}
|
||||
else if (strcmp(xmlOption->GetNodeName(), "zone") == 0) {
|
||||
reorderingConstraint.SetZone( xmlOption->startPos, xmlOption->startPos + xmlOption->phraseSize -1 );
|
||||
}
|
||||
else if (strcmp(xmlOption->GetNodeName(), "ne") == 0) {
|
||||
FactorType placeholderFactor = system.options.input.placeholder_factor;
|
||||
UTIL_THROW_IF2(placeholderFactor == NOT_FOUND,
|
||||
"Placeholder XML in input. Must have argument -placeholder-factor [NUM]");
|
||||
UTIL_THROW_IF2(xmlOption->phraseSize != 1,
|
||||
"Placeholder must only cover 1 word");
|
||||
|
||||
const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false);
|
||||
(*ret)[xmlOption->startPos][placeholderFactor] = factor;
|
||||
}
|
||||
else {
|
||||
// default - forced translation. Add to class variable
|
||||
ret->AddXMLOption(system, xmlOption);
|
||||
}
|
||||
}
|
||||
reorderingConstraint.FinalizeWalls();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void Sentence::XMLParse(
|
||||
MemPool &pool,
|
||||
const System &system,
|
||||
size_t depth,
|
||||
const pugi::xml_node &parentNode,
|
||||
std::vector<std::string> &toks,
|
||||
vector<XMLOption*> &xmlOptions)
|
||||
{ // pugixml
|
||||
for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
|
||||
string nodeName = childNode.name();
|
||||
//cerr << depth << " nodeName=" << nodeName << endl;
|
||||
|
||||
int startPos = toks.size();
|
||||
|
||||
string value = childNode.value();
|
||||
if (!value.empty()) {
|
||||
//cerr << depth << "childNode text=" << value << endl;
|
||||
std::vector<std::string> subPhraseToks = Tokenize(value);
|
||||
for (size_t i = 0; i < subPhraseToks.size(); ++i) {
|
||||
toks.push_back(subPhraseToks[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!nodeName.empty()) {
|
||||
XMLOption *xmlOption = new (pool.Allocate<XMLOption>()) XMLOption(pool, nodeName, startPos);
|
||||
|
||||
pugi::xml_attribute attr;
|
||||
attr = childNode.attribute("translation");
|
||||
if (!attr.empty()) {
|
||||
xmlOption->SetTranslation(pool, attr.as_string());
|
||||
}
|
||||
|
||||
attr = childNode.attribute("entity");
|
||||
if (!attr.empty()) {
|
||||
xmlOption->SetEntity(pool, attr.as_string());
|
||||
}
|
||||
|
||||
attr = childNode.attribute("prob");
|
||||
if (!attr.empty()) {
|
||||
xmlOption->prob = attr.as_float();
|
||||
}
|
||||
|
||||
xmlOptions.push_back(xmlOption);
|
||||
|
||||
// recursively call this function. For proper recursive trees
|
||||
XMLParse(pool, system, depth + 1, childNode, toks, xmlOptions);
|
||||
|
||||
size_t endPos = toks.size();
|
||||
xmlOption->phraseSize = endPos - startPos;
|
||||
|
||||
/*
|
||||
cerr << "xmlOptions=";
|
||||
xmlOption->Debug(cerr, system);
|
||||
cerr << endl;
|
||||
*/
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
} /* namespace Moses2 */
|
||||
|
@ -1,155 +0,0 @@
|
||||
/*
|
||||
* Sentence.cpp
|
||||
*
|
||||
* Created on: 14 Dec 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "Sentence.h"
|
||||
#include "../System.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
namespace SCFG
|
||||
{
|
||||
Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab,
|
||||
const System &system, const std::string &str, long translationId)
|
||||
{
|
||||
//cerr << "SCFG Sentence" << endl;
|
||||
|
||||
Sentence *ret;
|
||||
|
||||
if (system.options.input.xml_policy) {
|
||||
// xml
|
||||
ret = CreateFromStringXML(pool, vocab, system, str);
|
||||
//cerr << "ret=" << ret->Debug(system) << endl;
|
||||
}
|
||||
else {
|
||||
std::vector<std::string> toks = Tokenize(str);
|
||||
size_t size = toks.size() + 2;
|
||||
|
||||
ret = new (pool.Allocate<SCFG::Sentence>()) Sentence(pool, size);
|
||||
ret->PhraseImplTemplate<SCFG::Word>::CreateFromString(vocab, system, toks, true);
|
||||
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
|
||||
const System &system, const std::string &str)
|
||||
{
|
||||
Sentence *ret;
|
||||
|
||||
vector<XMLOption*> xmlOptions;
|
||||
pugi::xml_document doc;
|
||||
|
||||
string str2 = "<xml>" + str + "</xml>";
|
||||
pugi::xml_parse_result result = doc.load(str2.c_str(),
|
||||
pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
|
||||
pugi::xml_node topNode = doc.child("xml");
|
||||
|
||||
std::vector<std::string> toks;
|
||||
XMLParse(pool, system, 0, topNode, toks, xmlOptions);
|
||||
|
||||
// debug
|
||||
/*
|
||||
cerr << "xmloptions:" << endl;
|
||||
for (size_t i = 0; i < xmlOptions.size(); ++i) {
|
||||
cerr << xmlOptions[i]->Debug(system) << endl;
|
||||
}
|
||||
*/
|
||||
|
||||
// create words
|
||||
size_t size = toks.size() + 2;
|
||||
ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
|
||||
ret->PhraseImplTemplate<SCFG::Word>::CreateFromString(vocab, system, toks, true);
|
||||
|
||||
// xml
|
||||
for(size_t i=0; i<xmlOptions.size(); i++) {
|
||||
const XMLOption *xmlOption = xmlOptions[i];
|
||||
if (strcmp(xmlOption->GetNodeName(), "ne") == 0) {
|
||||
FactorType placeholderFactor = system.options.input.placeholder_factor;
|
||||
UTIL_THROW_IF2(placeholderFactor == NOT_FOUND,
|
||||
"Placeholder XML in input. Must have argument -placeholder-factor [NUM]");
|
||||
UTIL_THROW_IF2(xmlOption->phraseSize != 1,
|
||||
"Placeholder must only cover 1 word");
|
||||
|
||||
const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false);
|
||||
(*ret)[xmlOption->startPos + 1][placeholderFactor] = factor;
|
||||
}
|
||||
else {
|
||||
// default - forced translation. Add to class variable
|
||||
ret->AddXMLOption(system, xmlOption);
|
||||
}
|
||||
}
|
||||
|
||||
//cerr << "ret=" << ret->Debug(system) << endl;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void Sentence::XMLParse(
|
||||
MemPool &pool,
|
||||
const System &system,
|
||||
size_t depth,
|
||||
const pugi::xml_node &parentNode,
|
||||
std::vector<std::string> &toks,
|
||||
vector<XMLOption*> &xmlOptions)
|
||||
{ // pugixml
|
||||
for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
|
||||
string nodeName = childNode.name();
|
||||
//cerr << depth << " nodeName=" << nodeName << endl;
|
||||
|
||||
int startPos = toks.size();
|
||||
|
||||
string value = childNode.value();
|
||||
if (!value.empty()) {
|
||||
//cerr << depth << "childNode text=" << value << endl;
|
||||
std::vector<std::string> subPhraseToks = Tokenize(value);
|
||||
for (size_t i = 0; i < subPhraseToks.size(); ++i) {
|
||||
toks.push_back(subPhraseToks[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!nodeName.empty()) {
|
||||
XMLOption *xmlOption = new (pool.Allocate<XMLOption>()) XMLOption(pool, nodeName, startPos);
|
||||
|
||||
pugi::xml_attribute attr;
|
||||
attr = childNode.attribute("translation");
|
||||
if (!attr.empty()) {
|
||||
xmlOption->SetTranslation(pool, attr.as_string());
|
||||
}
|
||||
|
||||
attr = childNode.attribute("entity");
|
||||
if (!attr.empty()) {
|
||||
xmlOption->SetEntity(pool, attr.as_string());
|
||||
}
|
||||
|
||||
attr = childNode.attribute("prob");
|
||||
if (!attr.empty()) {
|
||||
xmlOption->prob = attr.as_float();
|
||||
}
|
||||
|
||||
xmlOptions.push_back(xmlOption);
|
||||
|
||||
// recursively call this function. For proper recursive trees
|
||||
XMLParse(pool, system, depth + 1, childNode, toks, xmlOptions);
|
||||
|
||||
size_t endPos = toks.size();
|
||||
xmlOption->phraseSize = endPos - startPos;
|
||||
|
||||
/*
|
||||
cerr << "xmlOptions=";
|
||||
xmlOption->Debug(cerr, system);
|
||||
cerr << endl;
|
||||
*/
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
} /* namespace Moses2 */
|
||||
|
@ -1,74 +0,0 @@
|
||||
/*
|
||||
* KBestExtractor.cpp
|
||||
*
|
||||
* Created on: 2 Aug 2016
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <boost/foreach.hpp>
|
||||
#include <sstream>
|
||||
#include "KBestExtractor.h"
|
||||
#include "../Manager.h"
|
||||
#include "../Hypothesis.h"
|
||||
#include "../Stacks.h"
|
||||
#include "../Stack.h"
|
||||
#include "../Sentence.h"
|
||||
#include "../../System.h"
|
||||
#include "../../Scores.h"
|
||||
#include "../../legacy/Util2.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
//bool g_debug = false;
|
||||
|
||||
namespace SCFG
|
||||
{
|
||||
/////////////////////////////////////////////////////////////
|
||||
KBestExtractor::KBestExtractor(const SCFG::Manager &mgr)
|
||||
:m_mgr(mgr)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
KBestExtractor::~KBestExtractor()
|
||||
{
|
||||
}
|
||||
|
||||
void KBestExtractor::OutputToStream(std::stringstream &strm)
|
||||
{
|
||||
//cerr << "1" << flush;
|
||||
const Stack &lastStack = m_mgr.GetStacks().GetLastStack();
|
||||
UTIL_THROW_IF2(lastStack.GetColl().size() != 1, "Only suppose to be 1 hypo coll in last stack");
|
||||
UTIL_THROW_IF2(lastStack.GetColl().begin()->second == NULL, "NULL hypo collection");
|
||||
|
||||
const Hypotheses &hypos = lastStack.GetColl().begin()->second->GetSortedAndPrunedHypos();
|
||||
UTIL_THROW_IF2(hypos.size() != 1, "Only suppose to be 1 hypo in collection");
|
||||
const HypothesisBase *hypo = hypos[0];
|
||||
|
||||
const ArcLists &arcLists = m_mgr.arcLists;
|
||||
const ArcList &arcList = arcLists.GetArcList(hypo);
|
||||
NBests &nbests = m_nbestColl.GetOrCreateNBests(m_mgr, arcList);
|
||||
|
||||
size_t ind = 0;
|
||||
while (nbests.Extend(m_mgr, m_nbestColl, ind)) {
|
||||
const NBest &deriv = nbests.Get(ind);
|
||||
strm << m_mgr.GetTranslationId() << " ||| ";
|
||||
//cerr << "1" << flush;
|
||||
strm << deriv.GetStringExclSentenceMarkers();
|
||||
//cerr << "2" << flush;
|
||||
strm << " ||| ";
|
||||
deriv.GetScores().OutputBreakdown(strm, m_mgr.system);
|
||||
//cerr << "3" << flush;
|
||||
strm << "||| ";
|
||||
strm << deriv.GetScores().GetTotalScore();
|
||||
//cerr << "4" << flush;
|
||||
|
||||
strm << endl;
|
||||
|
||||
++ind;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
} /* namespace Moses2 */
|
@ -1,194 +0,0 @@
|
||||
/*
|
||||
* NBest.cpp
|
||||
*
|
||||
* Created on: 24 Aug 2016
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <sstream>
|
||||
#include <boost/foreach.hpp>
|
||||
#include "util/exception.hh"
|
||||
#include "NBest.h"
|
||||
#include "NBests.h"
|
||||
#include "NBestColl.h"
|
||||
#include "../Manager.h"
|
||||
#include "../TargetPhraseImpl.h"
|
||||
#include "../../System.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
namespace SCFG
|
||||
{
|
||||
|
||||
NBest::NBest(
|
||||
const SCFG::Manager &mgr,
|
||||
const ArcList &varcList,
|
||||
size_t vind,
|
||||
NBestColl &nbestColl)
|
||||
:arcList(&varcList)
|
||||
,arcInd(vind)
|
||||
{
|
||||
const SCFG::Hypothesis &hypo = GetHypo();
|
||||
|
||||
// copy scores from best hypo
|
||||
MemPool &pool = mgr.GetPool();
|
||||
m_scores = new (pool.Allocate<Scores>())
|
||||
Scores(mgr.system, pool, mgr.system.featureFunctions.GetNumScores(), hypo.GetScores());
|
||||
|
||||
// children
|
||||
const ArcLists &arcLists = mgr.arcLists;
|
||||
//const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase();
|
||||
|
||||
const Vector<const Hypothesis*> &prevHypos = hypo.GetPrevHypos();
|
||||
for (size_t i = 0; i < prevHypos.size(); ++i) {
|
||||
const SCFG::Hypothesis *prevHypo = prevHypos[i];
|
||||
const ArcList &childArc = arcLists.GetArcList(prevHypo);
|
||||
NBests &childNBests = nbestColl.GetOrCreateNBests(mgr, childArc);
|
||||
Child child(&childNBests, 0);
|
||||
children.push_back(child);
|
||||
}
|
||||
|
||||
stringstream strm;
|
||||
OutputToStream(mgr, strm, nbestColl);
|
||||
m_str = strm.str();
|
||||
}
|
||||
|
||||
NBest::NBest(const SCFG::Manager &mgr,
|
||||
const NBest &orig,
|
||||
size_t childInd,
|
||||
NBestColl &nbestColl)
|
||||
:arcList(orig.arcList)
|
||||
,arcInd(orig.arcInd)
|
||||
,children(orig.children)
|
||||
{
|
||||
Child &child = children[childInd];
|
||||
size_t &ind = child.second;
|
||||
++ind;
|
||||
UTIL_THROW_IF2(ind >= child.first->GetSize(),
|
||||
"out of bound:" << ind << ">=" << child.first->GetSize());
|
||||
|
||||
// scores
|
||||
MemPool &pool = mgr.GetPool();
|
||||
m_scores = new (pool.Allocate<Scores>())
|
||||
Scores(mgr.system,
|
||||
pool,
|
||||
mgr.system.featureFunctions.GetNumScores(),
|
||||
orig.GetScores());
|
||||
|
||||
const Scores &origScores = orig.GetChild(childInd).GetScores();
|
||||
const Scores &newScores = GetChild(childInd).GetScores();
|
||||
|
||||
m_scores->MinusEquals(mgr.system, origScores);
|
||||
m_scores->PlusEquals(mgr.system, newScores);
|
||||
|
||||
stringstream strm;
|
||||
OutputToStream(mgr, strm, nbestColl);
|
||||
m_str = strm.str();
|
||||
}
|
||||
|
||||
const SCFG::Hypothesis &NBest::GetHypo() const
|
||||
{
|
||||
const HypothesisBase *hypoBase = (*arcList)[arcInd];
|
||||
const SCFG::Hypothesis &hypo = *static_cast<const SCFG::Hypothesis*>(hypoBase);
|
||||
return hypo;
|
||||
}
|
||||
|
||||
const NBest &NBest::GetChild(size_t ind) const
|
||||
{
|
||||
const Child &child = children[ind];
|
||||
const NBests &nbests = *child.first;
|
||||
const NBest &nbest = nbests.Get(child.second);
|
||||
return nbest;
|
||||
}
|
||||
|
||||
|
||||
void NBest::CreateDeviants(
|
||||
const SCFG::Manager &mgr,
|
||||
NBestColl &nbestColl,
|
||||
Contenders &contenders) const
|
||||
{
|
||||
if (arcInd + 1 < arcList->size()) {
|
||||
// to use next arclist, all children must be 1st. Not sure if this is correct
|
||||
bool ok = true;
|
||||
BOOST_FOREACH(const Child &child, children) {
|
||||
if (child.second) {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (ok) {
|
||||
NBest *next = new NBest(mgr, *arcList, arcInd + 1, nbestColl);
|
||||
contenders.push(next);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t childInd = 0; childInd < children.size(); ++childInd) {
|
||||
const Child &child = children[childInd];
|
||||
NBests &childNBests = *child.first;
|
||||
bool extended = childNBests.Extend(mgr, nbestColl, child.second + 1);
|
||||
if (extended) {
|
||||
//cerr << "HH1 " << childInd << endl;
|
||||
NBest *next = new NBest(mgr, *this, childInd, nbestColl);
|
||||
|
||||
//cerr << "HH2 " << childInd << endl;
|
||||
contenders.push(next);
|
||||
//cerr << "HH3 " << childInd << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void NBest::OutputToStream(
|
||||
const SCFG::Manager &mgr,
|
||||
std::stringstream &strm,
|
||||
const NBestColl &nbestColl) const
|
||||
{
|
||||
const SCFG::Hypothesis &hypo = GetHypo();
|
||||
//strm << &hypo << " ";
|
||||
|
||||
const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase();
|
||||
|
||||
for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
|
||||
const SCFG::Word &word = tp[targetPos];
|
||||
//cerr << "word " << pos << "=" << word << endl;
|
||||
if (word.isNonTerminal) {
|
||||
//cerr << "is nt" << endl;
|
||||
// non-term. fill out with prev hypo
|
||||
size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[targetPos];
|
||||
|
||||
UTIL_THROW_IF2(nonTermInd >= children.size(), "Out of bounds:" << nonTermInd << ">=" << children.size());
|
||||
|
||||
const NBest &nbest = GetChild(nonTermInd);
|
||||
strm << nbest.GetString();
|
||||
}
|
||||
else {
|
||||
//cerr << "not nt" << endl;
|
||||
word.OutputToStream(hypo.GetManager(), targetPos, hypo, strm);
|
||||
|
||||
strm << " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string NBest::Debug(const System &system) const
|
||||
{
|
||||
stringstream strm;
|
||||
strm << GetScores().GetTotalScore() << " "
|
||||
<< arcList << "("
|
||||
<< arcList->size() << ")["
|
||||
<< arcInd << "] ";
|
||||
for (size_t i = 0; i < children.size(); ++i) {
|
||||
const Child &child = children[i];
|
||||
const NBest &childNBest = child.first->Get(child.second);
|
||||
|
||||
strm << child.first << "("
|
||||
<< child.first->GetSize() << ")["
|
||||
<< child.second << "]";
|
||||
strm << childNBest.GetScores().GetTotalScore() << " ";
|
||||
}
|
||||
return strm.str();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@ -1,100 +0,0 @@
|
||||
/*
|
||||
* NBest.h
|
||||
*
|
||||
* Created on: 24 Aug 2016
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <stdlib.h>
|
||||
#include "../../Scores.h"
|
||||
#include "../../ArcLists.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
class Scores;
|
||||
class System;
|
||||
|
||||
namespace SCFG
|
||||
{
|
||||
class NBest;
|
||||
class NBests;
|
||||
class NBestScoreOrderer;
|
||||
class Manager;
|
||||
class NBestColl;
|
||||
class Hypothesis;
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
typedef std::priority_queue<NBest*, std::vector<NBest*>, NBestScoreOrderer> Contenders;
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
class NBest
|
||||
{
|
||||
public:
|
||||
const ArcList *arcList;
|
||||
size_t arcInd;
|
||||
|
||||
typedef std::pair<NBests*, size_t> Child; // key to another NBest
|
||||
typedef std::vector<Child> Children;
|
||||
Children children;
|
||||
|
||||
NBest(const SCFG::Manager &mgr,
|
||||
const ArcList &varcList,
|
||||
size_t vind,
|
||||
NBestColl &nbestColl);
|
||||
|
||||
NBest(const SCFG::Manager &mgr,
|
||||
const NBest &orig,
|
||||
size_t childInd,
|
||||
NBestColl &nbestColl);
|
||||
|
||||
|
||||
void CreateDeviants(
|
||||
const SCFG::Manager &mgr,
|
||||
NBestColl &nbestColl,
|
||||
Contenders &contenders) const;
|
||||
|
||||
const Scores &GetScores() const
|
||||
{ return *m_scores; }
|
||||
|
||||
const NBest &GetChild(size_t ind) const;
|
||||
|
||||
const std::string &GetString() const
|
||||
{ return m_str; }
|
||||
|
||||
std::string GetStringExclSentenceMarkers() const
|
||||
{
|
||||
std::string ret = m_str.substr(4, m_str.size() - 10);
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::string Debug(const System &system) const;
|
||||
|
||||
protected:
|
||||
Scores *m_scores;
|
||||
std::string m_str;
|
||||
|
||||
const SCFG::Hypothesis &GetHypo() const;
|
||||
|
||||
void OutputToStream(
|
||||
const SCFG::Manager &mgr,
|
||||
std::stringstream &strm,
|
||||
const NBestColl &nbestColl) const;
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
class NBestScoreOrderer
|
||||
{
|
||||
public:
|
||||
bool operator()(const NBest* a, const NBest* b) const
|
||||
{
|
||||
return a->GetScores().GetTotalScore() < b->GetScores().GetTotalScore();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -1,111 +0,0 @@
|
||||
/*
|
||||
* NBests.cpp
|
||||
*
|
||||
* Created on: 24 Aug 2016
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include <boost/foreach.hpp>
|
||||
#include "NBests.h"
|
||||
#include "../Manager.h"
|
||||
#include "../../System.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
namespace SCFG
|
||||
{
|
||||
NBests::NBests(const SCFG::Manager &mgr,
|
||||
const ArcList &arcList,
|
||||
NBestColl &nbestColl)
|
||||
:indIter(0)
|
||||
{
|
||||
// best
|
||||
NBest *contender = new NBest(mgr, arcList, 0, nbestColl);
|
||||
contenders.push(contender);
|
||||
bool extended = Extend(mgr, nbestColl, 0);
|
||||
assert(extended);
|
||||
}
|
||||
|
||||
NBests::~NBests()
|
||||
{
|
||||
BOOST_FOREACH(const NBest *nbest, m_coll) {
|
||||
delete nbest;
|
||||
}
|
||||
|
||||
// delete bad contenders left in queue
|
||||
while (!contenders.empty()) {
|
||||
NBest *contender = contenders.top();
|
||||
contenders.pop();
|
||||
delete contender;
|
||||
}
|
||||
}
|
||||
|
||||
bool NBests::Extend(const SCFG::Manager &mgr,
|
||||
NBestColl &nbestColl,
|
||||
size_t ind)
|
||||
{
|
||||
if (ind < m_coll.size()) {
|
||||
// asking for 1 we've dont already
|
||||
return true;
|
||||
}
|
||||
|
||||
assert(ind == m_coll.size());
|
||||
|
||||
// checks
|
||||
if (ind >= mgr.system.options.nbest.nbest_size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t maxIter = mgr.system.options.nbest.nbest_size * mgr.system.options.nbest.factor;
|
||||
|
||||
// MAIN LOOP, create 1 new deriv.
|
||||
// The loop is for distinct nbest
|
||||
bool ok = false;
|
||||
while (!ok) {
|
||||
++indIter;
|
||||
if (indIter > maxIter) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (contenders.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
NBest *contender = contenders.top();
|
||||
contenders.pop();
|
||||
|
||||
contender->CreateDeviants(mgr, nbestColl, contenders);
|
||||
|
||||
if (mgr.system.options.nbest.only_distinct) {
|
||||
const string &tgtPhrase = contender->GetString();
|
||||
//cerr << "tgtPhrase=" << tgtPhrase << endl;
|
||||
boost::hash<std::string> string_hash;
|
||||
size_t hash = string_hash(tgtPhrase);
|
||||
|
||||
if (distinctHypos.insert(hash).second) {
|
||||
ok = true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ok = true;
|
||||
}
|
||||
|
||||
if (ok) {
|
||||
Add(contender);
|
||||
//cerr << best->GetScores().GetTotalScore() << " ";
|
||||
//cerr << best->Debug(mgr.system) << endl;
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
delete contender;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -1,53 +0,0 @@
|
||||
/*
|
||||
* NBests.h
|
||||
*
|
||||
* Created on: 24 Aug 2016
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include "NBest.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
namespace SCFG
|
||||
{
|
||||
|
||||
class NBests
|
||||
{
|
||||
public:
|
||||
Contenders contenders;
|
||||
boost::unordered_set<size_t> distinctHypos;
|
||||
|
||||
NBests(const SCFG::Manager &mgr,
|
||||
const ArcList &arcList,
|
||||
NBestColl &nbestColl);
|
||||
|
||||
virtual ~NBests();
|
||||
|
||||
size_t GetSize() const
|
||||
{ return m_coll.size(); }
|
||||
|
||||
const NBest &Get(size_t ind) const
|
||||
{ return *m_coll[ind]; }
|
||||
|
||||
bool Extend(const SCFG::Manager &mgr,
|
||||
NBestColl &nbestColl,
|
||||
size_t ind);
|
||||
|
||||
protected:
|
||||
std::vector<const NBest*> m_coll;
|
||||
size_t indIter;
|
||||
|
||||
void Add(const NBest *nbest)
|
||||
{
|
||||
m_coll.push_back(nbest);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -1,466 +0,0 @@
|
||||
// $Id$
|
||||
// vim:tabstop=2
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include <deque>
|
||||
|
||||
#include "PhraseDecoder.h"
|
||||
#include "../../System.h"
|
||||
#include "../../SubPhrase.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
PhraseDecoder::PhraseDecoder(
|
||||
PhraseTableCompact &phraseDictionary,
|
||||
const std::vector<FactorType>* input,
|
||||
const std::vector<FactorType>* output,
|
||||
size_t numScoreComponent
|
||||
// , const std::vector<float>* weight
|
||||
)
|
||||
: m_coding(None), m_numScoreComponent(numScoreComponent),
|
||||
m_containsAlignmentInfo(true), m_maxRank(0),
|
||||
m_symbolTree(0), m_multipleScoreTrees(false),
|
||||
m_scoreTrees(1), m_alignTree(0),
|
||||
m_phraseDictionary(phraseDictionary), m_input(input), m_output(output),
|
||||
// m_weight(weight),
|
||||
m_separator(" ||| ")
|
||||
{ }
|
||||
|
||||
PhraseDecoder::~PhraseDecoder()
|
||||
{
|
||||
if(m_symbolTree)
|
||||
delete m_symbolTree;
|
||||
|
||||
for(size_t i = 0; i < m_scoreTrees.size(); i++)
|
||||
if(m_scoreTrees[i])
|
||||
delete m_scoreTrees[i];
|
||||
|
||||
if(m_alignTree)
|
||||
delete m_alignTree;
|
||||
}
|
||||
|
||||
inline unsigned PhraseDecoder::GetSourceSymbolId(std::string& symbol)
|
||||
{
|
||||
boost::unordered_map<std::string, unsigned>::iterator it
|
||||
= m_sourceSymbolsMap.find(symbol);
|
||||
if(it != m_sourceSymbolsMap.end())
|
||||
return it->second;
|
||||
|
||||
size_t idx = m_sourceSymbols.find(symbol);
|
||||
m_sourceSymbolsMap[symbol] = idx;
|
||||
return idx;
|
||||
}
|
||||
|
||||
inline std::string PhraseDecoder::GetTargetSymbol(unsigned idx) const
|
||||
{
|
||||
if(idx < m_targetSymbols.size())
|
||||
return m_targetSymbols[idx];
|
||||
return std::string("##ERROR##");
|
||||
}
|
||||
|
||||
inline size_t PhraseDecoder::GetREncType(unsigned encodedSymbol)
|
||||
{
|
||||
return (encodedSymbol >> 30) + 1;
|
||||
}
|
||||
|
||||
inline size_t PhraseDecoder::GetPREncType(unsigned encodedSymbol)
|
||||
{
|
||||
return (encodedSymbol >> 31) + 1;
|
||||
}
|
||||
|
||||
inline unsigned PhraseDecoder::GetTranslation(unsigned srcIdx, size_t rank)
|
||||
{
|
||||
size_t srcTrgIdx = m_lexicalTableIndex[srcIdx];
|
||||
return m_lexicalTable[srcTrgIdx + rank].second;
|
||||
}
|
||||
|
||||
size_t PhraseDecoder::GetMaxSourcePhraseLength()
|
||||
{
|
||||
return m_maxPhraseLength;
|
||||
}
|
||||
|
||||
inline unsigned PhraseDecoder::DecodeREncSymbol1(unsigned encodedSymbol)
|
||||
{
|
||||
return encodedSymbol &= ~(3 << 30);
|
||||
}
|
||||
|
||||
inline unsigned PhraseDecoder::DecodeREncSymbol2Rank(unsigned encodedSymbol)
|
||||
{
|
||||
return encodedSymbol &= ~(255 << 24);
|
||||
}
|
||||
|
||||
inline unsigned PhraseDecoder::DecodeREncSymbol2Position(unsigned encodedSymbol)
|
||||
{
|
||||
encodedSymbol &= ~(3 << 30);
|
||||
encodedSymbol >>= 24;
|
||||
return encodedSymbol;
|
||||
}
|
||||
|
||||
inline unsigned PhraseDecoder::DecodeREncSymbol3(unsigned encodedSymbol)
|
||||
{
|
||||
return encodedSymbol &= ~(3 << 30);
|
||||
}
|
||||
|
||||
inline unsigned PhraseDecoder::DecodePREncSymbol1(unsigned encodedSymbol)
|
||||
{
|
||||
return encodedSymbol &= ~(1 << 31);
|
||||
}
|
||||
|
||||
inline int PhraseDecoder::DecodePREncSymbol2Left(unsigned encodedSymbol)
|
||||
{
|
||||
return ((encodedSymbol >> 25) & 63) - 32;
|
||||
}
|
||||
|
||||
inline int PhraseDecoder::DecodePREncSymbol2Right(unsigned encodedSymbol)
|
||||
{
|
||||
return ((encodedSymbol >> 19) & 63) - 32;
|
||||
}
|
||||
|
||||
inline unsigned PhraseDecoder::DecodePREncSymbol2Rank(unsigned encodedSymbol)
|
||||
{
|
||||
return (encodedSymbol & 524287);
|
||||
}
|
||||
|
||||
size_t PhraseDecoder::Load(std::FILE* in)
|
||||
{
|
||||
size_t start = std::ftell(in);
|
||||
size_t read = 0;
|
||||
|
||||
read += std::fread(&m_coding, sizeof(m_coding), 1, in);
|
||||
read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, in);
|
||||
read += std::fread(&m_containsAlignmentInfo, sizeof(m_containsAlignmentInfo), 1, in);
|
||||
read += std::fread(&m_maxRank, sizeof(m_maxRank), 1, in);
|
||||
read += std::fread(&m_maxPhraseLength, sizeof(m_maxPhraseLength), 1, in);
|
||||
|
||||
if(m_coding == REnc) {
|
||||
m_sourceSymbols.load(in);
|
||||
|
||||
size_t size;
|
||||
read += std::fread(&size, sizeof(size_t), 1, in);
|
||||
m_lexicalTableIndex.resize(size);
|
||||
read += std::fread(&m_lexicalTableIndex[0], sizeof(size_t), size, in);
|
||||
|
||||
read += std::fread(&size, sizeof(size_t), 1, in);
|
||||
m_lexicalTable.resize(size);
|
||||
read += std::fread(&m_lexicalTable[0], sizeof(SrcTrg), size, in);
|
||||
}
|
||||
|
||||
m_targetSymbols.load(in);
|
||||
|
||||
m_symbolTree = new CanonicalHuffman<unsigned>(in);
|
||||
|
||||
read += std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, in);
|
||||
if(m_multipleScoreTrees) {
|
||||
m_scoreTrees.resize(m_numScoreComponent);
|
||||
for(size_t i = 0; i < m_numScoreComponent; i++)
|
||||
m_scoreTrees[i] = new CanonicalHuffman<float>(in);
|
||||
} else {
|
||||
m_scoreTrees.resize(1);
|
||||
m_scoreTrees[0] = new CanonicalHuffman<float>(in);
|
||||
}
|
||||
|
||||
if(m_containsAlignmentInfo)
|
||||
m_alignTree = new CanonicalHuffman<AlignPoint>(in);
|
||||
|
||||
size_t end = std::ftell(in);
|
||||
return end - start;
|
||||
}
|
||||
|
||||
std::string PhraseDecoder::MakeSourceKey(std::string &source)
|
||||
{
|
||||
return source + m_separator;
|
||||
}
|
||||
|
||||
TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(
  const ManagerBase &mgr,
  const Phrase<Word> &sourcePhrase,
  bool topLevel,
  bool eval)
{
  // A plain vector is used instead of TargetPhraseCollection to avoid
  // "new" on the shared collection type, which can introduce heavy
  // locking when many threads decode concurrently.
  TargetPhraseVectorPtr result(new TargetPhraseVector());
  size_t remainingBits = 0;

  if (m_coding == PREnc) {
    // With PREnc a (possibly partial) decode of this source phrase may
    // already be cached.
    std::pair<TargetPhraseVectorPtr, size_t> cached
      = m_decodingCache.Retrieve(sourcePhrase);

    if (cached.first != NULL) {
      // Cached and complete, or partial but sufficient when this is not
      // a top-level request.
      if (!topLevel || cached.second == 0)
        return cached.first;

      // Cached but incomplete: start from the cached prefix and resume
      // decoding at the recorded bit offset.
      remainingBits = cached.second;
      result->resize(cached.first->size());
      std::copy(cached.first->begin(), cached.first->end(), result->begin());
    }
  }

  // Map the source phrase string to its identifier in the hash index;
  // GetSize() acts as the "not found" sentinel.
  std::string sourceString = sourcePhrase.GetString(*m_input);
  size_t sourceId = m_phraseDictionary.m_hash[MakeSourceKey(sourceString)];

  if (sourceId == m_phraseDictionary.m_hash.GetSize())
    return TargetPhraseVectorPtr();

  // Fetch the compressed, encoded target phrase collection, either from
  // the in-memory store or from the memory-mapped file.
  std::string encoded;
  if (m_phraseDictionary.m_inMemory)
    encoded = m_phraseDictionary.m_targetPhrasesMemory[sourceId].str();
  else
    encoded = m_phraseDictionary.m_targetPhrasesMapped[sourceId].str();

  BitWrapper<> bitStream(encoded);
  if (m_coding == PREnc && remainingBits)
    bitStream.SeekFromEnd(remainingBits);

  // Decompress and decode the target phrase collection.
  return DecodeCollection(mgr, result, bitStream, sourcePhrase, topLevel, eval);
}
|
||||
|
||||
// Decode the bit stream of one compressed target phrase collection into
// TPCompact records appended to tpv. The decoder is a small state machine:
// New -> Symbol* -> Score* -> [Alignment*] -> Add -> New ...
// Returns an empty pointer on any consistency-check failure (the hash
// index can produce false positives, so malformed streams are expected).
TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
  const ManagerBase &mgr,
  TargetPhraseVectorPtr tpv,
  BitWrapper<> &encodedBitStream,
  const Phrase<Word> &sourcePhrase,
  bool topLevel,
  bool eval)
{
  const System &system = mgr.system;
  FactorCollection &vocab = system.GetVocab();

  // Non-empty tpv means we are completing a partially cached decode;
  // such results must not be re-cached below.
  bool extending = tpv->size();
  size_t bitsLeft = encodedBitStream.TellFromEnd();

  // For REnc, translate each source word to its source-symbol id once.
  std::vector<int> sourceWords;
  if(m_coding == REnc) {
    for(size_t i = 0; i < sourcePhrase.GetSize(); i++) {
      std::string sourceWord
      = sourcePhrase[i].GetString(*m_input);
      unsigned idx = GetSourceSymbolId(sourceWord);
      sourceWords.push_back(idx);
    }
  }

  // Sentinel symbols terminating the word list / alignment list.
  unsigned phraseStopSymbol = 0;
  AlignPoint alignStopSymbol(-1, -1);

  // Scratch buffers reused for each target phrase.
  std::vector<float> scores;
  std::set<AlignPointSizeT> alignment;

  enum DecodeState { New, Symbol, Score, Alignment, Add } state = New;

  size_t srcSize = sourcePhrase.GetSize();

  TPCompact* targetPhrase = NULL;
  while(encodedBitStream.TellFromEnd()) {

    // Start a fresh target phrase directly in the output vector.
    if(state == New) {
      tpv->push_back(TPCompact());
      targetPhrase = &tpv->back();

      alignment.clear();
      scores.clear();

      state = Symbol;
    }

    // Read one target symbol; 0 terminates the word sequence.
    if(state == Symbol) {
      unsigned symbol = m_symbolTree->Read(encodedBitStream);
      if(symbol == phraseStopSymbol) {
        state = Score;
      } else {
        if(m_coding == REnc) {
          std::string wordString;
          size_t type = GetREncType(symbol);

          if(type == 1) {
            // Type 1: plain target symbol id.
            unsigned decodedSymbol = DecodeREncSymbol1(symbol);
            wordString = GetTargetSymbol(decodedSymbol);
          } else if (type == 2) {
            // Type 2: rank-encoded translation with explicit source position.
            size_t rank = DecodeREncSymbol2Rank(symbol);
            size_t srcPos = DecodeREncSymbol2Position(symbol);

            if(srcPos >= sourceWords.size())
              return TargetPhraseVectorPtr();

            wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
            if(m_phraseDictionary.m_useAlignmentInfo) {
              size_t trgPos = targetPhrase->words.size();
              alignment.insert(AlignPoint(srcPos, trgPos));
            }
          } else if(type == 3) {
            // Type 3: rank-encoded translation; source position is implied
            // by the current target position (diagonal alignment).
            size_t rank = DecodeREncSymbol3(symbol);
            size_t srcPos = targetPhrase->words.size();

            if(srcPos >= sourceWords.size())
              return TargetPhraseVectorPtr();

            wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
            if(m_phraseDictionary.m_useAlignmentInfo) {
              size_t trgPos = srcPos;
              alignment.insert(AlignPoint(srcPos, trgPos));
            }
          }

          Word word;
          word.CreateFromString(vocab, system, wordString);
          targetPhrase->words.push_back(word);
        } else if(m_coding == PREnc) {
          // if the symbol is just a word
          if(GetPREncType(symbol) == 1) {
            unsigned decodedSymbol = DecodePREncSymbol1(symbol);

            Word word;
            word.CreateFromString(vocab, system, GetTargetSymbol(decodedSymbol));
            targetPhrase->words.push_back(word);
          }
          // if the symbol is a subphrase pointer: (left, right) delimit a
          // sub-span of the source, rank selects one of its translations
          else {
            int left = DecodePREncSymbol2Left(symbol);
            int right = DecodePREncSymbol2Right(symbol);
            unsigned rank = DecodePREncSymbol2Rank(symbol);

            int srcStart = left + targetPhrase->words.size();
            int srcEnd = srcSize - right - 1;

            // false positive consistency check
            if(0 > srcStart || srcStart > srcEnd || unsigned(srcEnd) >= srcSize)
              return TargetPhraseVectorPtr();

            // false positive consistency check
            if(m_maxRank && rank > m_maxRank)
              return TargetPhraseVectorPtr();

            // set subphrase by default to itself
            TargetPhraseVectorPtr subTpv = tpv;

            // if range smaller than source phrase retrieve subphrase
            // (recursive decode; non-top-level, so cache may satisfy it)
            if(unsigned(srcEnd - srcStart + 1) != srcSize) {
              SubPhrase<Word> subPhrase = sourcePhrase.GetSubPhrase(srcStart, srcEnd - srcStart + 1);
              subTpv = CreateTargetPhraseCollection(mgr, subPhrase, false);
            } else {
              // false positive consistency check: a self-reference must
              // point at an earlier (already decoded) phrase
              if(rank >= tpv->size()-1)
                return TargetPhraseVectorPtr();
            }

            // false positive consistency check
            if(subTpv != NULL && rank < subTpv->size()) {
              // insert the subphrase into the main target phrase
              TPCompact& subTp = subTpv->at(rank);
              if(m_phraseDictionary.m_useAlignmentInfo) {
                // reconstruct the alignment data based on the alignment of the subphrase
                for(std::set<AlignPointSizeT>::const_iterator it = subTp.alignment.begin();
                    it != subTp.alignment.end(); it++) {
                  alignment.insert(AlignPointSizeT(srcStart + it->first,
                                                   targetPhrase->words.size() + it->second));
                }
              }

              std::copy(subTp.words.begin(), subTp.words.end(), std::back_inserter(targetPhrase->words));
            } else
              return TargetPhraseVectorPtr();
          }
        } else {
          // Coding == None: the symbol is the target symbol id itself.
          Word word;
          word.CreateFromString(vocab, system, GetTargetSymbol(symbol));
          targetPhrase->words.push_back(word);
        }
      }
    } else if(state == Score) {
      // One Huffman tree per score component, or a single shared tree.
      size_t idx = m_multipleScoreTrees ? scores.size() : 0;
      float score = m_scoreTrees[idx]->Read(encodedBitStream);
      scores.push_back(score);

      if(scores.size() == m_numScoreComponent) {
        targetPhrase->scores = scores;

        if(m_containsAlignmentInfo)
          state = Alignment;
        else
          state = Add;
      }
    } else if(state == Alignment) {
      // Read alignment points until the (-1,-1) stop symbol.
      AlignPoint alignPoint = m_alignTree->Read(encodedBitStream);
      if(alignPoint == alignStopSymbol) {
        state = Add;
      } else {
        if(m_phraseDictionary.m_useAlignmentInfo)
          alignment.insert(AlignPointSizeT(alignPoint));
      }
    }

    // Finalize the current phrase and decide whether to continue.
    if(state == Add) {
      if(m_phraseDictionary.m_useAlignmentInfo) {
        size_t sourceSize = sourcePhrase.GetSize();
        size_t targetSize = targetPhrase->words.size();
        // Out-of-range alignment points indicate a false-positive decode.
        for(std::set<AlignPointSizeT>::iterator it = alignment.begin(); it != alignment.end(); it++) {
          if(it->first >= sourceSize || it->second >= targetSize)
            return TargetPhraseVectorPtr();
        }
        targetPhrase->alignment = alignment;
      }

      if(m_coding == PREnc) {
        // Track the bit offset of the last phrase within rank limit so a
        // partial decode can be resumed from the cache later.
        if(!m_maxRank || tpv->size() <= m_maxRank)
          bitsLeft = encodedBitStream.TellFromEnd();

        // Non-top-level decodes may stop once m_maxRank phrases exist.
        if(!topLevel && m_maxRank && tpv->size() >= m_maxRank)
          break;
      }

      // Fewer than 8 bits left is padding, not another phrase.
      if(encodedBitStream.TellFromEnd() <= 8)
        break;

      state = New;
    }
  }

  // Cache a fresh (non-resumed) PREnc decode, recording where it stopped.
  if(m_coding == PREnc && !extending) {
    bitsLeft = bitsLeft > 8 ? bitsLeft : 0;
    m_decodingCache.Cache(sourcePhrase, tpv, bitsLeft, m_maxRank);
  }

  return tpv;
}
|
||||
|
||||
void PhraseDecoder::PruneCache()
|
||||
{
|
||||
m_decodingCache.Prune();
|
||||
}
|
||||
|
||||
}
|
@ -1,142 +0,0 @@
|
||||
// $Id$
|
||||
// vim:tabstop=2
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include <string>
|
||||
#include <iterator>
|
||||
#include <algorithm>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "PhraseTableCompact.h"
|
||||
#include "StringVector.h"
|
||||
#include "CanonicalHuffman.h"
|
||||
#include "TargetPhraseCollectionCache.h"
|
||||
|
||||
#include "../../Phrase.h"
|
||||
#include "../../ManagerBase.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class PhraseTableCompact;
|
||||
|
||||
// Decoder for the compact ("minphr") binary phrase table: loads the
// vocabularies, Huffman trees and lexical tables from file and decodes
// compressed target phrase collections on demand.
class PhraseDecoder
{
protected:

  friend class PhraseTableCompact;

  // Alignment point as a (source, target) byte pair as stored on disk.
  typedef std::pair<unsigned char, unsigned char> AlignPoint;
  // (source symbol id, target symbol id) entry of the lexical table.
  typedef std::pair<unsigned, unsigned> SrcTrg;

  // Encoding scheme used when the table was built:
  // None (plain symbols), REnc, or PREnc — see DecodeCollection.
  enum Coding { None, REnc, PREnc } m_coding;

  size_t m_numScoreComponent;    // number of scores per target phrase
  bool m_containsAlignmentInfo;  // alignment stream present in the file
  size_t m_maxRank;              // rank cutoff used by REnc/PREnc decoding
  size_t m_maxPhraseLength;      // longest source phrase seen at build time

  // Source-symbol string -> id map plus the symbol string tables.
  boost::unordered_map<std::string, unsigned> m_sourceSymbolsMap;
  StringVector<unsigned char, unsigned, std::allocator> m_sourceSymbols;
  StringVector<unsigned char, unsigned, std::allocator> m_targetSymbols;

  // Per-source-symbol offsets into the flat lexical table.
  std::vector<size_t> m_lexicalTableIndex;
  std::vector<SrcTrg> m_lexicalTable;

  // Huffman tree for target symbols (owned; allocated in Load).
  CanonicalHuffman<unsigned>* m_symbolTree;

  // Either one score tree per component or a single shared tree.
  bool m_multipleScoreTrees;
  std::vector<CanonicalHuffman<float>*> m_scoreTrees;

  // Huffman tree for alignment points (only when m_containsAlignmentInfo).
  CanonicalHuffman<AlignPoint>* m_alignTree;

  // Cache of decoded target phrase collections (used with PREnc).
  TargetPhraseCollectionCache m_decodingCache;

  // Owning table: provides the hash index and the raw phrase storage.
  PhraseTableCompact& m_phraseDictionary;

  // ***********************************************

  // Input/output factor configuration, owned by the phrase table.
  const std::vector<FactorType>* m_input;
  const std::vector<FactorType>* m_output;

  // Separator appended to source phrases to form hash keys.
  std::string m_separator;

  // ***********************************************

  unsigned GetSourceSymbolId(std::string& s);
  std::string GetTargetSymbol(unsigned id) const;

  // Classify an encoded symbol by its REnc / PREnc variant.
  size_t GetREncType(unsigned encodedSymbol);
  size_t GetPREncType(unsigned encodedSymbol);

  // Target symbol id of the rank-th translation of source symbol srcIdx.
  unsigned GetTranslation(unsigned srcIdx, size_t rank);

  size_t GetMaxSourcePhraseLength();

  // Bit-level decoders for the three REnc symbol variants.
  unsigned DecodeREncSymbol1(unsigned encodedSymbol);
  unsigned DecodeREncSymbol2Rank(unsigned encodedSymbol);
  unsigned DecodeREncSymbol2Position(unsigned encodedSymbol);
  unsigned DecodeREncSymbol3(unsigned encodedSymbol);

  // Bit-level decoders for the two PREnc symbol variants.
  unsigned DecodePREncSymbol1(unsigned encodedSymbol);
  int DecodePREncSymbol2Left(unsigned encodedSymbol);
  int DecodePREncSymbol2Right(unsigned encodedSymbol);
  unsigned DecodePREncSymbol2Rank(unsigned encodedSymbol);

  // Appends m_separator; keys in the hash index include it.
  std::string MakeSourceKey(std::string &);

public:

  PhraseDecoder(
    PhraseTableCompact &phraseDictionary,
    const std::vector<FactorType>* input,
    const std::vector<FactorType>* output,
    size_t numScoreComponent
  );

  ~PhraseDecoder();

  // Read decoder data from the already-open binary table file, starting
  // at the current position; returns the number of bytes consumed.
  size_t Load(std::FILE* in);

  // Decode (or fetch from cache) all target phrases for sourcePhrase.
  TargetPhraseVectorPtr CreateTargetPhraseCollection(
    const ManagerBase &mgr,
    const Phrase<Word> &sourcePhrase,
    bool topLevel = false,
    bool eval = true);

  // Decode one compressed collection from encodedBitStream into tpv.
  TargetPhraseVectorPtr DecodeCollection(
    const ManagerBase &mgr,
    TargetPhraseVectorPtr tpv,
    BitWrapper<> &encodedBitStream,
    const Phrase<Word> &sourcePhrase,
    bool topLevel,
    bool eval);

  // Shrink m_decodingCache (called between sentences).
  void PruneCache();
};
|
||||
|
||||
}
|
||||
|
@ -1,222 +0,0 @@
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
#include <boost/thread/tss.hpp>
|
||||
#include "PhraseTableCompact.h"
|
||||
#include "PhraseDecoder.h"
|
||||
#include "../../PhraseBased/InputPath.h"
|
||||
#include "../../PhraseBased/Manager.h"
|
||||
#include "../../PhraseBased/TargetPhrases.h"
|
||||
#include "../../PhraseBased/TargetPhraseImpl.h"
|
||||
#include "../../PhraseBased/Sentence.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace boost::algorithm;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
bool PhraseTableCompact::s_inMemoryByDefault = false;
|
||||
|
||||
// Construct from a config line; member defaults may be overridden by
// SetParameter via ReadParameters().
PhraseTableCompact::PhraseTableCompact(size_t startInd, const std::string &line)
  :PhraseTable(startInd, line)
  ,m_inMemory(s_inMemoryByDefault)
  ,m_useAlignmentInfo(true)
  ,m_hash(10, 16)   // order bits / fingerprint bits of the hash index
  ,m_phraseDecoder(0)
{
  // Parse key=value arguments from the feature line.
  ReadParameters();
}
|
||||
|
||||
// NOTE(review): m_phraseDecoder is heap-allocated in Load() and never
// deleted here — confirm whether the leak is intentional (table lives
// for the process lifetime).
PhraseTableCompact::~PhraseTableCompact()
{

}
|
||||
|
||||
// Open the ".minphr" binary file and load, in file order: the source
// phrase hash index, the decoder data (vocabularies, Huffman trees),
// and the target phrase collections (in memory or memory-mapped).
// Throws on a missing/unopenable file or an empty section.
void PhraseTableCompact::Load(System &system)
{
  std::string tFilePath = m_path;

  // Append the canonical suffix if the config gave a bare path.
  std::string suffix = ".minphr";
  if (!ends_with(tFilePath, suffix)) tFilePath += suffix;
  if (!FileExists(tFilePath))
    throw runtime_error("Error: File " + tFilePath + " does not exist.");

  m_phraseDecoder
  = new PhraseDecoder(*this, &m_input, &m_output, GetNumScores());

  // Open in binary mode ("rb"): the file stores raw integers and floats,
  // and text-mode translation would corrupt them on some platforms.
  std::FILE* pFile = std::fopen(tFilePath.c_str(), "rb");
  if (pFile == NULL)
    throw runtime_error("Error: Could not open file " + tFilePath);

  // Load source phrase index into memory.
  size_t indexSize = m_hash.Load(pFile);

  // Huffman trees, symbol tables and lexical table for the decoder.
  size_t coderSize = m_phraseDecoder->Load(pFile);

  size_t phraseSize;
  if (m_inMemory) {
    // Load target phrase collections into memory.
    phraseSize = m_targetPhrasesMemory.load(pFile, false);
  } else {
    // Keep target phrase collections on disk (memory-mapped); the FILE*
    // must therefore remain open for the lifetime of the table.
    phraseSize = m_targetPhrasesMapped.load(pFile, true);
  }

  UTIL_THROW_IF2(indexSize == 0 || coderSize == 0 || phraseSize == 0,
                 "Not successfully loaded");
}
|
||||
|
||||
void PhraseTableCompact::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "blah") {
|
||||
|
||||
}
|
||||
else {
|
||||
PhraseTable::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseTableCompact::CleanUpAfterSentenceProcessing() const
|
||||
{
|
||||
//if(!m_sentenceCache.get())
|
||||
// m_sentenceCache.reset(new PhraseCache());
|
||||
|
||||
m_phraseDecoder->PruneCache();
|
||||
//m_sentenceCache->clear();
|
||||
}
|
||||
|
||||
|
||||
// pb
|
||||
void PhraseTableCompact::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const
|
||||
{
|
||||
size_t inputSize = static_cast<const Sentence&>(mgr.GetInput()).GetSize();
|
||||
InputPaths &inputPathsCast = static_cast<InputPaths&>(inputPaths);
|
||||
|
||||
for (size_t i = 0; i < inputSize; ++i) {
|
||||
for (size_t startPos = 0; startPos < inputSize; ++startPos) {
|
||||
size_t endPos = startPos + i;
|
||||
if (endPos >= inputSize) {
|
||||
break;
|
||||
}
|
||||
InputPath *path = inputPathsCast.GetMatrix().GetValue(startPos, i);
|
||||
//cerr << "path=" << path->Debug(mgr.system) << endl;
|
||||
TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path);
|
||||
path->AddTargetPhrases(*this, tps);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Decode the target phrases for one input path, convert them to
// pool-allocated TargetPhraseImpl objects, sort/prune and evaluate.
// Returns NULL when the table holds no phrases for this source span.
TargetPhrases *PhraseTableCompact::Lookup(const Manager &mgr, MemPool &pool,
    InputPath &inputPath) const
{
  TargetPhrases *ret = NULL;

  const Phrase<Word> &sourcePhrase = inputPath.subPhrase;

  // There is no such source phrase if it is longer than the longest
  // source phrase observed during compilation.
  if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
    return ret;

  // Retrieve target phrase collection from phrase table.
  TargetPhraseVectorPtr decodedPhraseColl
  = m_phraseDecoder->CreateTargetPhraseCollection(mgr, sourcePhrase, true, true);

  if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
    // (The previous implementation also built an unused deep copy of
    // decodedPhraseColl here; that dead allocation has been removed.)
    ret = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, decodedPhraseColl->size());

    for (size_t i = 0; i < decodedPhraseColl->size(); ++i) {
      const TPCompact &tpCompact = decodedPhraseColl->at(i);
      const TargetPhraseImpl *tp = CreateTargetPhrase(mgr, tpCompact, sourcePhrase);
      ret->AddTargetPhrase(*tp);
    }

    ret->SortAndPrune(m_tableLimit);
    mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *ret, sourcePhrase);
  }

  return ret;
}
|
||||
|
||||
// Build a pool-allocated TargetPhraseImpl from one decoded compact
// phrase: copies words, scores and terminal alignment, then runs the
// stateless feature functions on the new phrase pair.
const TargetPhraseImpl *PhraseTableCompact::CreateTargetPhrase(
  const Manager &mgr,
  const TPCompact &tpCompact,
  const Phrase<Word> &sourcePhrase) const
{
  MemPool &pool = mgr.GetPool();

  size_t numWords = tpCompact.words.size();
  TargetPhraseImpl *phrase = new TargetPhraseImpl(pool, *this, mgr.system, numWords);

  // Surface words.
  for (size_t pos = 0; pos < numWords; ++pos) {
    (*phrase)[pos] = tpCompact.words[pos];
  }

  // Model scores.
  phrase->GetScores().Assign(mgr.system, *this, tpCompact.scores);

  // Terminal word alignment.
  phrase->SetAlignTerm(tpCompact.alignment);

  // Evaluate stateless features on the freshly built phrase pair.
  mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, sourcePhrase, *phrase);

  return phrase;
}
|
||||
|
||||
|
||||
// scfg
|
||||
// scfg: compact tables support phrase-based decoding only.
void PhraseTableCompact::InitActiveChart(
  MemPool &pool,
  const SCFG::Manager &mgr,
  SCFG::InputPath &path) const
{
  UTIL_THROW2("Not implemented");
}
|
||||
|
||||
// scfg: compact tables support phrase-based decoding only.
void PhraseTableCompact::Lookup(
  MemPool &pool,
  const SCFG::Manager &mgr,
  size_t maxChartSpan,
  const SCFG::Stacks &stacks,
  SCFG::InputPath &path) const
{
  UTIL_THROW2("Not implemented");
}
|
||||
|
||||
// scfg: compact tables support phrase-based decoding only.
void PhraseTableCompact::LookupGivenNode(
  MemPool &pool,
  const SCFG::Manager &mgr,
  const SCFG::ActiveChartEntry &prevEntry,
  const SCFG::Word &wordSought,
  const Moses2::Hypotheses *hypos,
  const Moses2::Range &subPhraseRange,
  SCFG::InputPath &outPath) const
{
  UTIL_THROW2("Not implemented");
}
|
||||
|
||||
}
|
@ -1,68 +0,0 @@
|
||||
#pragma once
|
||||
#include "../PhraseTable.h"
|
||||
#include "BlockHashIndex.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
class PhraseDecoder;
|
||||
class TPCompact;
|
||||
|
||||
// Phrase table backed by the compact binary ".minphr" format. Holds the
// source-phrase hash index and the compressed target phrase storage;
// decoding is delegated to PhraseDecoder.
class PhraseTableCompact: public PhraseTable
{
public:
  PhraseTableCompact(size_t startInd, const std::string &line);
  virtual ~PhraseTableCompact();
  // Open the ".minphr" file and load index, decoder data and phrases.
  void Load(System &system);
  virtual void SetParameter(const std::string& key, const std::string& value);

  // Prunes the decoder's phrase cache after each sentence.
  virtual void CleanUpAfterSentenceProcessing() const;

  // Decode the target phrases for one input path (NULL if none).
  virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
                                InputPath &inputPath) const;

  // scfg (not implemented for compact tables)
  virtual void InitActiveChart(
    MemPool &pool,
    const SCFG::Manager &mgr,
    SCFG::InputPath &path) const;

  // Phrase-based lookup over all spans of the input sentence.
  virtual void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;

  // scfg (not implemented for compact tables)
  virtual void Lookup(
    MemPool &pool,
    const SCFG::Manager &mgr,
    size_t maxChartSpan,
    const SCFG::Stacks &stacks,
    SCFG::InputPath &path) const;

protected:
  // Default value of m_inMemory for newly constructed tables.
  static bool s_inMemoryByDefault;
  // Load target phrases into RAM (true) or memory-map them (false).
  bool m_inMemory;
  bool m_useAlignmentInfo;

  // Hash index: source phrase key -> phrase collection id.
  BlockHashIndex m_hash;

  // Compressed target phrase collections; exactly one of these is
  // populated depending on m_inMemory.
  StringVector<unsigned char, size_t, MmapAllocator> m_targetPhrasesMapped;
  StringVector<unsigned char, size_t, std::allocator> m_targetPhrasesMemory;

  friend class PhraseDecoder;
  PhraseDecoder* m_phraseDecoder;   // allocated in Load()

  // Convert a decoded compact phrase into a pool-allocated phrase.
  const TargetPhraseImpl *CreateTargetPhrase(
    const Manager &mgr,
    const TPCompact &tpCompact,
    const Phrase<Word> &sourcePhrase) const;

  // SCFG (not implemented)
  virtual void LookupGivenNode(
    MemPool &pool,
    const SCFG::Manager &mgr,
    const SCFG::ActiveChartEntry &prevEntry,
    const SCFG::Word &wordSought,
    const Moses2::Hypotheses *hypos,
    const Moses2::Range &subPhraseRange,
    SCFG::InputPath &outPath) const;

};
|
||||
|
||||
}
|
@ -1,266 +0,0 @@
|
||||
/*
|
||||
* StoreTarget.cpp
|
||||
*
|
||||
* Created on: 19 Jan 2016
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <boost/foreach.hpp>
|
||||
#include "StoreTarget.h"
|
||||
#include "line_splitter.hh"
|
||||
#include "probing_hash_utils.hh"
|
||||
#include "../../legacy/OutputFileStream.h"
|
||||
#include "../../legacy/Util2.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
// Open (and truncate) the binary target-collection file under basepath;
// the vocabulary file path is fixed to basepath + "/TargetVocab.dat".
StoreTarget::StoreTarget(const std::string &basepath)
  :m_basePath(basepath)
  ,m_vocab(basepath + "/TargetVocab.dat")
{
  std::string path = basepath + "/TargetColl.dat";
  m_fileTargetColl.open(path.c_str(),
                        std::ios::out | std::ios::binary | std::ios::ate | std::ios::trunc);
  if (!m_fileTargetColl.is_open()) {
    // NOTE(review): throws a char* literal, not std::exception — callers
    // must catch const char* for this to be handled.
    throw "can't create file ";
  }

}
|
||||
|
||||
// All buffered rules must have been flushed via Save() before
// destruction; writes the vocabulary file as a final step.
StoreTarget::~StoreTarget()
{
  assert(m_coll.empty());
  m_fileTargetColl.close();

  // vocab
  m_vocab.Save();
}
|
||||
|
||||
uint64_t StoreTarget::Save()
|
||||
{
|
||||
uint64_t ret = m_fileTargetColl.tellp();
|
||||
|
||||
// save to disk
|
||||
uint64_t numTP = m_coll.size();
|
||||
m_fileTargetColl.write((char*) &numTP, sizeof(uint64_t));
|
||||
|
||||
for (size_t i = 0; i < m_coll.size(); ++i) {
|
||||
Save(*m_coll[i]);
|
||||
}
|
||||
|
||||
// clear coll
|
||||
RemoveAllInColl(m_coll);
|
||||
m_coll.clear();
|
||||
|
||||
// starting position of coll
|
||||
return ret;
|
||||
}
|
||||
|
||||
void StoreTarget::Save(const target_text &rule)
|
||||
{
|
||||
// metadata for each tp
|
||||
TargetPhraseInfo tpInfo;
|
||||
tpInfo.alignTerm = GetAlignId(rule.word_align_term);
|
||||
tpInfo.alignNonTerm = GetAlignId(rule.word_align_non_term);
|
||||
tpInfo.numWords = rule.target_phrase.size();
|
||||
tpInfo.propLength = rule.property.size();
|
||||
|
||||
//cerr << "TPInfo=" << sizeof(TPInfo);
|
||||
m_fileTargetColl.write((char*) &tpInfo, sizeof(TargetPhraseInfo));
|
||||
|
||||
// scores
|
||||
for (size_t i = 0; i < rule.prob.size(); ++i) {
|
||||
float prob = rule.prob[i];
|
||||
m_fileTargetColl.write((char*) &prob, sizeof(prob));
|
||||
}
|
||||
|
||||
// tp
|
||||
for (size_t i = 0; i < rule.target_phrase.size(); ++i) {
|
||||
uint32_t vocabId = rule.target_phrase[i];
|
||||
m_fileTargetColl.write((char*) &vocabId, sizeof(vocabId));
|
||||
}
|
||||
|
||||
// prop TODO
|
||||
|
||||
}
|
||||
|
||||
void StoreTarget::SaveAlignment()
|
||||
{
|
||||
std::string path = m_basePath + "/Alignments.dat";
|
||||
Moses2::OutputFileStream file(path);
|
||||
|
||||
BOOST_FOREACH(Alignments::value_type &valPair, m_aligns) {
|
||||
file << valPair.second << "\t";
|
||||
|
||||
const std::vector<size_t> &aligns = valPair.first;
|
||||
BOOST_FOREACH(size_t align, aligns) {
|
||||
file << align << " ";
|
||||
}
|
||||
file << endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Parse one extracted phrase-table line into a target_text record and
// buffer it in m_coll until the next Save().
//   line     - pre-split phrase-table fields (target, probs, alignment, property)
//   log_prob - convert probabilities to floored log scores
//   scfg     - treat "[X]"-style tokens as non-terminals and split the
//              alignment into terminal / non-terminal parts
void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg)
{
  target_text *rule = new target_text;

  // target_phrase: space-separated words, each possibly factored with '|'.
  vector<bool> nonTerms;
  util::TokenIter<util::SingleCharacter> it;
  it = util::TokenIter<util::SingleCharacter>(line.target_phrase,
       util::SingleCharacter(' '));
  while (it) {
    StringPiece word = *it;

    bool nonTerm = false;
    if (scfg) {
      // not really sure how to handle factored SCFG and NT
      if (scfg && word[0] == '[' && word[word.size() - 1] == ']') {
        nonTerm = true;
      }
      // remember per-target-position NT flags for the alignment pass below
      nonTerms.push_back(nonTerm);
    }

    // Map every factor of the word to a vocabulary id.
    util::TokenIter<util::SingleCharacter> itFactor;
    itFactor = util::TokenIter<util::SingleCharacter>(word,
               util::SingleCharacter('|'));
    while (itFactor) {
      StringPiece factor = *itFactor;

      string factorStr = factor.as_string();
      uint32_t vocabId = m_vocab.GetVocabId(factorStr);

      rule->target_phrase.push_back(vocabId);

      itFactor++;
    }

    it++;
  }

  // probs: space-separated floats, optionally converted to floored logs.
  it = util::TokenIter<util::SingleCharacter>(line.prob,
       util::SingleCharacter(' '));
  while (it) {
    string tok = it->as_string();
    float prob = Scan<float>(tok);

    if (log_prob) {
      prob = FloorScore(log(prob));
      // nudge an exact 0 away from zero
      // NOTE(review): presumably 0 is treated as "unset" downstream — confirm
      if (prob == 0.0f) prob = 0.0000000001;
    }

    rule->prob.push_back(prob);
    it++;
  }

  // alignment: space-separated "src-trg" pairs; for SCFG rules the pairs
  // are split into terminal and non-terminal alignment lists.
  it = util::TokenIter<util::SingleCharacter>(line.word_align,
       util::SingleCharacter(' '));
  while (it) {
    string tokPair = Trim(it->as_string());
    if (tokPair.empty()) {
      break;
    }

    vector<size_t> alignPair = Tokenize<size_t>(tokPair, "-");
    assert(alignPair.size() == 2);

    bool nonTerm = false;
    size_t sourcePos = alignPair[0];
    size_t targetPos = alignPair[1];
    if (scfg) {
      nonTerm = nonTerms[targetPos];
    }

    if (nonTerm) {
      rule->word_align_non_term.push_back(sourcePos);
      rule->word_align_non_term.push_back(targetPos);
    }
    else {
      rule->word_align_term.push_back(sourcePos);
      rule->word_align_term.push_back(targetPos);
    }

    it++;
  }

  // extra scores: LexRO reordering scores embedded in the property column
  // are appended to rule->prob and removed from the property string.
  string prop = line.property.as_string();
  AppendLexRO(prop, rule->prob, log_prob);

  // properties
  /*
  for (size_t i = 0; i < prop.size(); ++i) {
    rule->property.push_back(prop[i]);
  }
  */
  m_coll.push_back(rule);
}
|
||||
|
||||
uint32_t StoreTarget::GetAlignId(const std::vector<size_t> &align)
|
||||
{
|
||||
boost::unordered_map<std::vector<size_t>, uint32_t>::iterator iter =
|
||||
m_aligns.find(align);
|
||||
if (iter == m_aligns.end()) {
|
||||
uint32_t ind = m_aligns.size();
|
||||
m_aligns[align] = ind;
|
||||
return ind;
|
||||
}
|
||||
else {
|
||||
return iter->second;
|
||||
}
|
||||
}
|
||||
|
||||
void StoreTarget::AppendLexRO(std::string &prop, std::vector<float> &retvector,
|
||||
bool log_prob) const
|
||||
{
|
||||
size_t startPos = prop.find("{{LexRO ");
|
||||
|
||||
if (startPos != string::npos) {
|
||||
size_t endPos = prop.find("}}", startPos + 8);
|
||||
string lexProb = prop.substr(startPos + 8, endPos - startPos - 8);
|
||||
//cerr << "lexProb=" << lexProb << endl;
|
||||
|
||||
// append lex probs to pt probs
|
||||
vector<float> scores = Tokenize<float>(lexProb);
|
||||
|
||||
if (log_prob) {
|
||||
for (size_t i = 0; i < scores.size(); ++i) {
|
||||
scores[i] = FloorScore(log(scores[i]));
|
||||
if (scores[i] == 0.0f) scores[i] = 0.0000000001;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < scores.size(); ++i) {
|
||||
retvector.push_back(scores[i]);
|
||||
}
|
||||
|
||||
// exclude LexRO property from property column
|
||||
prop = prop.substr(0, startPos)
|
||||
+ prop.substr(endPos + 2, prop.size() - endPos - 2);
|
||||
//cerr << "line.property_to_be_binarized=" << line.property_to_be_binarized << "AAAA" << endl;
|
||||
}
|
||||
}
|
||||
|
||||
} /* namespace Moses2 */
|
@ -1,51 +0,0 @@
|
||||
/*
|
||||
* StoreTarget.h
|
||||
*
|
||||
* Created on: 19 Jan 2016
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <inttypes.h>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include "StoreVocab.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class line_text;
|
||||
class target_text;
|
||||
|
||||
// Accumulates parsed phrase-table rules and serializes them into the
// binary target-collection / alignment / vocabulary files used by the
// probing phrase table.
class StoreTarget
{
public:
  StoreTarget(const std::string &basepath);
  virtual ~StoreTarget();

  // Flush buffered rules to TargetColl.dat; returns the file offset at
  // which this collection record starts.
  uint64_t Save();
  // Write the deduplicated alignment table to Alignments.dat.
  void SaveAlignment();

  // Parse one phrase-table line and buffer it until the next Save().
  void Append(const line_text &line, bool log_prob, bool scfg);
protected:
  std::string m_basePath;          // output directory
  std::fstream m_fileTargetColl;   // TargetColl.dat output stream
  StoreVocab<uint32_t> m_vocab;    // target-side vocabulary

  // Deduplication map: alignment vector -> dense numeric id.
  typedef boost::unordered_map<std::vector<size_t>, uint32_t> Alignments;
  Alignments m_aligns;

  // Rules accumulated since the last Save() (owned; freed on Save).
  std::vector<target_text*> m_coll;

  uint32_t GetAlignId(const std::vector<size_t> &align);
  // Serialize one rule record to m_fileTargetColl.
  void Save(const target_text &rule);

  // Move "{{LexRO ...}}" scores from prop into retvector.
  void AppendLexRO(std::string &prop, std::vector<float> &retvector,
                   bool log_prob) const;

};
|
||||
|
||||
} /* namespace Moses2 */
|
||||
|
@ -1,64 +0,0 @@
|
||||
/*
|
||||
* StoreVocab.h
|
||||
*
|
||||
* Created on: 15 Jun 2016
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include "../../legacy/OutputFileStream.h"
|
||||
#include "../../legacy/Util2.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
template<typename VOCABID>
|
||||
class StoreVocab
|
||||
{
|
||||
protected:
|
||||
std::string m_path;
|
||||
|
||||
typedef boost::unordered_map<std::string, VOCABID> Coll;
|
||||
Coll m_vocab;
|
||||
|
||||
public:
|
||||
StoreVocab(const std::string &path)
|
||||
:m_path(path)
|
||||
{}
|
||||
|
||||
virtual ~StoreVocab() {}
|
||||
|
||||
VOCABID GetVocabId(const std::string &word)
|
||||
{
|
||||
typename Coll::iterator iter = m_vocab.find(word);
|
||||
if (iter == m_vocab.end()) {
|
||||
VOCABID ind = m_vocab.size() + 1;
|
||||
m_vocab[word] = ind;
|
||||
return ind;
|
||||
}
|
||||
else {
|
||||
return iter->second;
|
||||
}
|
||||
}
|
||||
|
||||
void Insert(VOCABID id, const std::string &word)
|
||||
{
|
||||
m_vocab[word] = id;
|
||||
}
|
||||
|
||||
void Save()
|
||||
{
|
||||
OutputFileStream strme(m_path);
|
||||
|
||||
typename Coll::const_iterator iter;
|
||||
for (iter = m_vocab.begin(); iter != m_vocab.end(); ++iter) {
|
||||
strme << iter->first << "\t" << iter->second << std::endl;
|
||||
}
|
||||
|
||||
strme.Close();
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace Moses2 */
|
||||
|
@ -1,44 +0,0 @@
|
||||
#include <iostream>
|
||||
#include "hash.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
uint64_t getHash(StringPiece text)
|
||||
{
|
||||
std::size_t len = text.size();
|
||||
uint64_t key = util::MurmurHashNative(text.data(), len);
|
||||
return key;
|
||||
}
|
||||
|
||||
std::vector<uint64_t> getVocabIDs(const StringPiece &textin)
|
||||
{
|
||||
//Tokenize
|
||||
std::vector<uint64_t> output;
|
||||
|
||||
util::TokenIter<util::SingleCharacter> itWord(textin, util::SingleCharacter(' '));
|
||||
|
||||
while (itWord) {
|
||||
StringPiece word = *itWord;
|
||||
uint64_t id = 0;
|
||||
|
||||
util::TokenIter<util::SingleCharacter> itFactor(word, util::SingleCharacter('|'));
|
||||
while (itFactor) {
|
||||
StringPiece factor = *itFactor;
|
||||
//cerr << "factor=" << factor << endl;
|
||||
|
||||
id += getHash(factor);
|
||||
itFactor++;
|
||||
}
|
||||
|
||||
output.push_back(id);
|
||||
itWord++;
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,17 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "util/string_piece.hh"
|
||||
#include "util/murmur_hash.hh"
|
||||
#include "util/string_piece.hh" //Tokenization and work with StringPiece
|
||||
#include "util/tokenize_piece.hh"
|
||||
#include <vector>
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
//Gets the MurmurmurHash for give string
|
||||
uint64_t getHash(StringPiece text);
|
||||
|
||||
std::vector<uint64_t> getVocabIDs(const StringPiece &textin);
|
||||
|
||||
}
|
@ -1,59 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "util/string_piece.hh"
|
||||
#include "util/tokenize_piece.hh"
|
||||
#include "util/file_piece.hh"
|
||||
#include <vector>
|
||||
#include <cstdlib> //atof
|
||||
#include "util/string_piece.hh" //Tokenization and work with StringPiece
|
||||
#include "util/tokenize_piece.hh"
|
||||
#include <vector>
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
//Struct for holding processed line
|
||||
struct line_text
|
||||
{
|
||||
StringPiece source_phrase;
|
||||
StringPiece target_phrase;
|
||||
StringPiece prob;
|
||||
StringPiece word_align;
|
||||
StringPiece counts;
|
||||
StringPiece sparse_score;
|
||||
StringPiece property;
|
||||
std::string property_to_be_binarized;
|
||||
};
|
||||
|
||||
//Struct for holding processed line
|
||||
struct target_text
|
||||
{
|
||||
std::vector<unsigned int> target_phrase;
|
||||
std::vector<float> prob;
|
||||
std::vector<size_t> word_align_term;
|
||||
std::vector<size_t> word_align_non_term;
|
||||
std::vector<char> counts;
|
||||
std::vector<char> sparse_score;
|
||||
std::vector<char> property;
|
||||
|
||||
/*
|
||||
void Reset()
|
||||
{
|
||||
target_phrase.clear();
|
||||
prob.clear();
|
||||
word_all1.clear();
|
||||
counts.clear();
|
||||
sparse_score.clear();
|
||||
property.clear();
|
||||
}
|
||||
*/
|
||||
};
|
||||
|
||||
//Ask if it's better to have it receive a pointer to a line_text struct
|
||||
line_text splitLine(const StringPiece &textin, bool scfg);
|
||||
void reformatSCFG(line_text &output);
|
||||
|
||||
std::vector<unsigned char> splitWordAll1(const StringPiece &textin);
|
||||
|
||||
}
|
||||
|
@ -1,50 +0,0 @@
|
||||
#include "probing_hash_utils.hh"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
//Read table from disk, return memory map location
|
||||
char * readTable(const char * filename, size_t size)
|
||||
{
|
||||
//Initial position of the file is the end of the file, thus we know the size
|
||||
int fd;
|
||||
char * map;
|
||||
|
||||
fd = open(filename, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
perror("Error opening file for reading");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
map = (char *) mmap(0, size, PROT_READ, MAP_SHARED, fd, 0);
|
||||
|
||||
if (map == MAP_FAILED) {
|
||||
close(fd);
|
||||
perror("Error mmapping the file");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
void serialize_table(char *mem, size_t size, const std::string &filename)
|
||||
{
|
||||
std::ofstream os(filename.c_str(), std::ios::binary);
|
||||
os.write((const char*) &mem[0], size);
|
||||
os.close();
|
||||
|
||||
}
|
||||
|
||||
uint64_t getKey(const uint64_t source_phrase[], size_t size)
|
||||
{
|
||||
//TOO SLOW
|
||||
//uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size());
|
||||
uint64_t key = 0;
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
key += (source_phrase[i] << i);
|
||||
}
|
||||
return key;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,55 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "util/probing_hash_table.hh"
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include <fcntl.h>
|
||||
#include <fstream>
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
#define API_VERSION 15
|
||||
|
||||
//Hash table entry
|
||||
struct Entry
|
||||
{
|
||||
typedef uint64_t Key;
|
||||
Key key;
|
||||
|
||||
Key GetKey() const
|
||||
{
|
||||
return key;
|
||||
}
|
||||
|
||||
void SetKey(Key to)
|
||||
{
|
||||
key = to;
|
||||
}
|
||||
|
||||
uint64_t value;
|
||||
};
|
||||
|
||||
#define NONE std::numeric_limits<uint64_t>::max()
|
||||
|
||||
//Define table
|
||||
typedef util::ProbingHashTable<Entry, boost::hash<uint64_t> > Table;
|
||||
|
||||
void serialize_table(char *mem, size_t size, const std::string &filename);
|
||||
|
||||
char * readTable(const char * filename, size_t size);
|
||||
|
||||
uint64_t getKey(const uint64_t source_phrase[], size_t size);
|
||||
|
||||
struct TargetPhraseInfo
|
||||
{
|
||||
uint32_t alignTerm;
|
||||
uint32_t alignNonTerm;
|
||||
uint16_t numWords;
|
||||
uint16_t propLength;
|
||||
uint16_t filler;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,65 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <sys/stat.h> //For finding size of file
|
||||
#include "vocabid.hh"
|
||||
#include <algorithm> //toLower
|
||||
#include <deque>
|
||||
#include "probing_hash_utils.hh"
|
||||
#include "hash.hh" //Includes line splitter
|
||||
#include "line_splitter.hh"
|
||||
#include "../../legacy/Util2.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class QueryEngine
|
||||
{
|
||||
std::map<uint64_t, std::string> source_vocabids;
|
||||
|
||||
typedef std::vector<unsigned char> Alignments;
|
||||
std::vector<Alignments> alignColl;
|
||||
|
||||
Table table;
|
||||
char *mem; //Memory for the table, necessary so that we can correctly destroy the object
|
||||
|
||||
size_t table_filesize;
|
||||
bool is_reordering;
|
||||
|
||||
void read_alignments(const std::string &alignPath);
|
||||
|
||||
public:
|
||||
int num_scores;
|
||||
int num_lex_scores;
|
||||
bool logProb;
|
||||
|
||||
QueryEngine(const char *);
|
||||
~QueryEngine();
|
||||
|
||||
std::pair<bool, uint64_t> query(uint64_t key);
|
||||
|
||||
const std::map<uint64_t, std::string> &getSourceVocab() const
|
||||
{ return source_vocabids; }
|
||||
|
||||
const std::vector<Alignments> &getAlignments() const
|
||||
{ return alignColl; }
|
||||
|
||||
uint64_t getKey(uint64_t source_phrase[], size_t size) const;
|
||||
|
||||
template<typename T>
|
||||
inline bool Get(const boost::unordered_map<std::string, std::string> &keyValue, const std::string &sought, T &found) const
|
||||
{
|
||||
boost::unordered_map<std::string, std::string>::const_iterator iter = keyValue.find(sought);
|
||||
if (iter == keyValue.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const std::string &foundStr = iter->second;
|
||||
found = Scan<T>(foundStr);
|
||||
return true;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,303 +0,0 @@
|
||||
#include <sys/stat.h>
|
||||
#include <boost/foreach.hpp>
|
||||
#include "line_splitter.hh"
|
||||
#include "storing.hh"
|
||||
#include "StoreTarget.h"
|
||||
#include "StoreVocab.h"
|
||||
#include "../../legacy/Util2.h"
|
||||
#include "../../legacy/InputFileStream.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
void Node::Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos)
|
||||
{
|
||||
if (pos < sourcePhrase.size()) {
|
||||
uint64_t vocabId = sourcePhrase[pos];
|
||||
|
||||
Node *child;
|
||||
Children::iterator iter = m_children.find(vocabId);
|
||||
if (iter == m_children.end()) {
|
||||
// New node. Write other children then discard them
|
||||
BOOST_FOREACH(Children::value_type &valPair, m_children) {
|
||||
Node &otherChild = valPair.second;
|
||||
otherChild.Write(table);
|
||||
}
|
||||
m_children.clear();
|
||||
|
||||
// create new node
|
||||
child = &m_children[vocabId];
|
||||
assert(!child->done);
|
||||
child->key = key + (vocabId << pos);
|
||||
}
|
||||
else {
|
||||
child = &iter->second;
|
||||
}
|
||||
|
||||
child->Add(table, sourcePhrase, pos + 1);
|
||||
}
|
||||
else {
|
||||
// this node was written previously 'cos it has rules
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
|
||||
void Node::Write(Table &table)
|
||||
{
|
||||
//cerr << "START write " << done << " " << key << endl;
|
||||
BOOST_FOREACH(Children::value_type &valPair, m_children) {
|
||||
Node &child = valPair.second;
|
||||
child.Write(table);
|
||||
}
|
||||
|
||||
if (!done) {
|
||||
// save
|
||||
Entry sourceEntry;
|
||||
sourceEntry.value = NONE;
|
||||
sourceEntry.key = key;
|
||||
|
||||
//Put into table
|
||||
table.Insert(sourceEntry);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
void createProbingPT(const std::string &phrasetable_path,
|
||||
const std::string &basepath, int num_scores, int num_lex_scores,
|
||||
bool log_prob, int max_cache_size, bool scfg)
|
||||
{
|
||||
std::cerr << "Starting..." << std::endl;
|
||||
|
||||
//Get basepath and create directory if missing
|
||||
mkdir(basepath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
|
||||
|
||||
StoreTarget storeTarget(basepath);
|
||||
|
||||
//Get uniq lines:
|
||||
unsigned long uniq_entries = countUniqueSource(phrasetable_path);
|
||||
|
||||
//Source phrase vocabids
|
||||
StoreVocab<uint64_t> sourceVocab(basepath + "/source_vocabids");
|
||||
|
||||
//Read the file
|
||||
util::FilePiece filein(phrasetable_path.c_str());
|
||||
|
||||
//Init the probing hash table
|
||||
size_t size = Table::Size(uniq_entries, 1.2);
|
||||
char * mem = new char[size];
|
||||
memset(mem, 0, size);
|
||||
Table sourceEntries(mem, size);
|
||||
|
||||
std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> cache;
|
||||
float totalSourceCount = 0;
|
||||
|
||||
//Keep track of the size of each group of target phrases
|
||||
size_t line_num = 0;
|
||||
|
||||
//Read everything and processs
|
||||
std::string prevSource;
|
||||
|
||||
Node sourcePhrases;
|
||||
sourcePhrases.done = true;
|
||||
sourcePhrases.key = 0;
|
||||
|
||||
while (true) {
|
||||
try {
|
||||
//Process line read
|
||||
line_text line;
|
||||
line = splitLine(filein.ReadLine(), scfg);
|
||||
//cerr << "line=" << line.source_phrase << endl;
|
||||
|
||||
++line_num;
|
||||
if (line_num % 1000000 == 0) {
|
||||
std::cerr << line_num << " " << std::flush;
|
||||
}
|
||||
|
||||
//Add source phrases to vocabularyIDs
|
||||
add_to_map(sourceVocab, line.source_phrase);
|
||||
|
||||
if (prevSource.empty()) {
|
||||
// 1st line
|
||||
prevSource = line.source_phrase.as_string();
|
||||
storeTarget.Append(line, log_prob, scfg);
|
||||
}
|
||||
else if (prevSource == line.source_phrase) {
|
||||
//If we still have the same line, just append to it:
|
||||
storeTarget.Append(line, log_prob, scfg);
|
||||
}
|
||||
else {
|
||||
assert(prevSource != line.source_phrase);
|
||||
|
||||
//Create a new entry even
|
||||
|
||||
// save
|
||||
uint64_t targetInd = storeTarget.Save();
|
||||
|
||||
// next line
|
||||
storeTarget.Append(line, log_prob, scfg);
|
||||
|
||||
//Create an entry for the previous source phrase:
|
||||
Entry sourceEntry;
|
||||
sourceEntry.value = targetInd;
|
||||
//The key is the sum of hashes of individual words bitshifted by their position in the phrase.
|
||||
//Probably not entirerly correct, but fast and seems to work fine in practise.
|
||||
std::vector<uint64_t> vocabid_source = getVocabIDs(prevSource);
|
||||
if (scfg) {
|
||||
// storing prefixes?
|
||||
sourcePhrases.Add(sourceEntries, vocabid_source);
|
||||
}
|
||||
sourceEntry.key = getKey(vocabid_source);
|
||||
|
||||
/*
|
||||
cerr << "prevSource=" << prevSource << flush
|
||||
<< " vocabids=" << Debug(vocabid_source) << flush
|
||||
<< " key=" << sourceEntry.key << endl;
|
||||
*/
|
||||
//Put into table
|
||||
sourceEntries.Insert(sourceEntry);
|
||||
|
||||
// update cache - CURRENT source phrase, not prev
|
||||
if (max_cache_size) {
|
||||
std::string countStr = line.counts.as_string();
|
||||
countStr = Trim(countStr);
|
||||
if (!countStr.empty()) {
|
||||
std::vector<float> toks = Tokenize<float>(countStr);
|
||||
//cerr << "CACHE:" << line.source_phrase << " " << countStr << " " << toks[1] << endl;
|
||||
|
||||
if (toks.size() >= 2) {
|
||||
totalSourceCount += toks[1];
|
||||
|
||||
// compute key for CURRENT source
|
||||
std::vector<uint64_t> currVocabidSource = getVocabIDs(line.source_phrase.as_string());
|
||||
uint64_t currKey = getKey(currVocabidSource);
|
||||
|
||||
CacheItem *item = new CacheItem(
|
||||
Trim(line.source_phrase.as_string()),
|
||||
currKey,
|
||||
toks[1]);
|
||||
cache.push(item);
|
||||
|
||||
if (max_cache_size > 0 && cache.size() > max_cache_size) {
|
||||
cache.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Set prevLine
|
||||
prevSource = line.source_phrase.as_string();
|
||||
}
|
||||
|
||||
}
|
||||
catch (util::EndOfFileException e) {
|
||||
std::cerr
|
||||
<< "Reading phrase table finished, writing remaining files to disk."
|
||||
<< std::endl;
|
||||
|
||||
//After the final entry is constructed we need to add it to the phrase_table
|
||||
//Create an entry for the previous source phrase:
|
||||
uint64_t targetInd = storeTarget.Save();
|
||||
|
||||
Entry sourceEntry;
|
||||
sourceEntry.value = targetInd;
|
||||
|
||||
//The key is the sum of hashes of individual words. Probably not entirerly correct, but fast
|
||||
std::vector<uint64_t> vocabid_source = getVocabIDs(prevSource);
|
||||
sourceEntry.key = getKey(vocabid_source);
|
||||
|
||||
//Put into table
|
||||
sourceEntries.Insert(sourceEntry);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
sourcePhrases.Write(sourceEntries);
|
||||
|
||||
storeTarget.SaveAlignment();
|
||||
|
||||
serialize_table(mem, size, (basepath + "/probing_hash.dat"));
|
||||
|
||||
sourceVocab.Save();
|
||||
|
||||
serialize_cache(cache, (basepath + "/cache"), totalSourceCount);
|
||||
|
||||
delete[] mem;
|
||||
|
||||
//Write configfile
|
||||
std::ofstream configfile;
|
||||
configfile.open((basepath + "/config").c_str());
|
||||
configfile << "API_VERSION\t" << API_VERSION << '\n';
|
||||
configfile << "uniq_entries\t" << uniq_entries << '\n';
|
||||
configfile << "num_scores\t" << num_scores << '\n';
|
||||
configfile << "num_lex_scores\t" << num_lex_scores << '\n';
|
||||
configfile << "log_prob\t" << log_prob << '\n';
|
||||
configfile.close();
|
||||
}
|
||||
|
||||
size_t countUniqueSource(const std::string &path)
|
||||
{
|
||||
size_t ret = 0;
|
||||
InputFileStream strme(path);
|
||||
|
||||
std::string line, prevSource;
|
||||
while (std::getline(strme, line)) {
|
||||
std::vector<std::string> toks = TokenizeMultiCharSeparator(line, "|||");
|
||||
assert(toks.size() != 0);
|
||||
|
||||
if (prevSource != toks[0]) {
|
||||
prevSource = toks[0];
|
||||
++ret;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void serialize_cache(
|
||||
std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache,
|
||||
const std::string &path, float totalSourceCount)
|
||||
{
|
||||
std::vector<const CacheItem*> vec(cache.size());
|
||||
|
||||
size_t ind = cache.size() - 1;
|
||||
while (!cache.empty()) {
|
||||
const CacheItem *item = cache.top();
|
||||
vec[ind] = item;
|
||||
cache.pop();
|
||||
--ind;
|
||||
}
|
||||
|
||||
std::ofstream os(path.c_str());
|
||||
|
||||
os << totalSourceCount << std::endl;
|
||||
for (size_t i = 0; i < vec.size(); ++i) {
|
||||
const CacheItem *item = vec[i];
|
||||
os << item->count << "\t" << item->sourceKey << "\t" << item->source << std::endl;
|
||||
delete item;
|
||||
}
|
||||
|
||||
os.close();
|
||||
}
|
||||
|
||||
uint64_t getKey(const std::vector<uint64_t> &vocabid_source)
|
||||
{
|
||||
return Moses2::getKey(vocabid_source.data(), vocabid_source.size());
|
||||
}
|
||||
|
||||
std::vector<uint64_t> CreatePrefix(const std::vector<uint64_t> &vocabid_source, size_t endPos)
|
||||
{
|
||||
assert(endPos < vocabid_source.size());
|
||||
|
||||
std::vector<uint64_t> ret(endPos + 1);
|
||||
for (size_t i = 0; i <= endPos; ++i) {
|
||||
ret[i] = vocabid_source[i];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,95 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <cstdio>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <queue>
|
||||
#include <sys/stat.h> //mkdir
|
||||
|
||||
#include "hash.hh" //Includes line_splitter
|
||||
#include "probing_hash_utils.hh"
|
||||
|
||||
#include "util/file_piece.hh"
|
||||
#include "util/file.hh"
|
||||
#include "vocabid.hh"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
typedef std::vector<uint64_t> SourcePhrase;
|
||||
|
||||
|
||||
class Node
|
||||
{
|
||||
typedef boost::unordered_map<uint64_t, Node> Children;
|
||||
Children m_children;
|
||||
|
||||
public:
|
||||
uint64_t key;
|
||||
bool done;
|
||||
|
||||
Node()
|
||||
:done(false)
|
||||
{}
|
||||
|
||||
void Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos = 0);
|
||||
void Write(Table &table);
|
||||
};
|
||||
|
||||
|
||||
void createProbingPT(const std::string &phrasetable_path,
|
||||
const std::string &basepath, int num_scores, int num_lex_scores,
|
||||
bool log_prob, int max_cache_size, bool scfg);
|
||||
uint64_t getKey(const std::vector<uint64_t> &source_phrase);
|
||||
|
||||
std::vector<uint64_t> CreatePrefix(const std::vector<uint64_t> &vocabid_source, size_t endPos);
|
||||
|
||||
template<typename T>
|
||||
std::string Debug(const std::vector<T> &vec)
|
||||
{
|
||||
std::stringstream strm;
|
||||
for (size_t i = 0; i < vec.size(); ++i) {
|
||||
strm << vec[i] << " ";
|
||||
}
|
||||
return strm.str();
|
||||
}
|
||||
|
||||
size_t countUniqueSource(const std::string &path);
|
||||
|
||||
class CacheItem
|
||||
{
|
||||
public:
|
||||
std::string source;
|
||||
uint64_t sourceKey;
|
||||
float count;
|
||||
CacheItem(const std::string &vSource, uint64_t vSourceKey, float vCount)
|
||||
:source(vSource)
|
||||
,sourceKey(vSourceKey)
|
||||
,count(vCount)
|
||||
{
|
||||
}
|
||||
|
||||
bool operator<(const CacheItem &other) const
|
||||
{
|
||||
return count > other.count;
|
||||
}
|
||||
};
|
||||
|
||||
class CacheItemOrderer
|
||||
{
|
||||
public:
|
||||
bool operator()(const CacheItem* a, const CacheItem* b) const
|
||||
{
|
||||
return (*a) < (*b);
|
||||
}
|
||||
};
|
||||
|
||||
void serialize_cache(
|
||||
std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache,
|
||||
const std::string &path, float totalSourceCount);
|
||||
|
||||
}
|
||||
|
@ -1,59 +0,0 @@
|
||||
#include <boost/foreach.hpp>
|
||||
#include "vocabid.hh"
|
||||
#include "StoreVocab.h"
|
||||
#include "../../legacy/Util2.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
void add_to_map(StoreVocab<uint64_t> &sourceVocab,
|
||||
const StringPiece &textin)
|
||||
{
|
||||
//Tokenize
|
||||
util::TokenIter<util::SingleCharacter> itWord(textin, util::SingleCharacter(' '));
|
||||
|
||||
while (itWord) {
|
||||
StringPiece word = *itWord;
|
||||
|
||||
util::TokenIter<util::SingleCharacter> itFactor(word, util::SingleCharacter('|'));
|
||||
while (itFactor) {
|
||||
StringPiece factor = *itFactor;
|
||||
|
||||
sourceVocab.Insert(getHash(factor), factor.as_string());
|
||||
itFactor++;
|
||||
}
|
||||
itWord++;
|
||||
}
|
||||
}
|
||||
|
||||
void serialize_map(const std::map<uint64_t, std::string> &karta,
|
||||
const std::string &filename)
|
||||
{
|
||||
std::ofstream os(filename.c_str());
|
||||
|
||||
std::map<uint64_t, std::string>::const_iterator iter;
|
||||
for (iter = karta.begin(); iter != karta.end(); ++iter) {
|
||||
os << iter->first << '\t' << iter->second << std::endl;
|
||||
}
|
||||
|
||||
os.close();
|
||||
}
|
||||
|
||||
void read_map(std::map<uint64_t, std::string> &karta, const char* filename)
|
||||
{
|
||||
std::ifstream is(filename);
|
||||
|
||||
std::string line;
|
||||
while (getline(is, line)) {
|
||||
std::vector<std::string> toks = Tokenize(line, "\t");
|
||||
assert(toks.size() == 2);
|
||||
uint64_t ind = Scan<uint64_t>(toks[1]);
|
||||
karta[ind] = toks[0];
|
||||
}
|
||||
|
||||
//Close the stream after we are done.
|
||||
is.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,29 +0,0 @@
|
||||
//Serialization
|
||||
#include <boost/serialization/serialization.hpp>
|
||||
#include <boost/serialization/map.hpp>
|
||||
#include <boost/archive/text_iarchive.hpp>
|
||||
#include <boost/archive/text_oarchive.hpp>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include <map> //Container
|
||||
#include "hash.hh" //Hash of elements
|
||||
|
||||
#include "util/string_piece.hh" //Tokenization and work with StringPiece
|
||||
#include "util/tokenize_piece.hh"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
template<typename VOCABID>
|
||||
class StoreVocab;
|
||||
|
||||
void add_to_map(StoreVocab<uint64_t> &sourceVocab,
|
||||
const StringPiece &textin);
|
||||
|
||||
void serialize_map(const std::map<uint64_t, std::string> &karta,
|
||||
const std::string &filename);
|
||||
|
||||
void read_map(std::map<uint64_t, std::string> &karta, const char* filename);
|
||||
|
||||
}
|
@ -1,91 +0,0 @@
|
||||
/*
|
||||
* Transliteration.h
|
||||
*
|
||||
* Created on: 28 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "PhraseTable.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
class Sentence;
|
||||
class InputPaths;
|
||||
class Range;
|
||||
|
||||
class Transliteration: public PhraseTable
|
||||
{
|
||||
public:
|
||||
Transliteration(size_t startInd, const std::string &line);
|
||||
virtual ~Transliteration();
|
||||
|
||||
void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
|
||||
virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
|
||||
InputPath &inputPath) const;
|
||||
|
||||
virtual void
|
||||
EvaluateInIsolation(const System &system, const Phrase<Moses2::Word> &source,
|
||||
const TargetPhraseImpl &targetPhrase, Scores &scores,
|
||||
SCORE &estimatedScore) const;
|
||||
|
||||
virtual void InitActiveChart(
|
||||
MemPool &pool,
|
||||
const SCFG::Manager &mgr,
|
||||
SCFG::InputPath &path) const;
|
||||
|
||||
void Lookup(MemPool &pool,
|
||||
const SCFG::Manager &mgr,
|
||||
size_t maxChartSpan,
|
||||
const SCFG::Stacks &stacks,
|
||||
SCFG::InputPath &path) const;
|
||||
|
||||
void LookupUnary(MemPool &pool,
|
||||
const SCFG::Manager &mgr,
|
||||
const SCFG::Stacks &stacks,
|
||||
SCFG::InputPath &path) const;
|
||||
|
||||
protected:
|
||||
virtual void LookupNT(
|
||||
MemPool &pool,
|
||||
const SCFG::Manager &mgr,
|
||||
const Moses2::Range &subPhraseRange,
|
||||
const SCFG::InputPath &prevPath,
|
||||
const SCFG::Stacks &stacks,
|
||||
SCFG::InputPath &outPath) const;
|
||||
|
||||
virtual void LookupGivenWord(
|
||||
MemPool &pool,
|
||||
const SCFG::Manager &mgr,
|
||||
const SCFG::InputPath &prevPath,
|
||||
const SCFG::Word &wordSought,
|
||||
const Moses2::Hypotheses *hypos,
|
||||
const Moses2::Range &subPhraseRange,
|
||||
SCFG::InputPath &outPath) const;
|
||||
|
||||
virtual void LookupGivenNode(
|
||||
MemPool &pool,
|
||||
const SCFG::Manager &mgr,
|
||||
const SCFG::ActiveChartEntry &prevEntry,
|
||||
const SCFG::Word &wordSought,
|
||||
const Moses2::Hypotheses *hypos,
|
||||
const Moses2::Range &subPhraseRange,
|
||||
SCFG::InputPath &outPath) const;
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
protected:
|
||||
std::string m_filePath;
|
||||
std::string m_mosesDir, m_scriptDir, m_externalDir, m_inputLang, m_outputLang;
|
||||
|
||||
std::vector<TargetPhraseImpl*> CreateTargetPhrases(
|
||||
const Manager &mgr,
|
||||
MemPool &pool,
|
||||
const SubPhrase<Moses2::Word> &sourcePhrase,
|
||||
const std::string &outDir) const;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,89 +0,0 @@
|
||||
/*
|
||||
* UnknownWordPenalty.h
|
||||
*
|
||||
* Created on: 28 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "PhraseTable.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
class Sentence;
|
||||
class InputPaths;
|
||||
class Range;
|
||||
|
||||
class UnknownWordPenalty: public PhraseTable
|
||||
{
|
||||
public:
|
||||
UnknownWordPenalty(size_t startInd, const std::string &line);
|
||||
virtual ~UnknownWordPenalty();
|
||||
|
||||
virtual void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
|
||||
virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
|
||||
InputPath &inputPath) const;
|
||||
|
||||
void ProcessXML(
|
||||
const Manager &mgr,
|
||||
MemPool &pool,
|
||||
const Sentence &sentence,
|
||||
InputPaths &inputPaths) const;
|
||||
|
||||
virtual void
|
||||
EvaluateInIsolation(const System &system, const Phrase<Moses2::Word> &source,
|
||||
const TargetPhraseImpl &targetPhrase, Scores &scores,
|
||||
SCORE &estimatedScore) const;
|
||||
|
||||
virtual void InitActiveChart(
|
||||
MemPool &pool,
|
||||
const SCFG::Manager &mgr,
|
||||
SCFG::InputPath &path) const;
|
||||
|
||||
void Lookup(MemPool &pool,
|
||||
const SCFG::Manager &mgr,
|
||||
size_t maxChartSpan,
|
||||
const SCFG::Stacks &stacks,
|
||||
SCFG::InputPath &path) const;
|
||||
|
||||
void LookupUnary(MemPool &pool,
|
||||
const SCFG::Manager &mgr,
|
||||
const SCFG::Stacks &stacks,
|
||||
SCFG::InputPath &path) const;
|
||||
|
||||
protected:
|
||||
virtual void LookupNT(
|
||||
MemPool &pool,
|
||||
const SCFG::Manager &mgr,
|
||||
const Moses2::Range &subPhraseRange,
|
||||
const SCFG::InputPath &prevPath,
|
||||
const SCFG::Stacks &stacks,
|
||||
SCFG::InputPath &outPath) const;
|
||||
|
||||
virtual void LookupGivenWord(
|
||||
MemPool &pool,
|
||||
const SCFG::Manager &mgr,
|
||||
const SCFG::InputPath &prevPath,
|
||||
const SCFG::Word &wordSought,
|
||||
const Moses2::Hypotheses *hypos,
|
||||
const Moses2::Range &subPhraseRange,
|
||||
SCFG::InputPath &outPath) const;
|
||||
|
||||
virtual void LookupGivenNode(
|
||||
MemPool &pool,
|
||||
const SCFG::Manager &mgr,
|
||||
const SCFG::ActiveChartEntry &prevEntry,
|
||||
const SCFG::Word &wordSought,
|
||||
const Moses2::Hypotheses *hypos,
|
||||
const Moses2::Range &subPhraseRange,
|
||||
SCFG::InputPath &outPath) const;
|
||||
protected:
|
||||
bool m_drop;
|
||||
std::string m_prefix, m_suffix;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,161 +0,0 @@
|
||||
/*
|
||||
* CubePruning.cpp
|
||||
*
|
||||
* Created on: 27 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "Misc.h"
|
||||
#include "Stack.h"
|
||||
#include "../Manager.h"
|
||||
#include "../../MemPool.h"
|
||||
#include "../../System.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSCubePruningBitmapStack
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
QueueItem *QueueItem::Create(QueueItem *currItem,
|
||||
Manager &mgr,
|
||||
CubeEdge &edge,
|
||||
size_t hypoIndex,
|
||||
size_t tpIndex,
|
||||
std::deque<QueueItem*> &queueItemRecycler)
|
||||
{
|
||||
QueueItem *ret;
|
||||
if (currItem) {
|
||||
// reuse incoming queue item to create new item
|
||||
ret = currItem;
|
||||
ret->Init(mgr, edge, hypoIndex, tpIndex);
|
||||
}
|
||||
else if (!queueItemRecycler.empty()) {
|
||||
// use item from recycle bin
|
||||
ret = queueItemRecycler.back();
|
||||
ret->Init(mgr, edge, hypoIndex, tpIndex);
|
||||
queueItemRecycler.pop_back();
|
||||
}
|
||||
else {
|
||||
// create new item
|
||||
ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
|
||||
:edge(&edge)
|
||||
,hypoIndex(hypoIndex)
|
||||
,tpIndex(tpIndex)
|
||||
{
|
||||
CreateHypothesis(mgr);
|
||||
}
|
||||
|
||||
void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
|
||||
{
|
||||
this->edge = &edge;
|
||||
this->hypoIndex = hypoIndex;
|
||||
this->tpIndex = tpIndex;
|
||||
|
||||
CreateHypothesis(mgr);
|
||||
}
|
||||
|
||||
void QueueItem::CreateHypothesis(Manager &mgr)
|
||||
{
|
||||
const Hypothesis *prevHypo = edge->hypos[hypoIndex];
|
||||
const TargetPhrase &tp = edge->tps[tpIndex];
|
||||
|
||||
//cerr << "hypoIndex=" << hypoIndex << endl;
|
||||
//cerr << "edge.hypos=" << edge.hypos.size() << endl;
|
||||
//cerr << prevHypo << endl;
|
||||
//cerr << *prevHypo << endl;
|
||||
|
||||
hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
|
||||
hypo->EvaluateWhenApplied();
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
CubeEdge::CubeEdge(
|
||||
Manager &mgr,
|
||||
const Hypotheses &hypos,
|
||||
const InputPath &path,
|
||||
const TargetPhrases &tps,
|
||||
const Bitmap &newBitmap)
|
||||
:hypos(hypos)
|
||||
,path(path)
|
||||
,tps(tps)
|
||||
,newBitmap(newBitmap)
|
||||
{
|
||||
estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
|
||||
{
|
||||
out << obj.newBitmap;
|
||||
return out;
|
||||
}
|
||||
|
||||
bool
|
||||
CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
|
||||
{
|
||||
//UTIL_THROW_IF2(x >= (1<<17), "Error");
|
||||
//UTIL_THROW_IF2(y >= (1<<17), "Error");
|
||||
|
||||
SeenPositionItem val(this, (x<<16) + y);
|
||||
std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
|
||||
return pairRet.second;
|
||||
}
|
||||
|
||||
void CubeEdge::CreateFirst(Manager &mgr,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler)
|
||||
{
|
||||
assert(hypos.size());
|
||||
assert(tps.GetSize());
|
||||
|
||||
QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
|
||||
queue.push(item);
|
||||
bool setSeen = SetSeenPosition(0, 0, seenPositions);
|
||||
assert(setSeen);
|
||||
}
|
||||
|
||||
void CubeEdge::CreateNext(Manager &mgr,
|
||||
QueueItem *item,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler)
|
||||
{
|
||||
size_t hypoIndex = item->hypoIndex;
|
||||
size_t tpIndex = item->tpIndex;
|
||||
|
||||
if (hypoIndex + 1 < hypos.size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
|
||||
// reuse incoming queue item to create new item
|
||||
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
|
||||
assert(newItem == item);
|
||||
queue.push(newItem);
|
||||
item = NULL;
|
||||
}
|
||||
|
||||
if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
|
||||
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
|
||||
queue.push(newItem);
|
||||
item = NULL;
|
||||
}
|
||||
|
||||
if (item) {
|
||||
// recycle unused queue item
|
||||
queueItemRecycler.push_back(item);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,111 +0,0 @@
|
||||
/*
|
||||
* CubePruning.h
|
||||
*
|
||||
* Created on: 27 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
#include <boost/pool/pool_alloc.hpp>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include "../../legacy/Range.h"
|
||||
#include "../Hypothesis.h"
|
||||
#include "../../TypeDef.h"
|
||||
#include "../../Vector.h"
|
||||
#include "Stack.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class Manager;
|
||||
class InputPath;
|
||||
class TargetPhrases;
|
||||
class Bitmap;
|
||||
|
||||
namespace NSCubePruningBitmapStack
|
||||
{
|
||||
class CubeEdge;
|
||||
|
||||
///////////////////////////////////////////
|
||||
class QueueItem
|
||||
{
|
||||
~QueueItem(); // NOT IMPLEMENTED. Use MemPool
|
||||
public:
|
||||
static QueueItem *Create(QueueItem *currItem,
|
||||
Manager &mgr,
|
||||
CubeEdge &edge,
|
||||
size_t hypoIndex,
|
||||
size_t tpIndex,
|
||||
std::deque<QueueItem*> &queueItemRecycler);
|
||||
QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
|
||||
|
||||
void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
|
||||
|
||||
CubeEdge *edge;
|
||||
size_t hypoIndex, tpIndex;
|
||||
Hypothesis *hypo;
|
||||
|
||||
protected:
|
||||
void CreateHypothesis(Manager &mgr);
|
||||
};
|
||||
|
||||
///////////////////////////////////////////
|
||||
class QueueItemOrderer
|
||||
{
|
||||
public:
|
||||
bool operator()(QueueItem* itemA, QueueItem* itemB) const {
|
||||
HypothesisFutureScoreOrderer orderer;
|
||||
return !orderer(itemA->hypo, itemB->hypo);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////
|
||||
class CubeEdge
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
|
||||
|
||||
public:
|
||||
typedef std::priority_queue<QueueItem*,
|
||||
std::vector<QueueItem*>,
|
||||
QueueItemOrderer> Queue;
|
||||
|
||||
typedef std::pair<const CubeEdge*, int> SeenPositionItem;
|
||||
typedef boost::unordered_set<SeenPositionItem,
|
||||
boost::hash<SeenPositionItem>,
|
||||
std::equal_to<SeenPositionItem> > SeenPositions;
|
||||
|
||||
const Hypotheses &hypos;
|
||||
const InputPath &path;
|
||||
const TargetPhrases &tps;
|
||||
const Bitmap &newBitmap;
|
||||
SCORE estimatedScore;
|
||||
|
||||
CubeEdge(Manager &mgr,
|
||||
const Hypotheses &hypos,
|
||||
const InputPath &path,
|
||||
const TargetPhrases &tps,
|
||||
const Bitmap &newBitmap);
|
||||
|
||||
bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
|
||||
|
||||
void CreateFirst(Manager &mgr,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler);
|
||||
void CreateNext(Manager &mgr,
|
||||
QueueItem *item,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler);
|
||||
|
||||
protected:
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,206 +0,0 @@
|
||||
/*
|
||||
* Search.cpp
|
||||
*
|
||||
* Created on: 16 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <boost/foreach.hpp>
|
||||
#include "Search.h"
|
||||
#include "Stack.h"
|
||||
#include "../Manager.h"
|
||||
#include "../Hypothesis.h"
|
||||
#include "../../InputPaths.h"
|
||||
#include "../../InputPath.h"
|
||||
#include "../../System.h"
|
||||
#include "../../Sentence.h"
|
||||
#include "../../TranslationTask.h"
|
||||
#include "../../legacy/Util2.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSCubePruningBitmapStack
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
Search::Search(Manager &mgr)
|
||||
:Moses2::Search(mgr)
|
||||
,m_stack(mgr)
|
||||
|
||||
,m_queue(QueueItemOrderer(), std::vector<QueueItem*>() )
|
||||
|
||||
,m_seenPositions()
|
||||
{
|
||||
}
|
||||
|
||||
Search::~Search()
|
||||
{
|
||||
}
|
||||
|
||||
void Search::Decode()
|
||||
{
|
||||
// init cue edges
|
||||
m_cubeEdges.resize(mgr.GetInput().GetSize() + 1);
|
||||
for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
|
||||
m_cubeEdges[i] = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges();
|
||||
}
|
||||
|
||||
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
|
||||
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
|
||||
initHypo->EmptyHypothesisState(mgr.GetInput());
|
||||
|
||||
m_stack.Add(initHypo, mgr.GetHypoRecycle());
|
||||
PostDecode(0);
|
||||
|
||||
for (size_t stackInd = 1; stackInd < mgr.GetInput().GetSize() + 1; ++stackInd) {
|
||||
//cerr << "stackInd=" << stackInd << endl;
|
||||
m_stack.Clear();
|
||||
Decode(stackInd);
|
||||
PostDecode(stackInd);
|
||||
|
||||
//m_stack.DebugCounts();
|
||||
//cerr << m_stacks << endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Search::Decode(size_t stackInd)
|
||||
{
|
||||
Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
|
||||
|
||||
// reuse queue from previous stack. Clear it first
|
||||
std::vector<QueueItem*> &container = Container(m_queue);
|
||||
//cerr << "container=" << container.size() << endl;
|
||||
BOOST_FOREACH(QueueItem *item, container) {
|
||||
// recycle unused hypos from queue
|
||||
Hypothesis *hypo = item->hypo;
|
||||
hypoRecycler.Recycle(hypo);
|
||||
|
||||
// recycle queue item
|
||||
m_queueItemRecycler.push_back(item);
|
||||
}
|
||||
container.clear();
|
||||
|
||||
m_seenPositions.clear();
|
||||
|
||||
// add top hypo from every edge into queue
|
||||
CubeEdges &edges = *m_cubeEdges[stackInd];
|
||||
|
||||
BOOST_FOREACH(CubeEdge *edge, edges) {
|
||||
//cerr << *edge << " ";
|
||||
edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
|
||||
}
|
||||
|
||||
/*
|
||||
cerr << "edges: ";
|
||||
boost::unordered_set<const Bitmap*> uniqueBM;
|
||||
BOOST_FOREACH(CubeEdge *edge, edges) {
|
||||
uniqueBM.insert(&edge->newBitmap);
|
||||
//cerr << *edge << " ";
|
||||
}
|
||||
cerr << edges.size() << " " << uniqueBM.size();
|
||||
cerr << endl;
|
||||
*/
|
||||
|
||||
size_t pops = 0;
|
||||
while (!m_queue.empty() && pops < mgr.system.popLimit) {
|
||||
// get best hypo from queue, add to stack
|
||||
//cerr << "queue=" << queue.size() << endl;
|
||||
QueueItem *item = m_queue.top();
|
||||
m_queue.pop();
|
||||
|
||||
CubeEdge *edge = item->edge;
|
||||
|
||||
// add hypo to stack
|
||||
Hypothesis *hypo = item->hypo;
|
||||
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
|
||||
m_stack.Add(hypo, hypoRecycler);
|
||||
|
||||
edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
|
||||
|
||||
++pops;
|
||||
}
|
||||
|
||||
/*
|
||||
// create hypo from every edge. Increase diversity
|
||||
while (!m_queue.empty()) {
|
||||
QueueItem *item = m_queue.top();
|
||||
m_queue.pop();
|
||||
|
||||
if (item->hypoIndex == 0 && item->tpIndex == 0) {
|
||||
CubeEdge &edge = item->edge;
|
||||
|
||||
// add hypo to stack
|
||||
Hypothesis *hypo = item->hypo;
|
||||
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
|
||||
m_stacks.Add(hypo, mgr.GetHypoRecycle());
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
void Search::PostDecode(size_t stackInd)
|
||||
{
|
||||
MemPool &pool = mgr.GetPool();
|
||||
|
||||
Stack::SortedHypos sortedHypos = m_stack.GetSortedAndPruneHypos(mgr);
|
||||
|
||||
BOOST_FOREACH(const Stack::SortedHypos::value_type &val, sortedHypos) {
|
||||
const Bitmap &hypoBitmap = *val.first.first;
|
||||
size_t hypoEndPos = val.first.second;
|
||||
//cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
|
||||
|
||||
// create edges to next hypos from existing hypos
|
||||
const InputPaths &paths = mgr.GetInputPaths();
|
||||
|
||||
BOOST_FOREACH(const InputPath *path, paths) {
|
||||
const Range &pathRange = path->range;
|
||||
//cerr << "pathRange=" << pathRange << endl;
|
||||
|
||||
if (!path->IsUsed()) {
|
||||
continue;
|
||||
}
|
||||
if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
|
||||
size_t numWords = newBitmap.GetNumWordsCovered();
|
||||
|
||||
CubeEdges &edges = *m_cubeEdges[numWords];
|
||||
|
||||
// sort hypo for a particular bitmap and hypoEndPos
|
||||
Hypotheses &sortedHypos = *val.second;
|
||||
|
||||
size_t numPt = mgr.system.mappings.size();
|
||||
for (size_t i = 0; i < numPt; ++i) {
|
||||
const TargetPhrases *tps = path->targetPhrases[i];
|
||||
if (tps && tps->GetSize()) {
|
||||
CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap);
|
||||
edges.push_back(edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
const Hypothesis *Search::GetBestHypo() const
|
||||
{
|
||||
std::vector<const Hypothesis*> sortedHypos = m_stack.GetBestHypos(1);
|
||||
|
||||
const Hypothesis *best = NULL;
|
||||
if (sortedHypos.size()) {
|
||||
best = sortedHypos[0];
|
||||
}
|
||||
return best;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,57 +0,0 @@
|
||||
/*
|
||||
* Search.h
|
||||
*
|
||||
* Created on: 16 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <boost/pool/pool_alloc.hpp>
|
||||
#include "../Search.h"
|
||||
#include "Misc.h"
|
||||
#include "Stack.h"
|
||||
#include "../../legacy/Range.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class Bitmap;
|
||||
class Hypothesis;
|
||||
class InputPath;
|
||||
class TargetPhrases;
|
||||
|
||||
namespace NSCubePruningBitmapStack
|
||||
{
|
||||
|
||||
class Search : public Moses2::Search
|
||||
{
|
||||
public:
|
||||
Search(Manager &mgr);
|
||||
virtual ~Search();
|
||||
|
||||
virtual void Decode();
|
||||
const Hypothesis *GetBestHypo() const;
|
||||
|
||||
protected:
|
||||
Stack m_stack;
|
||||
|
||||
CubeEdge::Queue m_queue;
|
||||
CubeEdge::SeenPositions m_seenPositions;
|
||||
|
||||
// CUBE PRUNING VARIABLES
|
||||
// setup
|
||||
typedef std::vector<CubeEdge*> CubeEdges;
|
||||
std::vector<CubeEdges*> m_cubeEdges;
|
||||
|
||||
std::deque<QueueItem*> m_queueItemRecycler;
|
||||
|
||||
// CUBE PRUNING
|
||||
// decoding
|
||||
void Decode(size_t stackInd);
|
||||
void PostDecode(size_t stackInd);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,303 +0,0 @@
|
||||
/*
|
||||
* Stack.cpp
|
||||
*
|
||||
* Created on: 24 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <algorithm>
|
||||
#include <boost/foreach.hpp>
|
||||
#include "Stack.h"
|
||||
#include "../Hypothesis.h"
|
||||
#include "../Manager.h"
|
||||
#include "../../Scores.h"
|
||||
#include "../../System.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSCubePruningBitmapStack
|
||||
{
|
||||
MiniStack::MiniStack(const Manager &mgr)
|
||||
:m_coll()
|
||||
,m_sortedHypos(NULL)
|
||||
{}
|
||||
|
||||
StackAdd MiniStack::Add(const Hypothesis *hypo)
|
||||
{
|
||||
std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
|
||||
|
||||
// CHECK RECOMBINATION
|
||||
if (addRet.second) {
|
||||
// equiv hypo doesn't exists
|
||||
return StackAdd(true, NULL);
|
||||
}
|
||||
else {
|
||||
const Hypothesis *hypoExisting = *addRet.first;
|
||||
if (hypo->GetScores().GetTotalScore() > hypoExisting->GetScores().GetTotalScore()) {
|
||||
// incoming hypo is better than the one we have
|
||||
const Hypothesis *const &hypoExisting1 = *addRet.first;
|
||||
const Hypothesis *&hypoExisting2 = const_cast<const Hypothesis *&>(hypoExisting1);
|
||||
hypoExisting2 = hypo;
|
||||
|
||||
return StackAdd(true, const_cast<Hypothesis*>(hypoExisting));
|
||||
}
|
||||
else {
|
||||
// already storing the best hypo. discard incoming hypo
|
||||
return StackAdd(false, const_cast<Hypothesis*>(hypo));
|
||||
}
|
||||
}
|
||||
|
||||
assert(false);
|
||||
}
|
||||
|
||||
Hypotheses &MiniStack::GetSortedAndPruneHypos(const Manager &mgr) const
|
||||
{
|
||||
if (m_sortedHypos == NULL) {
|
||||
// create sortedHypos first
|
||||
MemPool &pool = mgr.GetPool();
|
||||
m_sortedHypos = new (pool.Allocate< Vector<const Hypothesis*> >()) Vector<const Hypothesis*>(pool, m_coll.size());
|
||||
|
||||
size_t ind = 0;
|
||||
BOOST_FOREACH(const Hypothesis *hypo, m_coll) {
|
||||
(*m_sortedHypos)[ind] = hypo;
|
||||
++ind;
|
||||
}
|
||||
|
||||
SortAndPruneHypos(mgr);
|
||||
}
|
||||
|
||||
return *m_sortedHypos;
|
||||
}
|
||||
|
||||
void MiniStack::SortAndPruneHypos(const Manager &mgr) const
|
||||
{
|
||||
size_t stackSize = mgr.system.stackSize;
|
||||
Recycler<Hypothesis*> &recycler = mgr.GetHypoRecycle();
|
||||
|
||||
/*
|
||||
cerr << "UNSORTED hypos:" << endl;
|
||||
for (size_t i = 0; i < hypos.size(); ++i) {
|
||||
const Hypothesis *hypo = hypos[i];
|
||||
cerr << *hypo << endl;
|
||||
}
|
||||
cerr << endl;
|
||||
*/
|
||||
Hypotheses::iterator iterMiddle;
|
||||
iterMiddle = (stackSize == 0 || m_sortedHypos->size() < stackSize)
|
||||
? m_sortedHypos->end()
|
||||
: m_sortedHypos->begin() + stackSize;
|
||||
|
||||
std::partial_sort(m_sortedHypos->begin(), iterMiddle, m_sortedHypos->end(),
|
||||
HypothesisFutureScoreOrderer());
|
||||
|
||||
// prune
|
||||
if (stackSize && m_sortedHypos->size() > stackSize) {
|
||||
for (size_t i = stackSize; i < m_sortedHypos->size(); ++i) {
|
||||
Hypothesis *hypo = const_cast<Hypothesis*>((*m_sortedHypos)[i]);
|
||||
recycler.Recycle(hypo);
|
||||
}
|
||||
m_sortedHypos->resize(stackSize);
|
||||
}
|
||||
|
||||
/*
|
||||
cerr << "sorted hypos:" << endl;
|
||||
for (size_t i = 0; i < hypos.size(); ++i) {
|
||||
const Hypothesis *hypo = hypos[i];
|
||||
cerr << hypo << " " << *hypo << endl;
|
||||
}
|
||||
cerr << endl;
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
void MiniStack::Clear()
|
||||
{
|
||||
m_sortedHypos = NULL;
|
||||
m_coll.clear();
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
Stack::Stack(const Manager &mgr)
|
||||
:m_mgr(mgr)
|
||||
,m_coll()
|
||||
,m_miniStackRecycler()
|
||||
{
|
||||
}
|
||||
|
||||
Stack::~Stack() {
|
||||
// TODO Auto-generated destructor stub
|
||||
}
|
||||
|
||||
void Stack::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
|
||||
{
|
||||
HypoCoverageInternal key = &hypo->GetBitmap();
|
||||
StackAdd added = GetMiniStack(key).Add(hypo);
|
||||
|
||||
if (added.toBeDeleted) {
|
||||
hypoRecycle.Recycle(added.toBeDeleted);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<const Hypothesis*> Stack::GetBestHypos(size_t num) const
|
||||
{
|
||||
std::vector<const Hypothesis*> ret;
|
||||
BOOST_FOREACH(const Coll::value_type &val, m_coll) {
|
||||
const MiniStack::_HCType &hypos = val.second->GetColl();
|
||||
ret.insert(ret.end(), hypos.begin(), hypos.end());
|
||||
}
|
||||
|
||||
std::vector<const Hypothesis*>::iterator iterMiddle;
|
||||
iterMiddle = (num == 0 || ret.size() < num)
|
||||
? ret.end()
|
||||
: ret.begin()+num;
|
||||
|
||||
std::partial_sort(ret.begin(), iterMiddle, ret.end(),
|
||||
HypothesisFutureScoreOrderer());
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
size_t Stack::GetHypoSize() const
|
||||
{
|
||||
size_t ret = 0;
|
||||
BOOST_FOREACH(const Coll::value_type &val, m_coll) {
|
||||
const MiniStack::_HCType &hypos = val.second->GetColl();
|
||||
ret += hypos.size();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
MiniStack &Stack::GetMiniStack(const HypoCoverageInternal &key)
|
||||
{
|
||||
MiniStack *ret;
|
||||
Coll::iterator iter = m_coll.find(key);
|
||||
if (iter == m_coll.end()) {
|
||||
if (m_miniStackRecycler.empty()) {
|
||||
ret = new (m_mgr.GetPool().Allocate<MiniStack>()) MiniStack(m_mgr);
|
||||
}
|
||||
else {
|
||||
ret = m_miniStackRecycler.back();
|
||||
ret->Clear();
|
||||
m_miniStackRecycler.pop_back();
|
||||
}
|
||||
|
||||
m_coll[key] = ret;
|
||||
}
|
||||
else {
|
||||
ret = iter->second;
|
||||
}
|
||||
return *ret;
|
||||
}
|
||||
|
||||
void Stack::Clear()
|
||||
{
|
||||
BOOST_FOREACH(const Coll::value_type &val, m_coll) {
|
||||
MiniStack *miniStack = val.second;
|
||||
m_miniStackRecycler.push_back(miniStack);
|
||||
}
|
||||
|
||||
m_coll.clear();
|
||||
}
|
||||
|
||||
Stack::SortedHypos Stack::GetSortedAndPruneHypos(const Manager &mgr) const
|
||||
{
|
||||
SortedHypos ret;
|
||||
|
||||
MemPool &pool = mgr.GetPool();
|
||||
|
||||
// prune and sort
|
||||
Hypotheses *allHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool, GetHypoSize());
|
||||
size_t i = 0;
|
||||
|
||||
BOOST_FOREACH(const Coll::value_type &val, m_coll) {
|
||||
const MiniStack *miniStack = val.second;
|
||||
const MiniStack::MiniStack::_HCType &hypos = miniStack->GetColl();
|
||||
|
||||
BOOST_FOREACH(const Hypothesis *hypo, hypos) {
|
||||
(*allHypos)[i++] = hypo;
|
||||
}
|
||||
}
|
||||
|
||||
SortAndPruneHypos(mgr, *allHypos);
|
||||
|
||||
// divide hypos by [bitmap, last end pos]
|
||||
BOOST_FOREACH(const Hypothesis *hypo, *allHypos) {
|
||||
HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos());
|
||||
|
||||
Hypotheses *hypos;
|
||||
SortedHypos::iterator iter;
|
||||
iter = ret.find(key);
|
||||
if (iter == ret.end()) {
|
||||
hypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool);
|
||||
ret[key] = hypos;
|
||||
}
|
||||
else {
|
||||
hypos = iter->second;
|
||||
}
|
||||
hypos->push_back(hypo);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void Stack::SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const
|
||||
{
|
||||
size_t stackSize = mgr.system.stackSize;
|
||||
Recycler<Hypothesis*> &recycler = mgr.GetHypoRecycle();
|
||||
|
||||
/*
|
||||
cerr << "UNSORTED hypos:" << endl;
|
||||
for (size_t i = 0; i < hypos.size(); ++i) {
|
||||
const Hypothesis *hypo = hypos[i];
|
||||
cerr << *hypo << endl;
|
||||
}
|
||||
cerr << endl;
|
||||
*/
|
||||
Hypotheses::iterator iterMiddle;
|
||||
iterMiddle = (stackSize == 0 || hypos.size() < stackSize)
|
||||
? hypos.end()
|
||||
: hypos.begin() + stackSize;
|
||||
|
||||
std::partial_sort(hypos.begin(), iterMiddle, hypos.end(),
|
||||
HypothesisFutureScoreOrderer());
|
||||
|
||||
// prune
|
||||
if (stackSize && hypos.size() > stackSize) {
|
||||
for (size_t i = stackSize; i < hypos.size(); ++i) {
|
||||
Hypothesis *hypo = const_cast<Hypothesis*>(hypos[i]);
|
||||
recycler.Recycle(hypo);
|
||||
}
|
||||
hypos.resize(stackSize);
|
||||
}
|
||||
|
||||
/*
|
||||
cerr << "sorted hypos:" << endl;
|
||||
for (size_t i = 0; i < hypos.size(); ++i) {
|
||||
const Hypothesis *hypo = hypos[i];
|
||||
cerr << hypo << " " << *hypo << endl;
|
||||
}
|
||||
cerr << endl;
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
|
||||
void Stack::DebugCounts()
|
||||
{
|
||||
/*
|
||||
cerr << "counts=";
|
||||
BOOST_FOREACH(const Coll::value_type &val, GetColl()) {
|
||||
const NSCubePruning::MiniStack &miniStack = *val.second;
|
||||
size_t count = miniStack.GetColl().size();
|
||||
cerr << count << " ";
|
||||
}
|
||||
cerr << endl;
|
||||
*/
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,109 +0,0 @@
|
||||
/*
|
||||
* Stack.h
|
||||
*
|
||||
* Created on: 24 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include <deque>
|
||||
#include "../Hypothesis.h"
|
||||
#include "../../TypeDef.h"
|
||||
#include "../../Vector.h"
|
||||
#include "../../MemPool.h"
|
||||
#include "../../Recycler.h"
|
||||
#include "../../legacy/Util2.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class Manager;
|
||||
|
||||
namespace NSCubePruningBitmapStack
|
||||
{
|
||||
typedef Vector<const Hypothesis*> Hypotheses;
|
||||
|
||||
class MiniStack
|
||||
{
|
||||
public:
|
||||
typedef boost::unordered_set<const Hypothesis*,
|
||||
UnorderedComparer<Hypothesis>,
|
||||
UnorderedComparer<Hypothesis>
|
||||
> _HCType;
|
||||
|
||||
MiniStack(const Manager &mgr);
|
||||
|
||||
StackAdd Add(const Hypothesis *hypo);
|
||||
|
||||
_HCType &GetColl()
|
||||
{ return m_coll; }
|
||||
|
||||
const _HCType &GetColl() const
|
||||
{ return m_coll; }
|
||||
|
||||
void Clear();
|
||||
|
||||
Hypotheses &GetSortedAndPruneHypos(const Manager &mgr) const;
|
||||
|
||||
protected:
|
||||
_HCType m_coll;
|
||||
mutable Hypotheses *m_sortedHypos;
|
||||
|
||||
void SortAndPruneHypos(const Manager &mgr) const;
|
||||
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////
|
||||
class Stack {
|
||||
protected:
|
||||
|
||||
|
||||
public:
|
||||
typedef std::pair<const Bitmap*, size_t> HypoCoverage;
|
||||
// bitmap and current endPos of hypos
|
||||
typedef boost::unordered_map<HypoCoverage, Hypotheses*> SortedHypos;
|
||||
|
||||
typedef const Bitmap* HypoCoverageInternal;
|
||||
typedef boost::unordered_map<HypoCoverageInternal, MiniStack*
|
||||
,boost::hash<HypoCoverageInternal>
|
||||
,std::equal_to<HypoCoverageInternal>
|
||||
> Coll;
|
||||
|
||||
|
||||
Stack(const Manager &mgr);
|
||||
virtual ~Stack();
|
||||
|
||||
size_t GetHypoSize() const;
|
||||
|
||||
Coll &GetColl()
|
||||
{ return m_coll; }
|
||||
const Coll &GetColl() const
|
||||
{ return m_coll; }
|
||||
|
||||
void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
|
||||
|
||||
MiniStack &GetMiniStack(const HypoCoverageInternal &key);
|
||||
|
||||
std::vector<const Hypothesis*> GetBestHypos(size_t num) const;
|
||||
void Clear();
|
||||
|
||||
SortedHypos GetSortedAndPruneHypos(const Manager &mgr) const;
|
||||
void SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const;
|
||||
|
||||
void DebugCounts();
|
||||
|
||||
protected:
|
||||
const Manager &m_mgr;
|
||||
Coll m_coll;
|
||||
|
||||
std::deque<MiniStack*> m_miniStackRecycler;
|
||||
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,161 +0,0 @@
|
||||
/*
|
||||
* CubePruning.cpp
|
||||
*
|
||||
* Created on: 27 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "Misc.h"
|
||||
#include "Stack.h"
|
||||
#include "../Manager.h"
|
||||
#include "../../MemPool.h"
|
||||
#include "../../System.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSCubePruningCardinalStack
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
QueueItem *QueueItem::Create(QueueItem *currItem,
|
||||
Manager &mgr,
|
||||
CubeEdge &edge,
|
||||
size_t hypoIndex,
|
||||
size_t tpIndex,
|
||||
std::deque<QueueItem*> &queueItemRecycler)
|
||||
{
|
||||
QueueItem *ret;
|
||||
if (currItem) {
|
||||
// reuse incoming queue item to create new item
|
||||
ret = currItem;
|
||||
ret->Init(mgr, edge, hypoIndex, tpIndex);
|
||||
}
|
||||
else if (!queueItemRecycler.empty()) {
|
||||
// use item from recycle bin
|
||||
ret = queueItemRecycler.back();
|
||||
ret->Init(mgr, edge, hypoIndex, tpIndex);
|
||||
queueItemRecycler.pop_back();
|
||||
}
|
||||
else {
|
||||
// create new item
|
||||
ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
|
||||
:edge(&edge)
|
||||
,hypoIndex(hypoIndex)
|
||||
,tpIndex(tpIndex)
|
||||
{
|
||||
CreateHypothesis(mgr);
|
||||
}
|
||||
|
||||
void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
|
||||
{
|
||||
this->edge = &edge;
|
||||
this->hypoIndex = hypoIndex;
|
||||
this->tpIndex = tpIndex;
|
||||
|
||||
CreateHypothesis(mgr);
|
||||
}
|
||||
|
||||
void QueueItem::CreateHypothesis(Manager &mgr)
|
||||
{
|
||||
const Hypothesis *prevHypo = edge->hypos[hypoIndex];
|
||||
const TargetPhrase &tp = edge->tps[tpIndex];
|
||||
|
||||
//cerr << "hypoIndex=" << hypoIndex << endl;
|
||||
//cerr << "edge.hypos=" << edge.hypos.size() << endl;
|
||||
//cerr << prevHypo << endl;
|
||||
//cerr << *prevHypo << endl;
|
||||
|
||||
hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
|
||||
hypo->EvaluateWhenApplied();
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
CubeEdge::CubeEdge(
|
||||
Manager &mgr,
|
||||
const Hypotheses &hypos,
|
||||
const InputPath &path,
|
||||
const TargetPhrases &tps,
|
||||
const Bitmap &newBitmap)
|
||||
:hypos(hypos)
|
||||
,path(path)
|
||||
,tps(tps)
|
||||
,newBitmap(newBitmap)
|
||||
{
|
||||
estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
|
||||
{
|
||||
out << obj.newBitmap;
|
||||
return out;
|
||||
}
|
||||
|
||||
bool
|
||||
CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
|
||||
{
|
||||
//UTIL_THROW_IF2(x >= (1<<17), "Error");
|
||||
//UTIL_THROW_IF2(y >= (1<<17), "Error");
|
||||
|
||||
SeenPositionItem val(this, (x<<16) + y);
|
||||
std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
|
||||
return pairRet.second;
|
||||
}
|
||||
|
||||
void CubeEdge::CreateFirst(Manager &mgr,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler)
|
||||
{
|
||||
assert(hypos.size());
|
||||
assert(tps.GetSize());
|
||||
|
||||
QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
|
||||
queue.push(item);
|
||||
bool setSeen = SetSeenPosition(0, 0, seenPositions);
|
||||
assert(setSeen);
|
||||
}
|
||||
|
||||
void CubeEdge::CreateNext(Manager &mgr,
|
||||
QueueItem *item,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler)
|
||||
{
|
||||
size_t hypoIndex = item->hypoIndex;
|
||||
size_t tpIndex = item->tpIndex;
|
||||
|
||||
if (hypoIndex + 1 < hypos.size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
|
||||
// reuse incoming queue item to create new item
|
||||
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
|
||||
assert(newItem == item);
|
||||
queue.push(newItem);
|
||||
item = NULL;
|
||||
}
|
||||
|
||||
if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
|
||||
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
|
||||
queue.push(newItem);
|
||||
item = NULL;
|
||||
}
|
||||
|
||||
if (item) {
|
||||
// recycle unused queue item
|
||||
queueItemRecycler.push_back(item);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,112 +0,0 @@
|
||||
/*
|
||||
* CubePruning.h
|
||||
*
|
||||
* Created on: 27 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
#include <boost/pool/pool_alloc.hpp>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include "../../legacy/Range.h"
|
||||
#include "../Hypothesis.h"
|
||||
#include "../../TypeDef.h"
|
||||
#include "../../Vector.h"
|
||||
#include "Stack.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class Manager;
|
||||
class InputPath;
|
||||
class TargetPhrases;
|
||||
class Bitmap;
|
||||
|
||||
namespace NSCubePruningCardinalStack
|
||||
{
|
||||
class CubeEdge;
|
||||
|
||||
///////////////////////////////////////////
|
||||
class QueueItem
|
||||
{
|
||||
~QueueItem(); // NOT IMPLEMENTED. Use MemPool
|
||||
public:
|
||||
static QueueItem *Create(QueueItem *currItem,
|
||||
Manager &mgr,
|
||||
CubeEdge &edge,
|
||||
size_t hypoIndex,
|
||||
size_t tpIndex,
|
||||
std::deque<QueueItem*> &queueItemRecycler);
|
||||
QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
|
||||
|
||||
void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
|
||||
|
||||
CubeEdge *edge;
|
||||
size_t hypoIndex, tpIndex;
|
||||
Hypothesis *hypo;
|
||||
|
||||
protected:
|
||||
void CreateHypothesis(Manager &mgr);
|
||||
};
|
||||
|
||||
///////////////////////////////////////////
|
||||
class QueueItemOrderer
|
||||
{
|
||||
public:
|
||||
bool operator()(QueueItem* itemA, QueueItem* itemB) const {
|
||||
HypothesisFutureScoreOrderer orderer;
|
||||
return !orderer(itemA->hypo, itemB->hypo);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////
|
||||
class CubeEdge
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
|
||||
|
||||
public:
|
||||
typedef std::priority_queue<QueueItem*,
|
||||
std::vector<QueueItem*>,
|
||||
QueueItemOrderer> Queue;
|
||||
|
||||
typedef std::pair<const CubeEdge*, int> SeenPositionItem;
|
||||
typedef boost::unordered_set<SeenPositionItem,
|
||||
boost::hash<SeenPositionItem>,
|
||||
std::equal_to<SeenPositionItem>
|
||||
> SeenPositions;
|
||||
|
||||
const Hypotheses &hypos;
|
||||
const InputPath &path;
|
||||
const TargetPhrases &tps;
|
||||
const Bitmap &newBitmap;
|
||||
SCORE estimatedScore;
|
||||
|
||||
CubeEdge(Manager &mgr,
|
||||
const Hypotheses &hypos,
|
||||
const InputPath &path,
|
||||
const TargetPhrases &tps,
|
||||
const Bitmap &newBitmap);
|
||||
|
||||
bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
|
||||
|
||||
void CreateFirst(Manager &mgr,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler);
|
||||
void CreateNext(Manager &mgr,
|
||||
QueueItem *item,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler);
|
||||
|
||||
protected:
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,206 +0,0 @@
|
||||
/*
|
||||
* Search.cpp
|
||||
*
|
||||
* Created on: 16 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <boost/foreach.hpp>
|
||||
#include "Search.h"
|
||||
#include "Stack.h"
|
||||
#include "../Manager.h"
|
||||
#include "../Hypothesis.h"
|
||||
#include "../../InputPaths.h"
|
||||
#include "../../InputPath.h"
|
||||
#include "../../System.h"
|
||||
#include "../../Sentence.h"
|
||||
#include "../../TranslationTask.h"
|
||||
#include "../../legacy/Util2.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSCubePruningCardinalStack
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Construct the search state: one reusable stack, the cube-pruning priority
// queue, and the set of already-expanded cube coordinates.
Search::Search(Manager &mgr)
  : Moses2::Search(mgr)
  , m_stack(mgr)
  , m_queue(QueueItemOrderer(), std::vector<QueueItem*>())
  , m_seenPositions()
{
}

Search::~Search()
{
}
|
||||
|
||||
void Search::Decode()
|
||||
{
|
||||
// init cue edges
|
||||
m_cubeEdges.resize(mgr.GetInput().GetSize() + 1);
|
||||
for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
|
||||
m_cubeEdges[i] = new (mgr.GetPool().Allocate<CubeEdges>()) CubeEdges();
|
||||
}
|
||||
|
||||
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
|
||||
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
|
||||
initHypo->EmptyHypothesisState(mgr.GetInput());
|
||||
|
||||
m_stack.Add(initHypo, mgr.GetHypoRecycle());
|
||||
PostDecode(0);
|
||||
|
||||
for (size_t stackInd = 1; stackInd < mgr.GetInput().GetSize() + 1; ++stackInd) {
|
||||
//cerr << "stackInd=" << stackInd << endl;
|
||||
m_stack.Clear();
|
||||
Decode(stackInd);
|
||||
PostDecode(stackInd);
|
||||
|
||||
//m_stack.DebugCounts();
|
||||
//cerr << m_stacks << endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Search::Decode(size_t stackInd)
|
||||
{
|
||||
Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
|
||||
|
||||
// reuse queue from previous stack. Clear it first
|
||||
std::vector<QueueItem*> &container = Container(m_queue);
|
||||
//cerr << "container=" << container.size() << endl;
|
||||
BOOST_FOREACH(QueueItem *item, container) {
|
||||
// recycle unused hypos from queue
|
||||
Hypothesis *hypo = item->hypo;
|
||||
hypoRecycler.Recycle(hypo);
|
||||
|
||||
// recycle queue item
|
||||
m_queueItemRecycler.push_back(item);
|
||||
}
|
||||
container.clear();
|
||||
|
||||
m_seenPositions.clear();
|
||||
|
||||
// add top hypo from every edge into queue
|
||||
CubeEdges &edges = *m_cubeEdges[stackInd];
|
||||
|
||||
BOOST_FOREACH(CubeEdge *edge, edges) {
|
||||
//cerr << *edge << " ";
|
||||
edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
|
||||
}
|
||||
|
||||
/*
|
||||
cerr << "edges: ";
|
||||
boost::unordered_set<const Bitmap*> uniqueBM;
|
||||
BOOST_FOREACH(CubeEdge *edge, edges) {
|
||||
uniqueBM.insert(&edge->newBitmap);
|
||||
//cerr << *edge << " ";
|
||||
}
|
||||
cerr << edges.size() << " " << uniqueBM.size();
|
||||
cerr << endl;
|
||||
*/
|
||||
|
||||
size_t pops = 0;
|
||||
while (!m_queue.empty() && pops < mgr.system.popLimit) {
|
||||
// get best hypo from queue, add to stack
|
||||
//cerr << "queue=" << queue.size() << endl;
|
||||
QueueItem *item = m_queue.top();
|
||||
m_queue.pop();
|
||||
|
||||
CubeEdge *edge = item->edge;
|
||||
|
||||
// add hypo to stack
|
||||
Hypothesis *hypo = item->hypo;
|
||||
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
|
||||
m_stack.Add(hypo, hypoRecycler);
|
||||
|
||||
edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
|
||||
|
||||
++pops;
|
||||
}
|
||||
|
||||
/*
|
||||
// create hypo from every edge. Increase diversity
|
||||
while (!m_queue.empty()) {
|
||||
QueueItem *item = m_queue.top();
|
||||
m_queue.pop();
|
||||
|
||||
if (item->hypoIndex == 0 && item->tpIndex == 0) {
|
||||
CubeEdge &edge = item->edge;
|
||||
|
||||
// add hypo to stack
|
||||
Hypothesis *hypo = item->hypo;
|
||||
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
|
||||
m_stacks.Add(hypo, mgr.GetHypoRecycle());
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
void Search::PostDecode(size_t stackInd)
|
||||
{
|
||||
MemPool &pool = mgr.GetPool();
|
||||
|
||||
Stack::SortedHypos sortedHypos = m_stack.GetSortedAndPruneHypos(mgr);
|
||||
|
||||
BOOST_FOREACH(const Stack::SortedHypos::value_type &val, sortedHypos) {
|
||||
const Bitmap &hypoBitmap = *val.first.first;
|
||||
size_t hypoEndPos = val.first.second;
|
||||
//cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
|
||||
|
||||
// create edges to next hypos from existing hypos
|
||||
const InputPaths &paths = mgr.GetInputPaths();
|
||||
|
||||
BOOST_FOREACH(const InputPath *path, paths) {
|
||||
const Range &pathRange = path->range;
|
||||
//cerr << "pathRange=" << pathRange << endl;
|
||||
|
||||
if (!path->IsUsed()) {
|
||||
continue;
|
||||
}
|
||||
if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
|
||||
size_t numWords = newBitmap.GetNumWordsCovered();
|
||||
|
||||
CubeEdges &edges = *m_cubeEdges[numWords];
|
||||
|
||||
// sort hypo for a particular bitmap and hypoEndPos
|
||||
Hypotheses &sortedHypos = *val.second;
|
||||
|
||||
size_t numPt = mgr.system.mappings.size();
|
||||
for (size_t i = 0; i < numPt; ++i) {
|
||||
const TargetPhrases *tps = path->targetPhrases[i];
|
||||
if (tps && tps->GetSize()) {
|
||||
CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap);
|
||||
edges.push_back(edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Return the single best hypothesis from the (final) stack, or NULL when
// the stack produced nothing.
const Hypothesis *Search::GetBestHypo() const
{
  std::vector<const Hypothesis*> best = m_stack.GetBestHypos(1);
  return best.empty() ? NULL : best[0];
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,57 +0,0 @@
|
||||
/*
|
||||
* Search.h
|
||||
*
|
||||
* Created on: 16 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <boost/pool/pool_alloc.hpp>
|
||||
#include "../Search.h"
|
||||
#include "Misc.h"
|
||||
#include "Stack.h"
|
||||
#include "../../legacy/Range.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class Bitmap;
|
||||
class Hypothesis;
|
||||
class InputPath;
|
||||
class TargetPhrases;
|
||||
|
||||
namespace NSCubePruningCardinalStack
|
||||
{
|
||||
|
||||
class Search : public Moses2::Search
|
||||
{
|
||||
public:
|
||||
Search(Manager &mgr);
|
||||
virtual ~Search();
|
||||
|
||||
virtual void Decode();
|
||||
const Hypothesis *GetBestHypo() const;
|
||||
|
||||
protected:
|
||||
Stack m_stack;
|
||||
|
||||
CubeEdge::Queue m_queue;
|
||||
CubeEdge::SeenPositions m_seenPositions;
|
||||
|
||||
// CUBE PRUNING VARIABLES
|
||||
// setup
|
||||
typedef std::vector<CubeEdge*> CubeEdges;
|
||||
std::vector<CubeEdges*> m_cubeEdges;
|
||||
|
||||
std::deque<QueueItem*> m_queueItemRecycler;
|
||||
|
||||
// CUBE PRUNING
|
||||
// decoding
|
||||
void Decode(size_t stackInd);
|
||||
void PostDecode(size_t stackInd);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,68 +0,0 @@
|
||||
/*
|
||||
* Stack.h
|
||||
*
|
||||
* Created on: 24 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include <deque>
|
||||
#include "../Hypothesis.h"
|
||||
#include "../../TypeDef.h"
|
||||
#include "../../Vector.h"
|
||||
#include "../../MemPool.h"
|
||||
#include "../../Recycler.h"
|
||||
#include "../../legacy/Util2.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class Manager;
|
||||
|
||||
namespace NSCubePruningCardinalStack
|
||||
{
|
||||
typedef Vector<const Hypothesis*> Hypotheses;
|
||||
|
||||
|
||||
/////////////////////////////////////////////
|
||||
class Stack {
|
||||
protected:
|
||||
typedef boost::unordered_set<const Hypothesis*,
|
||||
UnorderedComparer<Hypothesis>,
|
||||
UnorderedComparer<Hypothesis>
|
||||
> _HCType;
|
||||
|
||||
public:
|
||||
typedef std::pair<const Bitmap*, size_t> HypoCoverage;
|
||||
typedef boost::unordered_map<HypoCoverage, Hypotheses*> SortedHypos;
|
||||
|
||||
Stack(const Manager &mgr);
|
||||
virtual ~Stack();
|
||||
|
||||
size_t GetHypoSize() const;
|
||||
|
||||
_HCType &GetColl()
|
||||
{ return m_coll; }
|
||||
const _HCType &GetColl() const
|
||||
{ return m_coll; }
|
||||
|
||||
void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
|
||||
|
||||
std::vector<const Hypothesis*> GetBestHypos(size_t num) const;
|
||||
void Clear();
|
||||
|
||||
SortedHypos GetSortedAndPruneHypos(const Manager &mgr) const;
|
||||
void SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const;
|
||||
|
||||
protected:
|
||||
const Manager &m_mgr;
|
||||
_HCType m_coll;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,161 +0,0 @@
|
||||
/*
|
||||
* CubePruning.cpp
|
||||
*
|
||||
* Created on: 27 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "Misc.h"
|
||||
#include "../Manager.h"
|
||||
#include "../../MemPool.h"
|
||||
#include "../../System.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSCubePruningPerBitmap
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
QueueItem *QueueItem::Create(QueueItem *currItem,
|
||||
Manager &mgr,
|
||||
CubeEdge &edge,
|
||||
size_t hypoIndex,
|
||||
size_t tpIndex,
|
||||
std::deque<QueueItem*> &queueItemRecycler)
|
||||
{
|
||||
QueueItem *ret;
|
||||
if (currItem) {
|
||||
// reuse incoming queue item to create new item
|
||||
ret = currItem;
|
||||
ret->Init(mgr, edge, hypoIndex, tpIndex);
|
||||
}
|
||||
else if (!queueItemRecycler.empty()) {
|
||||
// use item from recycle bin
|
||||
ret = queueItemRecycler.back();
|
||||
ret->Init(mgr, edge, hypoIndex, tpIndex);
|
||||
queueItemRecycler.pop_back();
|
||||
}
|
||||
else {
|
||||
// create new item
|
||||
ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
|
||||
:edge(&edge)
|
||||
,hypoIndex(hypoIndex)
|
||||
,tpIndex(tpIndex)
|
||||
{
|
||||
CreateHypothesis(mgr);
|
||||
}
|
||||
|
||||
void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
|
||||
{
|
||||
this->edge = &edge;
|
||||
this->hypoIndex = hypoIndex;
|
||||
this->tpIndex = tpIndex;
|
||||
|
||||
CreateHypothesis(mgr);
|
||||
}
|
||||
|
||||
void QueueItem::CreateHypothesis(Manager &mgr)
|
||||
{
|
||||
const Hypothesis *prevHypo = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex];
|
||||
const TargetPhrase &tp = edge->tps[tpIndex];
|
||||
|
||||
//cerr << "hypoIndex=" << hypoIndex << endl;
|
||||
//cerr << "edge.hypos=" << edge.hypos.size() << endl;
|
||||
//cerr << prevHypo << endl;
|
||||
//cerr << *prevHypo << endl;
|
||||
|
||||
hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
|
||||
hypo->EvaluateWhenApplied();
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
CubeEdge::CubeEdge(
|
||||
Manager &mgr,
|
||||
const NSCubePruningMiniStack::MiniStack &miniStack,
|
||||
const InputPath &path,
|
||||
const TargetPhrases &tps,
|
||||
const Bitmap &newBitmap)
|
||||
:miniStack(miniStack)
|
||||
,path(path)
|
||||
,tps(tps)
|
||||
,newBitmap(newBitmap)
|
||||
{
|
||||
estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
|
||||
{
|
||||
out << obj.newBitmap;
|
||||
return out;
|
||||
}
|
||||
|
||||
bool
|
||||
CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
|
||||
{
|
||||
//UTIL_THROW_IF2(x >= (1<<17), "Error");
|
||||
//UTIL_THROW_IF2(y >= (1<<17), "Error");
|
||||
|
||||
SeenPositionItem val(this, (x<<16) + y);
|
||||
std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
|
||||
return pairRet.second;
|
||||
}
|
||||
|
||||
void CubeEdge::CreateFirst(Manager &mgr,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler)
|
||||
{
|
||||
if (miniStack.GetSortedAndPruneHypos(mgr).size()) {
|
||||
assert(tps.GetSize());
|
||||
|
||||
QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
|
||||
queue.push(item);
|
||||
bool setSeen = SetSeenPosition(0, 0, seenPositions);
|
||||
assert(setSeen);
|
||||
}
|
||||
}
|
||||
|
||||
void CubeEdge::CreateNext(Manager &mgr,
|
||||
QueueItem *item,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler)
|
||||
{
|
||||
size_t hypoIndex = item->hypoIndex;
|
||||
size_t tpIndex = item->tpIndex;
|
||||
|
||||
if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
|
||||
// reuse incoming queue item to create new item
|
||||
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
|
||||
assert(newItem == item);
|
||||
queue.push(newItem);
|
||||
item = NULL;
|
||||
}
|
||||
|
||||
if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
|
||||
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
|
||||
queue.push(newItem);
|
||||
item = NULL;
|
||||
}
|
||||
|
||||
if (item) {
|
||||
// recycle unused queue item
|
||||
queueItemRecycler.push_back(item);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,113 +0,0 @@
|
||||
/*
|
||||
* CubePruning.h
|
||||
*
|
||||
* Created on: 27 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
#include <boost/pool/pool_alloc.hpp>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include "../../legacy/Range.h"
|
||||
#include "../Hypothesis.h"
|
||||
#include "../../TypeDef.h"
|
||||
#include "../../Vector.h"
|
||||
#include "../CubePruningMiniStack/Stack.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class Manager;
|
||||
class InputPath;
|
||||
class TargetPhrases;
|
||||
class Bitmap;
|
||||
|
||||
namespace NSCubePruningPerBitmap
|
||||
{
|
||||
class CubeEdge;
|
||||
|
||||
///////////////////////////////////////////
|
||||
class QueueItem
|
||||
{
|
||||
~QueueItem(); // NOT IMPLEMENTED. Use MemPool
|
||||
public:
|
||||
static QueueItem *Create(QueueItem *currItem,
|
||||
Manager &mgr,
|
||||
CubeEdge &edge,
|
||||
size_t hypoIndex,
|
||||
size_t tpIndex,
|
||||
std::deque<QueueItem*> &queueItemRecycler);
|
||||
QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
|
||||
|
||||
void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
|
||||
|
||||
CubeEdge *edge;
|
||||
size_t hypoIndex, tpIndex;
|
||||
Hypothesis *hypo;
|
||||
|
||||
protected:
|
||||
void CreateHypothesis(Manager &mgr);
|
||||
};
|
||||
|
||||
///////////////////////////////////////////
|
||||
class QueueItemOrderer
|
||||
{
|
||||
public:
|
||||
bool operator()(QueueItem* itemA, QueueItem* itemB) const {
|
||||
HypothesisFutureScoreOrderer orderer;
|
||||
return !orderer(itemA->hypo, itemB->hypo);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////
|
||||
class CubeEdge
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
|
||||
|
||||
public:
|
||||
typedef std::priority_queue<QueueItem*,
|
||||
std::vector<QueueItem*>,
|
||||
QueueItemOrderer> Queue;
|
||||
|
||||
typedef std::pair<const CubeEdge*, int> SeenPositionItem;
|
||||
typedef boost::unordered_set<SeenPositionItem,
|
||||
boost::hash<SeenPositionItem>,
|
||||
std::equal_to<SeenPositionItem>
|
||||
> SeenPositions;
|
||||
|
||||
const NSCubePruningMiniStack::MiniStack &miniStack;
|
||||
const InputPath &path;
|
||||
const TargetPhrases &tps;
|
||||
const Bitmap &newBitmap;
|
||||
SCORE estimatedScore;
|
||||
|
||||
CubeEdge(Manager &mgr,
|
||||
const NSCubePruningMiniStack::MiniStack &miniStack,
|
||||
const InputPath &path,
|
||||
const TargetPhrases &tps,
|
||||
const Bitmap &newBitmap);
|
||||
|
||||
bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
|
||||
|
||||
void CreateFirst(Manager &mgr,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler);
|
||||
void CreateNext(Manager &mgr,
|
||||
QueueItem *item,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler);
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,273 +0,0 @@
|
||||
/*
|
||||
* Search.cpp
|
||||
*
|
||||
* Created on: 16 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <boost/foreach.hpp>
|
||||
#include "Search.h"
|
||||
#include "../Manager.h"
|
||||
#include "../Hypothesis.h"
|
||||
#include "../../InputPaths.h"
|
||||
#include "../../InputPath.h"
|
||||
#include "../../System.h"
|
||||
#include "../../Sentence.h"
|
||||
#include "../../TranslationTask.h"
|
||||
#include "../../legacy/Util2.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSCubePruningPerBitmap
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
Search::Search(Manager &mgr)
|
||||
:Moses2::Search(mgr)
|
||||
,m_stacks(mgr)
|
||||
|
||||
,m_queue(QueueItemOrderer(),
|
||||
std::vector<QueueItem*>() )
|
||||
|
||||
,m_seenPositions()
|
||||
{
|
||||
}
|
||||
|
||||
Search::~Search()
|
||||
{
|
||||
}
|
||||
|
||||
void Search::Decode()
|
||||
{
|
||||
// init stacks
|
||||
m_stacks.Init(mgr.GetInput().GetSize() + 1);
|
||||
|
||||
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
|
||||
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
|
||||
initHypo->EmptyHypothesisState(mgr.GetInput());
|
||||
|
||||
m_stacks.Add(initHypo, mgr.GetHypoRecycle());
|
||||
|
||||
for (size_t stackInd = 0; stackInd < m_stacks.GetSize() - 1; ++stackInd) {
|
||||
CreateSearchGraph(stackInd);
|
||||
}
|
||||
|
||||
for (size_t stackInd = 1; stackInd < m_stacks.GetSize(); ++stackInd) {
|
||||
//cerr << "stackInd=" << stackInd << endl;
|
||||
Decode(stackInd);
|
||||
|
||||
//cerr << m_stacks << endl;
|
||||
}
|
||||
|
||||
//DebugCounts();
|
||||
}
|
||||
|
||||
void Search::Decode(size_t stackInd)
|
||||
{
|
||||
NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
|
||||
|
||||
// FOR EACH BITMAP IN EACH STACK
|
||||
boost::unordered_map<const Bitmap*, vector<NSCubePruningMiniStack::MiniStack*> > uniqueBM;
|
||||
|
||||
BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
|
||||
NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
|
||||
|
||||
const Bitmap *bitmap = val.first.first;
|
||||
uniqueBM[bitmap].push_back(&miniStack);
|
||||
}
|
||||
|
||||
// decode each bitmap
|
||||
boost::unordered_map<const Bitmap*, vector<NSCubePruningMiniStack::MiniStack*> >::iterator iter;
|
||||
for (iter = uniqueBM.begin(); iter != uniqueBM.end(); ++iter) {
|
||||
const vector<NSCubePruningMiniStack::MiniStack*> &miniStacks = iter->second;
|
||||
Decode(miniStacks);
|
||||
}
|
||||
|
||||
/*
|
||||
// FOR EACH STACK
|
||||
vector<NSCubePruningMiniStack::MiniStack*> miniStacks;
|
||||
BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
|
||||
NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
|
||||
|
||||
miniStacks.push_back(&miniStack);
|
||||
}
|
||||
Decode(miniStacks);
|
||||
*/
|
||||
}
|
||||
|
||||
void Search::Decode(const vector<NSCubePruningMiniStack::MiniStack*> &miniStacks)
|
||||
{
|
||||
Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
|
||||
|
||||
// reuse queue from previous stack. Clear it first
|
||||
std::vector<QueueItem*> &container = Container(m_queue);
|
||||
//cerr << "container=" << container.size() << endl;
|
||||
BOOST_FOREACH(QueueItem *item, container) {
|
||||
// recycle unused hypos from queue
|
||||
Hypothesis *hypo = item->hypo;
|
||||
hypoRecycler.Recycle(hypo);
|
||||
|
||||
// recycle queue item
|
||||
m_queueItemRecycler.push_back(item);
|
||||
}
|
||||
container.clear();
|
||||
|
||||
m_seenPositions.clear();
|
||||
|
||||
BOOST_FOREACH(NSCubePruningMiniStack::MiniStack *miniStack, miniStacks) {
|
||||
// add top hypo from every edge into queue
|
||||
CubeEdges &edges = *m_cubeEdges[miniStack];
|
||||
|
||||
BOOST_FOREACH(CubeEdge *edge, edges) {
|
||||
//cerr << "edge=" << *edge << endl;
|
||||
edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
|
||||
}
|
||||
}
|
||||
|
||||
size_t pops = 0;
|
||||
while (!m_queue.empty() && pops < mgr.system.popLimit) {
|
||||
// get best hypo from queue, add to stack
|
||||
//cerr << "queue=" << queue.size() << endl;
|
||||
QueueItem *item = m_queue.top();
|
||||
m_queue.pop();
|
||||
|
||||
CubeEdge *edge = item->edge;
|
||||
|
||||
// add hypo to stack
|
||||
Hypothesis *hypo = item->hypo;
|
||||
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
|
||||
m_stacks.Add(hypo, hypoRecycler);
|
||||
|
||||
edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
|
||||
|
||||
++pops;
|
||||
}
|
||||
|
||||
/*
|
||||
// create hypo from every edge. Increase diversity
|
||||
while (!m_queue.empty()) {
|
||||
QueueItem *item = m_queue.top();
|
||||
m_queue.pop();
|
||||
|
||||
if (item->hypoIndex == 0 && item->tpIndex == 0) {
|
||||
CubeEdge &edge = item->edge;
|
||||
|
||||
// add hypo to stack
|
||||
Hypothesis *hypo = item->hypo;
|
||||
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
|
||||
m_stacks.Add(hypo, mgr.GetHypoRecycle());
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
void Search::CreateSearchGraph(size_t stackInd)
|
||||
{
|
||||
NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
|
||||
MemPool &pool = mgr.GetPool();
|
||||
|
||||
BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
|
||||
const Bitmap &hypoBitmap = *val.first.first;
|
||||
size_t hypoEndPos = val.first.second;
|
||||
//cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
|
||||
|
||||
// create edges to next hypos from existing hypos
|
||||
const InputPaths &paths = mgr.GetInputPaths();
|
||||
|
||||
BOOST_FOREACH(const InputPath *path, paths) {
|
||||
const Range &pathRange = path->range;
|
||||
//cerr << "pathRange=" << pathRange << endl;
|
||||
|
||||
if (!path->IsUsed()) {
|
||||
continue;
|
||||
}
|
||||
if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
|
||||
|
||||
// sort hypo for a particular bitmap and hypoEndPos
|
||||
const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
|
||||
|
||||
|
||||
// add cube edge
|
||||
size_t numPt = mgr.system.mappings.size();
|
||||
for (size_t i = 0; i < numPt; ++i) {
|
||||
const TargetPhrases *tps = path->targetPhrases[i];
|
||||
if (tps && tps->GetSize()) {
|
||||
// create next mini stack
|
||||
NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange);
|
||||
|
||||
CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap);
|
||||
|
||||
CubeEdges *edges;
|
||||
boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*>::iterator iter = m_cubeEdges.find(&nextMiniStack);
|
||||
if (iter == m_cubeEdges.end()) {
|
||||
edges = new (pool.Allocate<CubeEdges>()) CubeEdges();
|
||||
m_cubeEdges[&nextMiniStack] = edges;
|
||||
}
|
||||
else {
|
||||
edges = iter->second;
|
||||
}
|
||||
|
||||
edges->push_back(edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Return the single best hypothesis from the last (full-coverage) stack,
// or NULL when decoding produced nothing.
const Hypothesis *Search::GetBestHypo() const
{
  const NSCubePruningMiniStack::Stack &lastStack = m_stacks.Back();
  std::vector<const Hypothesis*> best = lastStack.GetBestHypos(1);
  return best.empty() ? NULL : best[0];
}
|
||||
|
||||
void Search::DebugCounts()
|
||||
{
|
||||
std::map<size_t, size_t> counts;
|
||||
|
||||
for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
|
||||
//cerr << "stackInd=" << stackInd << endl;
|
||||
const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
|
||||
BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
|
||||
const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
|
||||
size_t count = miniStack.GetColl().size();
|
||||
|
||||
if (counts.find(count) == counts.end()) {
|
||||
counts[count] = 0;
|
||||
}
|
||||
else {
|
||||
++counts[count];
|
||||
}
|
||||
}
|
||||
//cerr << m_stacks << endl;
|
||||
}
|
||||
|
||||
std::map<size_t, size_t>::const_iterator iter;
|
||||
for (iter = counts.begin(); iter != counts.end(); ++iter) {
|
||||
cerr << iter->first << "=" << iter->second << " ";
|
||||
}
|
||||
cerr << endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,66 +0,0 @@
|
||||
/*
|
||||
* Search.h
|
||||
*
|
||||
* Created on: 16 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <boost/pool/pool_alloc.hpp>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include "../Search.h"
|
||||
#include "Misc.h"
|
||||
#include "Stacks.h"
|
||||
#include "../../legacy/Range.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class Bitmap;
|
||||
class Hypothesis;
|
||||
class InputPath;
|
||||
class TargetPhrases;
|
||||
|
||||
namespace NSCubePruningMiniStack
|
||||
{
|
||||
class MiniStack;
|
||||
}
|
||||
|
||||
namespace NSCubePruningPerBitmap
|
||||
{
|
||||
|
||||
class Search : public Moses2::Search
|
||||
{
|
||||
public:
|
||||
Search(Manager &mgr);
|
||||
virtual ~Search();
|
||||
|
||||
virtual void Decode();
|
||||
const Hypothesis *GetBestHypo() const;
|
||||
|
||||
protected:
|
||||
Stacks m_stacks;
|
||||
|
||||
CubeEdge::Queue m_queue;
|
||||
CubeEdge::SeenPositions m_seenPositions;
|
||||
|
||||
// CUBE PRUNING VARIABLES
|
||||
// setup
|
||||
typedef std::vector<CubeEdge*> CubeEdges;
|
||||
boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*> m_cubeEdges;
|
||||
|
||||
std::deque<QueueItem*> m_queueItemRecycler;
|
||||
|
||||
// CUBE PRUNING
|
||||
// decoding
|
||||
void CreateSearchGraph(size_t stackInd);
|
||||
void Decode(size_t stackInd);
|
||||
void Decode(const std::vector<NSCubePruningMiniStack::MiniStack*> &miniStacks);
|
||||
|
||||
void DebugCounts();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,72 +0,0 @@
|
||||
/*
|
||||
* Stacks.cpp
|
||||
*
|
||||
* Created on: 6 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "Stacks.h"
|
||||
#include "../../System.h"
|
||||
#include "../Manager.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSCubePruningPerBitmap
|
||||
{
|
||||
|
||||
Stacks::Stacks(const Manager &mgr)
|
||||
:m_mgr(mgr)
|
||||
{
|
||||
}
|
||||
|
||||
Stacks::~Stacks()
|
||||
{
|
||||
}
|
||||
|
||||
void Stacks::Init(size_t numStacks)
|
||||
{
|
||||
m_stacks.resize(numStacks);
|
||||
for (size_t i = 0; i < m_stacks.size(); ++i) {
|
||||
m_stacks[i] = new (m_mgr.GetPool().Allocate<NSCubePruningMiniStack::Stack>()) NSCubePruningMiniStack::Stack(m_mgr);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const Stacks &obj)
|
||||
{
|
||||
for (size_t i = 0; i < obj.GetSize(); ++i) {
|
||||
const NSCubePruningMiniStack::Stack &stack = *obj.m_stacks[i];
|
||||
out << stack.GetHypoSize() << " ";
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
void Stacks::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
|
||||
{
|
||||
size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered();
|
||||
//cerr << "numWordsCovered=" << numWordsCovered << endl;
|
||||
NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
|
||||
stack.Add(hypo, hypoRecycle);
|
||||
|
||||
}
|
||||
|
||||
// Find (or create) the mini-stack for hypotheses covering newBitmap and
// ending at pathRange's end position.
NSCubePruningMiniStack::MiniStack &Stacks::GetMiniStack(const Bitmap &newBitmap, const Range &pathRange)
{
  size_t numWordsCovered = newBitmap.GetNumWordsCovered();
  NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];

  NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos());
  // BUG FIX: the function is declared to return MiniStack& but the original
  // discarded the lookup result and fell off the end of a value-returning
  // function (undefined behaviour); return the mini-stack to the caller.
  return stack.GetMiniStack(key);
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,51 +0,0 @@
|
||||
/*
|
||||
* Stacks.h
|
||||
*
|
||||
* Created on: 6 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "../CubePruningMiniStack/Stack.h"
|
||||
#include "../../Recycler.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
class Manager;
|
||||
|
||||
namespace NSCubePruningPerBitmap
|
||||
{
|
||||
|
||||
class Stacks {
|
||||
friend std::ostream& operator<<(std::ostream &, const Stacks &);
|
||||
public:
|
||||
Stacks(const Manager &mgr);
|
||||
virtual ~Stacks();
|
||||
|
||||
void Init(size_t numStacks);
|
||||
|
||||
size_t GetSize() const
|
||||
{ return m_stacks.size(); }
|
||||
|
||||
const NSCubePruningMiniStack::Stack &Back() const
|
||||
{ return *m_stacks.back(); }
|
||||
|
||||
NSCubePruningMiniStack::Stack &operator[](size_t ind)
|
||||
{ return *m_stacks[ind]; }
|
||||
|
||||
void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
|
||||
NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange);
|
||||
|
||||
protected:
|
||||
const Manager &m_mgr;
|
||||
std::vector<NSCubePruningMiniStack::Stack*> m_stacks;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,161 +0,0 @@
|
||||
/*
|
||||
* CubePruning.cpp
|
||||
*
|
||||
* Created on: 27 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "Misc.h"
|
||||
#include "../Manager.h"
|
||||
#include "../../MemPool.h"
|
||||
#include "../../System.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSCubePruningPerMiniStack
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
QueueItem *QueueItem::Create(QueueItem *currItem,
|
||||
Manager &mgr,
|
||||
CubeEdge &edge,
|
||||
size_t hypoIndex,
|
||||
size_t tpIndex,
|
||||
std::deque<QueueItem*> &queueItemRecycler)
|
||||
{
|
||||
QueueItem *ret;
|
||||
if (currItem) {
|
||||
// reuse incoming queue item to create new item
|
||||
ret = currItem;
|
||||
ret->Init(mgr, edge, hypoIndex, tpIndex);
|
||||
}
|
||||
else if (!queueItemRecycler.empty()) {
|
||||
// use item from recycle bin
|
||||
ret = queueItemRecycler.back();
|
||||
ret->Init(mgr, edge, hypoIndex, tpIndex);
|
||||
queueItemRecycler.pop_back();
|
||||
}
|
||||
else {
|
||||
// create new item
|
||||
ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
|
||||
:edge(&edge)
|
||||
,hypoIndex(hypoIndex)
|
||||
,tpIndex(tpIndex)
|
||||
{
|
||||
CreateHypothesis(mgr);
|
||||
}
|
||||
|
||||
void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
|
||||
{
|
||||
this->edge = &edge;
|
||||
this->hypoIndex = hypoIndex;
|
||||
this->tpIndex = tpIndex;
|
||||
|
||||
CreateHypothesis(mgr);
|
||||
}
|
||||
|
||||
void QueueItem::CreateHypothesis(Manager &mgr)
|
||||
{
|
||||
const Hypothesis *prevHypo = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex];
|
||||
const TargetPhrase &tp = edge->tps[tpIndex];
|
||||
|
||||
//cerr << "hypoIndex=" << hypoIndex << endl;
|
||||
//cerr << "edge.hypos=" << edge.hypos.size() << endl;
|
||||
//cerr << prevHypo << endl;
|
||||
//cerr << *prevHypo << endl;
|
||||
|
||||
hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
|
||||
hypo->EvaluateWhenApplied();
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
CubeEdge::CubeEdge(
|
||||
Manager &mgr,
|
||||
const NSCubePruningMiniStack::MiniStack &miniStack,
|
||||
const InputPath &path,
|
||||
const TargetPhrases &tps,
|
||||
const Bitmap &newBitmap)
|
||||
:miniStack(miniStack)
|
||||
,path(path)
|
||||
,tps(tps)
|
||||
,newBitmap(newBitmap)
|
||||
{
|
||||
estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
|
||||
{
|
||||
out << obj.newBitmap;
|
||||
return out;
|
||||
}
|
||||
|
||||
bool
|
||||
CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
|
||||
{
|
||||
//UTIL_THROW_IF2(x >= (1<<17), "Error");
|
||||
//UTIL_THROW_IF2(y >= (1<<17), "Error");
|
||||
|
||||
SeenPositionItem val(this, (x<<16) + y);
|
||||
std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
|
||||
return pairRet.second;
|
||||
}
|
||||
|
||||
void CubeEdge::CreateFirst(Manager &mgr,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler)
|
||||
{
|
||||
if (miniStack.GetSortedAndPruneHypos(mgr).size()) {
|
||||
assert(tps.GetSize());
|
||||
|
||||
QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
|
||||
queue.push(item);
|
||||
bool setSeen = SetSeenPosition(0, 0, seenPositions);
|
||||
assert(setSeen);
|
||||
}
|
||||
}
|
||||
|
||||
void CubeEdge::CreateNext(Manager &mgr,
|
||||
QueueItem *item,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler)
|
||||
{
|
||||
size_t hypoIndex = item->hypoIndex;
|
||||
size_t tpIndex = item->tpIndex;
|
||||
|
||||
if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
|
||||
// reuse incoming queue item to create new item
|
||||
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
|
||||
assert(newItem == item);
|
||||
queue.push(newItem);
|
||||
item = NULL;
|
||||
}
|
||||
|
||||
if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
|
||||
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
|
||||
queue.push(newItem);
|
||||
item = NULL;
|
||||
}
|
||||
|
||||
if (item) {
|
||||
// recycle unused queue item
|
||||
queueItemRecycler.push_back(item);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,113 +0,0 @@
|
||||
/*
|
||||
* CubePruning.h
|
||||
*
|
||||
* Created on: 27 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
#include <boost/pool/pool_alloc.hpp>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include "../../legacy/Range.h"
|
||||
#include "../Hypothesis.h"
|
||||
#include "../../TypeDef.h"
|
||||
#include "../../Vector.h"
|
||||
#include "../CubePruningMiniStack/Stack.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class Manager;
|
||||
class InputPath;
|
||||
class TargetPhrases;
|
||||
class Bitmap;
|
||||
|
||||
namespace NSCubePruningPerMiniStack
|
||||
{
|
||||
class CubeEdge;
|
||||
|
||||
///////////////////////////////////////////
|
||||
class QueueItem
|
||||
{
|
||||
~QueueItem(); // NOT IMPLEMENTED. Use MemPool
|
||||
public:
|
||||
static QueueItem *Create(QueueItem *currItem,
|
||||
Manager &mgr,
|
||||
CubeEdge &edge,
|
||||
size_t hypoIndex,
|
||||
size_t tpIndex,
|
||||
std::deque<QueueItem*> &queueItemRecycler);
|
||||
QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
|
||||
|
||||
void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
|
||||
|
||||
CubeEdge *edge;
|
||||
size_t hypoIndex, tpIndex;
|
||||
Hypothesis *hypo;
|
||||
|
||||
protected:
|
||||
void CreateHypothesis(Manager &mgr);
|
||||
};
|
||||
|
||||
///////////////////////////////////////////
|
||||
class QueueItemOrderer
|
||||
{
|
||||
public:
|
||||
bool operator()(QueueItem* itemA, QueueItem* itemB) const {
|
||||
HypothesisFutureScoreOrderer orderer;
|
||||
return !orderer(itemA->hypo, itemB->hypo);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////
|
||||
class CubeEdge
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
|
||||
|
||||
public:
|
||||
typedef std::priority_queue<QueueItem*,
|
||||
std::vector<QueueItem*>,
|
||||
QueueItemOrderer> Queue;
|
||||
|
||||
typedef std::pair<const CubeEdge*, int> SeenPositionItem;
|
||||
typedef boost::unordered_set<SeenPositionItem,
|
||||
boost::hash<SeenPositionItem>,
|
||||
std::equal_to<SeenPositionItem>
|
||||
> SeenPositions;
|
||||
|
||||
const NSCubePruningMiniStack::MiniStack &miniStack;
|
||||
const InputPath &path;
|
||||
const TargetPhrases &tps;
|
||||
const Bitmap &newBitmap;
|
||||
SCORE estimatedScore;
|
||||
|
||||
CubeEdge(Manager &mgr,
|
||||
const NSCubePruningMiniStack::MiniStack &miniStack,
|
||||
const InputPath &path,
|
||||
const TargetPhrases &tps,
|
||||
const Bitmap &newBitmap);
|
||||
|
||||
bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
|
||||
|
||||
void CreateFirst(Manager &mgr,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler);
|
||||
void CreateNext(Manager &mgr,
|
||||
QueueItem *item,
|
||||
Queue &queue,
|
||||
SeenPositions &seenPositions,
|
||||
std::deque<QueueItem*> &queueItemRecycler);
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,248 +0,0 @@
|
||||
/*
|
||||
* Search.cpp
|
||||
*
|
||||
* Created on: 16 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <boost/foreach.hpp>
|
||||
#include "Search.h"
|
||||
#include "../Manager.h"
|
||||
#include "../Hypothesis.h"
|
||||
#include "../../InputPaths.h"
|
||||
#include "../../InputPath.h"
|
||||
#include "../../System.h"
|
||||
#include "../../Sentence.h"
|
||||
#include "../../TranslationTask.h"
|
||||
#include "../../legacy/Util2.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSCubePruningPerMiniStack
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
Search::Search(Manager &mgr)
|
||||
:Moses2::Search(mgr)
|
||||
,m_stacks(mgr)
|
||||
|
||||
,m_queue(QueueItemOrderer(),
|
||||
std::vector<QueueItem*>() )
|
||||
|
||||
,m_seenPositions()
|
||||
{
|
||||
}
|
||||
|
||||
Search::~Search()
|
||||
{
|
||||
}
|
||||
|
||||
void Search::Decode()
|
||||
{
|
||||
// init stacks
|
||||
m_stacks.Init(mgr.GetInput().GetSize() + 1);
|
||||
|
||||
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
|
||||
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap);
|
||||
initHypo->EmptyHypothesisState(mgr.GetInput());
|
||||
|
||||
m_stacks.Add(initHypo, mgr.GetHypoRecycle());
|
||||
|
||||
for (size_t stackInd = 0; stackInd < m_stacks.GetSize() - 1; ++stackInd) {
|
||||
CreateSearchGraph(stackInd);
|
||||
}
|
||||
|
||||
for (size_t stackInd = 1; stackInd < m_stacks.GetSize(); ++stackInd) {
|
||||
//cerr << "stackInd=" << stackInd << endl;
|
||||
Decode(stackInd);
|
||||
|
||||
//cerr << m_stacks << endl;
|
||||
}
|
||||
|
||||
//DebugCounts();
|
||||
}
|
||||
|
||||
void Search::Decode(size_t stackInd)
|
||||
{
|
||||
NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
|
||||
BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
|
||||
NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
|
||||
Decode(miniStack);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Search::Decode(NSCubePruningMiniStack::MiniStack &miniStack)
|
||||
{
|
||||
Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle();
|
||||
|
||||
// reuse queue from previous stack. Clear it first
|
||||
std::vector<QueueItem*> &container = Container(m_queue);
|
||||
//cerr << "container=" << container.size() << endl;
|
||||
BOOST_FOREACH(QueueItem *item, container) {
|
||||
// recycle unused hypos from queue
|
||||
Hypothesis *hypo = item->hypo;
|
||||
hypoRecycler.Recycle(hypo);
|
||||
|
||||
// recycle queue item
|
||||
m_queueItemRecycler.push_back(item);
|
||||
}
|
||||
container.clear();
|
||||
|
||||
m_seenPositions.clear();
|
||||
|
||||
// add top hypo from every edge into queue
|
||||
CubeEdges &edges = *m_cubeEdges[&miniStack];
|
||||
|
||||
BOOST_FOREACH(CubeEdge *edge, edges) {
|
||||
//cerr << "edge=" << *edge << endl;
|
||||
edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler);
|
||||
}
|
||||
|
||||
size_t pops = 0;
|
||||
while (!m_queue.empty() && pops < mgr.system.popLimit) {
|
||||
// get best hypo from queue, add to stack
|
||||
//cerr << "queue=" << queue.size() << endl;
|
||||
QueueItem *item = m_queue.top();
|
||||
m_queue.pop();
|
||||
|
||||
CubeEdge *edge = item->edge;
|
||||
|
||||
// add hypo to stack
|
||||
Hypothesis *hypo = item->hypo;
|
||||
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
|
||||
m_stacks.Add(hypo, hypoRecycler);
|
||||
|
||||
edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
|
||||
|
||||
++pops;
|
||||
}
|
||||
|
||||
/*
|
||||
// create hypo from every edge. Increase diversity
|
||||
while (!m_queue.empty()) {
|
||||
QueueItem *item = m_queue.top();
|
||||
m_queue.pop();
|
||||
|
||||
if (item->hypoIndex == 0 && item->tpIndex == 0) {
|
||||
CubeEdge &edge = item->edge;
|
||||
|
||||
// add hypo to stack
|
||||
Hypothesis *hypo = item->hypo;
|
||||
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
|
||||
m_stacks.Add(hypo, mgr.GetHypoRecycle());
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
void Search::CreateSearchGraph(size_t stackInd)
|
||||
{
|
||||
NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
|
||||
MemPool &pool = mgr.GetPool();
|
||||
|
||||
BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
|
||||
const Bitmap &hypoBitmap = *val.first.first;
|
||||
size_t hypoEndPos = val.first.second;
|
||||
//cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
|
||||
|
||||
// create edges to next hypos from existing hypos
|
||||
const InputPaths &paths = mgr.GetInputPaths();
|
||||
|
||||
BOOST_FOREACH(const InputPath *path, paths) {
|
||||
const Range &pathRange = path->range;
|
||||
//cerr << "pathRange=" << pathRange << endl;
|
||||
|
||||
if (!path->IsUsed()) {
|
||||
continue;
|
||||
}
|
||||
if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
|
||||
|
||||
// sort hypo for a particular bitmap and hypoEndPos
|
||||
const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
|
||||
|
||||
|
||||
// add cube edge
|
||||
size_t numPt = mgr.system.mappings.size();
|
||||
for (size_t i = 0; i < numPt; ++i) {
|
||||
const TargetPhrases *tps = path->targetPhrases[i];
|
||||
if (tps && tps->GetSize()) {
|
||||
// create next mini stack
|
||||
NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange);
|
||||
|
||||
CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap);
|
||||
|
||||
CubeEdges *edges;
|
||||
boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*>::iterator iter = m_cubeEdges.find(&nextMiniStack);
|
||||
if (iter == m_cubeEdges.end()) {
|
||||
edges = new (pool.Allocate<CubeEdges>()) CubeEdges();
|
||||
m_cubeEdges[&nextMiniStack] = edges;
|
||||
}
|
||||
else {
|
||||
edges = iter->second;
|
||||
}
|
||||
|
||||
edges->push_back(edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
const Hypothesis *Search::GetBestHypo() const
|
||||
{
|
||||
const NSCubePruningMiniStack::Stack &lastStack = m_stacks.Back();
|
||||
std::vector<const Hypothesis*> sortedHypos = lastStack.GetBestHypos(1);
|
||||
|
||||
const Hypothesis *best = NULL;
|
||||
if (sortedHypos.size()) {
|
||||
best = sortedHypos[0];
|
||||
}
|
||||
return best;
|
||||
}
|
||||
|
||||
void Search::DebugCounts()
|
||||
{
|
||||
std::map<size_t, size_t> counts;
|
||||
|
||||
for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
|
||||
//cerr << "stackInd=" << stackInd << endl;
|
||||
const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd];
|
||||
BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) {
|
||||
const NSCubePruningMiniStack::MiniStack &miniStack = *val.second;
|
||||
size_t count = miniStack.GetColl().size();
|
||||
|
||||
if (counts.find(count) == counts.end()) {
|
||||
counts[count] = 0;
|
||||
}
|
||||
else {
|
||||
++counts[count];
|
||||
}
|
||||
}
|
||||
//cerr << m_stacks << endl;
|
||||
}
|
||||
|
||||
std::map<size_t, size_t>::const_iterator iter;
|
||||
for (iter = counts.begin(); iter != counts.end(); ++iter) {
|
||||
cerr << iter->first << "=" << iter->second << " ";
|
||||
}
|
||||
cerr << endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,66 +0,0 @@
|
||||
/*
|
||||
* Search.h
|
||||
*
|
||||
* Created on: 16 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <boost/pool/pool_alloc.hpp>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include "../Search.h"
|
||||
#include "Misc.h"
|
||||
#include "Stacks.h"
|
||||
#include "../../legacy/Range.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
class Bitmap;
|
||||
class Hypothesis;
|
||||
class InputPath;
|
||||
class TargetPhrases;
|
||||
|
||||
namespace NSCubePruningMiniStack
|
||||
{
|
||||
class MiniStack;
|
||||
}
|
||||
|
||||
namespace NSCubePruningPerMiniStack
|
||||
{
|
||||
|
||||
class Search : public Moses2::Search
|
||||
{
|
||||
public:
|
||||
Search(Manager &mgr);
|
||||
virtual ~Search();
|
||||
|
||||
virtual void Decode();
|
||||
const Hypothesis *GetBestHypo() const;
|
||||
|
||||
protected:
|
||||
Stacks m_stacks;
|
||||
|
||||
CubeEdge::Queue m_queue;
|
||||
CubeEdge::SeenPositions m_seenPositions;
|
||||
|
||||
// CUBE PRUNING VARIABLES
|
||||
// setup
|
||||
typedef std::vector<CubeEdge*> CubeEdges;
|
||||
boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*> m_cubeEdges;
|
||||
|
||||
std::deque<QueueItem*> m_queueItemRecycler;
|
||||
|
||||
// CUBE PRUNING
|
||||
// decoding
|
||||
void CreateSearchGraph(size_t stackInd);
|
||||
void Decode(size_t stackInd);
|
||||
void Decode(NSCubePruningMiniStack::MiniStack &miniStack);
|
||||
|
||||
void DebugCounts();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,72 +0,0 @@
|
||||
/*
|
||||
* Stacks.cpp
|
||||
*
|
||||
* Created on: 6 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "Stacks.h"
|
||||
#include "../../System.h"
|
||||
#include "../Manager.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
namespace NSCubePruningPerMiniStack
|
||||
{
|
||||
|
||||
Stacks::Stacks(const Manager &mgr)
|
||||
:m_mgr(mgr)
|
||||
{
|
||||
}
|
||||
|
||||
Stacks::~Stacks()
|
||||
{
|
||||
}
|
||||
|
||||
void Stacks::Init(size_t numStacks)
|
||||
{
|
||||
m_stacks.resize(numStacks);
|
||||
for (size_t i = 0; i < m_stacks.size(); ++i) {
|
||||
m_stacks[i] = new (m_mgr.GetPool().Allocate<NSCubePruningMiniStack::Stack>()) NSCubePruningMiniStack::Stack(m_mgr);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const Stacks &obj)
|
||||
{
|
||||
for (size_t i = 0; i < obj.GetSize(); ++i) {
|
||||
const NSCubePruningMiniStack::Stack &stack = *obj.m_stacks[i];
|
||||
out << stack.GetHypoSize() << " ";
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
void Stacks::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
|
||||
{
|
||||
size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered();
|
||||
//cerr << "numWordsCovered=" << numWordsCovered << endl;
|
||||
NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
|
||||
stack.Add(hypo, hypoRecycle);
|
||||
|
||||
}
|
||||
|
||||
NSCubePruningMiniStack::MiniStack &Stacks::GetMiniStack(const Bitmap &newBitmap, const Range &pathRange)
|
||||
{
|
||||
size_t numWordsCovered = newBitmap.GetNumWordsCovered();
|
||||
//cerr << "numWordsCovered=" << numWordsCovered << endl;
|
||||
NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered];
|
||||
|
||||
NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos());
|
||||
stack.GetMiniStack(key);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,51 +0,0 @@
|
||||
/*
|
||||
* Stacks.h
|
||||
*
|
||||
* Created on: 6 Nov 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "../CubePruningMiniStack/Stack.h"
|
||||
#include "../../Recycler.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
class Manager;
|
||||
|
||||
namespace NSCubePruningPerMiniStack
|
||||
{
|
||||
|
||||
class Stacks {
|
||||
friend std::ostream& operator<<(std::ostream &, const Stacks &);
|
||||
public:
|
||||
Stacks(const Manager &mgr);
|
||||
virtual ~Stacks();
|
||||
|
||||
void Init(size_t numStacks);
|
||||
|
||||
size_t GetSize() const
|
||||
{ return m_stacks.size(); }
|
||||
|
||||
const NSCubePruningMiniStack::Stack &Back() const
|
||||
{ return *m_stacks.back(); }
|
||||
|
||||
NSCubePruningMiniStack::Stack &operator[](size_t ind)
|
||||
{ return *m_stacks[ind]; }
|
||||
|
||||
void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
|
||||
NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange);
|
||||
|
||||
protected:
|
||||
const Manager &m_mgr;
|
||||
std::vector<NSCubePruningMiniStack::Stack*> m_stacks;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,244 +0,0 @@
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include "Range.h"
|
||||
#include "../Array.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
class MemPool;
|
||||
|
||||
typedef unsigned long WordsBitmapID;
|
||||
|
||||
/** Vector of boolean to represent whether a word has been translated or not.
|
||||
*
|
||||
* Implemented using a vector of char, which is usually the same representation
|
||||
* for the elements that a C array of bool would use. A vector of bool, or a
|
||||
* Boost dynamic_bitset, could be much more efficient in theory. Unfortunately
|
||||
* algorithms like std::find() are not optimized for vector<bool> on gcc or
|
||||
* clang, and dynamic_bitset lacks all the optimized search operations we want.
|
||||
* Only benchmarking will tell what works best. Perhaps dynamic_bitset could
|
||||
* still be a dramatic improvement, if we flip the meaning of the bits around
|
||||
* so we can use its find_first() and find_next() for the most common searches.
|
||||
*/
|
||||
class Bitmap
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream& out, const Bitmap& bitmap);
|
||||
private:
|
||||
Array<char> m_bitmap; //! Ticks of words in sentence that have been done.
|
||||
size_t m_firstGap; //! Cached position of first gap, or NOT_FOUND.
|
||||
size_t m_numWordsCovered;
|
||||
|
||||
Bitmap(); // not implemented
|
||||
Bitmap& operator=(const Bitmap& other);
|
||||
|
||||
/** Update the first gap, when bits are flipped */
|
||||
void UpdateFirstGap(size_t startPos, size_t endPos, bool value)
|
||||
{
|
||||
if (value) {
|
||||
//may remove gap
|
||||
if (startPos <= m_firstGap && m_firstGap <= endPos) {
|
||||
m_firstGap = NOT_FOUND;
|
||||
for (size_t i = endPos + 1; i < m_bitmap.size(); ++i) {
|
||||
if (!m_bitmap[i]) {
|
||||
m_firstGap = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
//setting positions to false, may add new gap
|
||||
if (startPos < m_firstGap) {
|
||||
m_firstGap = startPos;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//! set value between 2 positions, inclusive
|
||||
void
|
||||
SetValueNonOverlap(Range const& range) {
|
||||
size_t startPos = range.GetStartPos();
|
||||
size_t endPos = range.GetEndPos();
|
||||
|
||||
for(size_t pos = startPos; pos <= endPos; pos++) {
|
||||
m_bitmap[pos] = true;
|
||||
}
|
||||
|
||||
m_numWordsCovered += range.GetNumWordsCovered();
|
||||
UpdateFirstGap(startPos, endPos, true);
|
||||
}
|
||||
|
||||
public:
|
||||
//! Create Bitmap of length size, and initialise with vector.
|
||||
explicit Bitmap(MemPool &pool, size_t size);
|
||||
|
||||
void Init(const std::vector<bool>& initializer);
|
||||
void Init(const Bitmap ©, const Range &range);
|
||||
|
||||
//! Count of words translated.
|
||||
size_t GetNumWordsCovered() const {
|
||||
return m_numWordsCovered;
|
||||
}
|
||||
|
||||
//! position of 1st word not yet translated, or NOT_FOUND if everything already translated
|
||||
size_t GetFirstGapPos() const {
|
||||
return m_firstGap;
|
||||
}
|
||||
|
||||
//! position of last word not yet translated, or NOT_FOUND if everything already translated
|
||||
size_t GetLastGapPos() const {
|
||||
for (int pos = int(m_bitmap.size()) - 1; pos >= 0; pos--) {
|
||||
if (!m_bitmap[pos]) {
|
||||
return pos;
|
||||
}
|
||||
}
|
||||
// no starting pos
|
||||
return NOT_FOUND;
|
||||
}
|
||||
|
||||
//! position of last translated word
|
||||
size_t GetLastPos() const {
|
||||
for (int pos = int(m_bitmap.size()) - 1; pos >= 0; pos--) {
|
||||
if (m_bitmap[pos]) {
|
||||
return pos;
|
||||
}
|
||||
}
|
||||
// no starting pos
|
||||
return NOT_FOUND;
|
||||
}
|
||||
|
||||
//! whether a word has been translated at a particular position
|
||||
bool GetValue(size_t pos) const {
|
||||
return bool(m_bitmap[pos]);
|
||||
}
|
||||
//! set value at a particular position
|
||||
void SetValue( size_t pos, bool value ) {
|
||||
bool origValue = m_bitmap[pos];
|
||||
if (origValue == value) {
|
||||
// do nothing
|
||||
}
|
||||
else {
|
||||
m_bitmap[pos] = value;
|
||||
UpdateFirstGap(pos, pos, value);
|
||||
if (value) {
|
||||
++m_numWordsCovered;
|
||||
}
|
||||
else {
|
||||
--m_numWordsCovered;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//! whether every word has been translated
|
||||
bool IsComplete() const {
|
||||
return GetSize() == GetNumWordsCovered();
|
||||
}
|
||||
//! whether the wordrange overlaps with any translated word in this bitmap
|
||||
bool Overlap(const Range &compare) const {
|
||||
for (size_t pos = compare.GetStartPos(); pos <= compare.GetEndPos(); pos++) {
|
||||
if (m_bitmap[pos])
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
//! number of elements
|
||||
size_t GetSize() const {
|
||||
return m_bitmap.size();
|
||||
}
|
||||
|
||||
inline size_t GetEdgeToTheLeftOf(size_t l) const {
|
||||
if (l == 0) return l;
|
||||
while (l && !m_bitmap[l-1]) {
|
||||
--l;
|
||||
}
|
||||
return l;
|
||||
}
|
||||
|
||||
inline size_t GetEdgeToTheRightOf(size_t r) const {
|
||||
if (r+1 == m_bitmap.size()) return r;
|
||||
return (
|
||||
std::find(m_bitmap.begin() + r + 1, m_bitmap.end(), true) -
|
||||
m_bitmap.begin()
|
||||
) - 1;
|
||||
}
|
||||
|
||||
//! converts bitmap into an integer ID: it consists of two parts: the first 16 bit are the pattern between the first gap and the last word-1, the second 16 bit are the number of filled positions. enforces a sentence length limit of 65535 and a max distortion of 16
|
||||
WordsBitmapID GetID() const {
|
||||
assert(m_bitmap.size() < (1<<16));
|
||||
|
||||
size_t start = GetFirstGapPos();
|
||||
if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left
|
||||
|
||||
size_t end = GetLastPos();
|
||||
if (end == NOT_FOUND) end = 0;// nothing translated yet
|
||||
|
||||
assert(end < start || end-start <= 16);
|
||||
WordsBitmapID id = 0;
|
||||
for(size_t pos = end; pos > start; pos--) {
|
||||
id = id*2 + (int) GetValue(pos);
|
||||
}
|
||||
return id + (1<<16) * start;
|
||||
}
|
||||
|
||||
//! converts bitmap into an integer ID, with an additional span covered
|
||||
WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const {
|
||||
assert(m_bitmap.size() < (1<<16));
|
||||
|
||||
size_t start = GetFirstGapPos();
|
||||
if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left
|
||||
|
||||
size_t end = GetLastPos();
|
||||
if (end == NOT_FOUND) end = 0;// nothing translated yet
|
||||
|
||||
if (start == startPos) start = endPos+1;
|
||||
if (end < endPos) end = endPos;
|
||||
|
||||
assert(end < start || end-start <= 16);
|
||||
WordsBitmapID id = 0;
|
||||
for(size_t pos = end; pos > start; pos--) {
|
||||
id = id*2;
|
||||
if (GetValue(pos) || (startPos<=pos && pos<=endPos))
|
||||
id++;
|
||||
}
|
||||
return id + (1<<16) * start;
|
||||
}
|
||||
|
||||
// for unordered_set in stack
|
||||
size_t hash() const;
|
||||
bool operator==(const Bitmap& other) const;
|
||||
bool operator!=(const Bitmap& other) const {
|
||||
return !(*this == other);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
@ -1,123 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#include "../legacy/Parameter.h"
|
||||
#include "AllOptions.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
AllOptions::
|
||||
AllOptions()
|
||||
: mira(false)
|
||||
, use_legacy_pt(false)
|
||||
{ }
|
||||
|
||||
AllOptions::
|
||||
AllOptions(Parameter const& param)
|
||||
{
|
||||
init(param);
|
||||
}
|
||||
|
||||
bool
|
||||
AllOptions::
|
||||
init(Parameter const& param)
|
||||
{
|
||||
if (!search.init(param)) return false;
|
||||
if (!cube.init(param)) return false;
|
||||
if (!nbest.init(param)) return false;
|
||||
if (!reordering.init(param)) return false;
|
||||
if (!context.init(param)) return false;
|
||||
if (!input.init(param)) return false;
|
||||
if (!mbr.init(param)) return false;
|
||||
if (!lmbr.init(param)) return false;
|
||||
if (!output.init(param)) return false;
|
||||
if (!unk.init(param)) return false;
|
||||
if (!server.init(param)) return false;
|
||||
if (!syntax.init(param)) return false;
|
||||
|
||||
param.SetParameter(mira, "mira", false);
|
||||
|
||||
return sanity_check();
|
||||
}
|
||||
|
||||
bool
|
||||
AllOptions::
|
||||
sanity_check()
|
||||
{
|
||||
using namespace std;
|
||||
if (lmbr.enabled)
|
||||
{
|
||||
if (mbr.enabled)
|
||||
{
|
||||
cerr << "Error: Cannot use both n-best mbr and lattice mbr together" << endl;
|
||||
return false;
|
||||
}
|
||||
mbr.enabled = true;
|
||||
}
|
||||
if (search.consensus)
|
||||
{
|
||||
if (mbr.enabled)
|
||||
{
|
||||
cerr << "Error: Cannot use consensus decoding together with mbr"
|
||||
<< endl;
|
||||
return false;
|
||||
}
|
||||
mbr.enabled = true;
|
||||
}
|
||||
|
||||
// RecoverPath should only be used with confusion net or word lattice input
|
||||
if (output.RecoverPath && input.input_type == SentenceInput)
|
||||
{
|
||||
TRACE_ERR("--recover-input-path should only be used with "
|
||||
<<"confusion net or word lattice input!\n");
|
||||
output.RecoverPath = false;
|
||||
}
|
||||
|
||||
// set m_nbest_options.enabled = true if necessary:
|
||||
nbest.enabled = (nbest.enabled || mira || search.consensus
|
||||
|| nbest.nbest_size > 0
|
||||
|| mbr.enabled || lmbr.enabled
|
||||
|| !output.SearchGraph.empty()
|
||||
|| !output.SearchGraphExtended.empty()
|
||||
|| !output.SearchGraphSLF.empty()
|
||||
|| !output.SearchGraphHG.empty()
|
||||
|| !output.SearchGraphPB.empty()
|
||||
|| output.lattice_sample_size != 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
bool
|
||||
AllOptions::
|
||||
update(std::map<std::string,xmlrpc_c::value>const& param)
|
||||
{
|
||||
if (!search.update(param)) return false;
|
||||
if (!cube.update(param)) return false;
|
||||
if (!nbest.update(param)) return false;
|
||||
if (!reordering.update(param)) return false;
|
||||
if (!context.update(param)) return false;
|
||||
if (!input.update(param)) return false;
|
||||
if (!mbr.update(param)) return false;
|
||||
if (!lmbr.update(param)) return false;
|
||||
if (!output.update(param)) return false;
|
||||
if (!unk.update(param)) return false;
|
||||
if (!server.update(param)) return false;
|
||||
//if (!syntax.update(param)) return false;
|
||||
return sanity_check();
|
||||
}
|
||||
#endif
|
||||
|
||||
bool
|
||||
AllOptions::
|
||||
NBestDistinct() const
|
||||
{
|
||||
return (nbest.only_distinct
|
||||
|| mbr.enabled || lmbr.enabled
|
||||
|| output.lattice_sample_size
|
||||
|| !output.SearchGraph.empty()
|
||||
|| !output.SearchGraphExtended.empty()
|
||||
|| !output.SearchGraphSLF.empty()
|
||||
|| !output.SearchGraphHG.empty());
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -1,51 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include "OptionsBaseClass.h"
|
||||
#include "SearchOptions.h"
|
||||
#include "CubePruningOptions.h"
|
||||
#include "NBestOptions.h"
|
||||
#include "ReorderingOptions.h"
|
||||
#include "ContextParameters.h"
|
||||
#include "InputOptions.h"
|
||||
#include "MBR_Options.h"
|
||||
#include "LMBR_Options.h"
|
||||
#include "ReportingOptions.h"
|
||||
#include "OOVHandlingOptions.h"
|
||||
#include "ServerOptions.h"
|
||||
#include "SyntaxOptions.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
struct
|
||||
AllOptions : public OptionsBaseClass
|
||||
{
|
||||
typedef boost::shared_ptr<AllOptions const> ptr;
|
||||
SearchOptions search;
|
||||
CubePruningOptions cube;
|
||||
NBestOptions nbest;
|
||||
ReorderingOptions reordering;
|
||||
ContextParameters context;
|
||||
InputOptions input;
|
||||
MBR_Options mbr;
|
||||
LMBR_Options lmbr;
|
||||
ReportingOptions output;
|
||||
OOVHandlingOptions unk;
|
||||
ServerOptions server;
|
||||
SyntaxOptions syntax;
|
||||
bool mira;
|
||||
bool use_legacy_pt;
|
||||
// StackOptions stack;
|
||||
// BeamSearchOptions beam;
|
||||
bool init(Parameter const& param);
|
||||
bool sanity_check();
|
||||
AllOptions();
|
||||
AllOptions(Parameter const& param);
|
||||
|
||||
bool update(std::map<std::string,xmlrpc_c::value>const& param);
|
||||
bool NBestDistinct() const;
|
||||
|
||||
};
|
||||
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include "OptionsBaseClass.h"
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
struct
|
||||
BeamSearchOptions : public OptionsBaseClass
|
||||
{
|
||||
bool init(Parameter const& param);
|
||||
BeamSearchOptions(Parameter const& param);
|
||||
};
|
||||
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include "OptionsBaseClass.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
class Parameter;
|
||||
|
||||
struct BookkeepingOptions : public OptionsBaseClass
|
||||
{
|
||||
bool need_alignment_info;
|
||||
bool init(Parameter const& param);
|
||||
BookkeepingOptions();
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
@ -1,80 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#include "CubePruningOptions.h"
|
||||
#include "../TypeDef.h"
|
||||
#include "../legacy/Parameter.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
CubePruningOptions::
|
||||
CubePruningOptions()
|
||||
: pop_limit(DEFAULT_CUBE_PRUNING_POP_LIMIT)
|
||||
, diversity(DEFAULT_CUBE_PRUNING_DIVERSITY)
|
||||
, lazy_scoring(false)
|
||||
, deterministic_search(false)
|
||||
{}
|
||||
|
||||
bool
|
||||
CubePruningOptions::
|
||||
init(Parameter const& param)
|
||||
{
|
||||
param.SetParameter(pop_limit, "cube-pruning-pop-limit",
|
||||
DEFAULT_CUBE_PRUNING_POP_LIMIT);
|
||||
param.SetParameter(diversity, "cube-pruning-diversity",
|
||||
DEFAULT_CUBE_PRUNING_DIVERSITY);
|
||||
param.SetParameter(lazy_scoring, "cube-pruning-lazy-scoring", false);
|
||||
//param.SetParameter(deterministic_search, "cube-pruning-deterministic-search", false);
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
bool
|
||||
CubePruningOptions::
|
||||
update(std::map<std::string,xmlrpc_c::value>const& params)
|
||||
{
|
||||
typedef std::map<std::string, xmlrpc_c::value> params_t;
|
||||
|
||||
params_t::const_iterator si = params.find("cube-pruning-pop-limit");
|
||||
if (si != params.end()) pop_limit = xmlrpc_c::value_int(si->second);
|
||||
|
||||
si = params.find("cube-pruning-diversity");
|
||||
if (si != params.end()) diversity = xmlrpc_c::value_int(si->second);
|
||||
|
||||
si = params.find("cube-pruning-lazy-scoring");
|
||||
if (si != params.end())
|
||||
{
|
||||
std::string spec = xmlrpc_c::value_string(si->second);
|
||||
if (spec == "true" or spec == "on" or spec == "1")
|
||||
lazy_scoring = true;
|
||||
else if (spec == "false" or spec == "off" or spec == "0")
|
||||
lazy_scoring = false;
|
||||
else
|
||||
{
|
||||
char const* msg
|
||||
= "Error parsing specification for cube-pruning-lazy-scoring";
|
||||
xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
|
||||
}
|
||||
}
|
||||
|
||||
si = params.find("cube-pruning-deterministic-search");
|
||||
if (si != params.end())
|
||||
{
|
||||
std::string spec = xmlrpc_c::value_string(si->second);
|
||||
if (spec == "true" or spec == "on" or spec == "1")
|
||||
deterministic_search = true;
|
||||
else if (spec == "false" or spec == "off" or spec == "0")
|
||||
deterministic_search = false;
|
||||
else
|
||||
{
|
||||
char const* msg
|
||||
= "Error parsing specification for cube-pruning-deterministic-search";
|
||||
xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
}
|
@ -1,25 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include "OptionsBaseClass.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
struct
|
||||
CubePruningOptions : public OptionsBaseClass
|
||||
{
|
||||
size_t pop_limit;
|
||||
size_t diversity;
|
||||
bool lazy_scoring;
|
||||
bool deterministic_search;
|
||||
|
||||
bool init(Parameter const& param);
|
||||
CubePruningOptions(Parameter const& param);
|
||||
CubePruningOptions();
|
||||
|
||||
bool
|
||||
update(std::map<std::string,xmlrpc_c::value>const& params);
|
||||
};
|
||||
|
||||
}
|
@ -1,102 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#include "InputOptions.h"
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
// #include "moses/StaticData.h"
|
||||
#include "moses/TypeDef.h"
|
||||
#include "../legacy/Parameter.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
InputOptions::
|
||||
InputOptions()
|
||||
: continue_partial_translation(false)
|
||||
, input_type(SentenceInput)
|
||||
, xml_policy(XmlPassThrough)
|
||||
, placeholder_factor(NOT_FOUND)
|
||||
{
|
||||
xml_brackets.first = "<";
|
||||
xml_brackets.second = ">";
|
||||
factor_order.assign(1,0);
|
||||
factor_delimiter = "|";
|
||||
}
|
||||
|
||||
bool
|
||||
InputOptions::
|
||||
init(Parameter const& param)
|
||||
{
|
||||
param.SetParameter(input_type, "inputtype", SentenceInput);
|
||||
#if 0
|
||||
if (input_type == SentenceInput)
|
||||
{ VERBOSE(2, "input type is: text input"); }
|
||||
else if (input_type == ConfusionNetworkInput)
|
||||
{ VERBOSE(2, "input type is: confusion net"); }
|
||||
else if (input_type == WordLatticeInput)
|
||||
{ VERBOSE(2, "input type is: word lattice"); }
|
||||
else if (input_type == TreeInputType)
|
||||
{ VERBOSE(2, "input type is: tree"); }
|
||||
else if (input_type == TabbedSentenceInput)
|
||||
{ VERBOSE(2, "input type is: tabbed sentence"); }
|
||||
else if (input_type == ForestInputType)
|
||||
{ VERBOSE(2, "input type is: forest"); }
|
||||
#endif
|
||||
|
||||
|
||||
param.SetParameter(continue_partial_translation,
|
||||
"continue-partial-translation", false);
|
||||
|
||||
param.SetParameter<XmlInputType>(xml_policy, "xml-input", XmlPassThrough);
|
||||
|
||||
// specify XML tags opening and closing brackets for XML option
|
||||
// Do we really want this to be configurable???? UG
|
||||
const PARAM_VEC *pspec;
|
||||
pspec = param.GetParam("xml-brackets");
|
||||
if (pspec && pspec->size())
|
||||
{
|
||||
std::vector<std::string> brackets = Tokenize(pspec->at(0));
|
||||
if(brackets.size()!=2)
|
||||
{
|
||||
std::cerr << "invalid xml-brackets value, "
|
||||
<< "must specify exactly 2 blank-delimited strings "
|
||||
<< "for XML tags opening and closing brackets"
|
||||
<< std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
xml_brackets.first= brackets[0];
|
||||
xml_brackets.second=brackets[1];
|
||||
|
||||
#if 0
|
||||
VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
|
||||
<< xml_brackets.first << " and "
|
||||
<< xml_brackets.second << std::endl);
|
||||
#endif
|
||||
}
|
||||
|
||||
pspec = param.GetParam("input-factors");
|
||||
if (pspec) factor_order = Scan<FactorType>(*pspec);
|
||||
if (factor_order.empty()) factor_order.assign(1,0);
|
||||
param.SetParameter(placeholder_factor, "placeholder-factor", NOT_FOUND);
|
||||
|
||||
param.SetParameter<std::string>(factor_delimiter, "factor-delimiter", "|");
|
||||
param.SetParameter<std::string>(input_file_path,"input-file","");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
bool
|
||||
InputOptions::
|
||||
update(std::map<std::string,xmlrpc_c::value>const& param)
|
||||
{
|
||||
typedef std::map<std::string, xmlrpc_c::value> params_t;
|
||||
params_t::const_iterator si = param.find("xml-input");
|
||||
if (si != param.end())
|
||||
xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <string>
|
||||
#include "OptionsBaseClass.h"
|
||||
#include "../TypeDef.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
struct
|
||||
InputOptions : public OptionsBaseClass
|
||||
{
|
||||
bool continue_partial_translation;
|
||||
InputTypeEnum input_type;
|
||||
XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive
|
||||
std::vector<FactorType> factor_order; // input factor order
|
||||
std::string factor_delimiter;
|
||||
FactorType placeholder_factor; // where to store original text for placeholders
|
||||
std::string input_file_path;
|
||||
std::pair<std::string,std::string> xml_brackets;
|
||||
// strings to use as XML tags' opening and closing brackets.
|
||||
// Default are "<" and ">"
|
||||
|
||||
InputOptions();
|
||||
|
||||
bool init(Parameter const& param);
|
||||
bool update(std::map<std::string,xmlrpc_c::value>const& param);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,39 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#include "LMBR_Options.h"
|
||||
#include "../legacy/Parameter.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
LMBR_Options::
|
||||
LMBR_Options()
|
||||
: enabled(false)
|
||||
, use_lattice_hyp_set(false)
|
||||
, precision(0.8f)
|
||||
, ratio(0.6f)
|
||||
, map_weight(0.8f)
|
||||
, pruning_factor(30)
|
||||
{ }
|
||||
|
||||
bool
|
||||
LMBR_Options::
|
||||
init(Parameter const& param)
|
||||
{
|
||||
param.SetParameter(enabled, "lminimum-bayes-risk", false);
|
||||
|
||||
param.SetParameter(ratio, "lmbr-r", 0.6f);
|
||||
param.SetParameter(precision, "lmbr-p", 0.8f);
|
||||
param.SetParameter(map_weight, "lmbr-map-weight", 0.0f);
|
||||
param.SetParameter(pruning_factor, "lmbr-pruning-factor", size_t(30));
|
||||
param.SetParameter(use_lattice_hyp_set, "lattice-hypo-set", false);
|
||||
|
||||
PARAM_VEC const* params = param.GetParam("lmbr-thetas");
|
||||
if (params) theta = Scan<float>(*params);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
@ -1,26 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "OptionsBaseClass.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
// Options for mimum bayes risk decoding
|
||||
struct
|
||||
LMBR_Options : public OptionsBaseClass
|
||||
{
|
||||
bool enabled;
|
||||
bool use_lattice_hyp_set; //! to use nbest as hypothesis set during lattice MBR
|
||||
float precision; //! unigram precision theta - see Tromble et al 08 for more details
|
||||
float ratio; //! decaying factor for ngram thetas - see Tromble et al 08
|
||||
float map_weight; //! Weight given to the map solution. See Kumar et al 09
|
||||
size_t pruning_factor; //! average number of nodes per word wanted in pruned lattice
|
||||
std::vector<float> theta; //! theta(s) for lattice mbr calculation
|
||||
bool init(Parameter const& param);
|
||||
LMBR_Options();
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,26 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#include "MBR_Options.h"
|
||||
#include "../legacy/Parameter.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
MBR_Options::
|
||||
MBR_Options()
|
||||
: enabled(false)
|
||||
, size(200)
|
||||
, scale(1.0f)
|
||||
{}
|
||||
|
||||
|
||||
bool
|
||||
MBR_Options::
|
||||
init(Parameter const& param)
|
||||
{
|
||||
param.SetParameter(enabled, "minimum-bayes-risk", false);
|
||||
param.SetParameter<size_t>(size, "mbr-size", 200);
|
||||
param.SetParameter(scale, "mbr-scale", 1.0f);
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
@ -1,21 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include "OptionsBaseClass.h"
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
// Options for mimum bayes risk decoding
|
||||
struct
|
||||
MBR_Options : public OptionsBaseClass
|
||||
{
|
||||
bool enabled;
|
||||
size_t size; //! number of translation candidates considered
|
||||
float scale; /*! scaling factor for computing marginal probability
|
||||
* of candidate translation */
|
||||
bool init(Parameter const& param);
|
||||
MBR_Options();
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,50 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#include "OOVHandlingOptions.h"
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "moses/StaticData.h"
|
||||
#include "moses/TypeDef.h"
|
||||
#include "../legacy/Parameter.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
OOVHandlingOptions::
|
||||
OOVHandlingOptions()
|
||||
{
|
||||
drop = false;
|
||||
mark = false;
|
||||
prefix = "UNK";
|
||||
suffix = "";
|
||||
word_deletion_enabled = false;
|
||||
always_create_direct_transopt = false;
|
||||
}
|
||||
|
||||
bool
|
||||
OOVHandlingOptions::
|
||||
init(Parameter const& param)
|
||||
{
|
||||
param.SetParameter(drop,"drop-unknown",false);
|
||||
param.SetParameter(mark,"mark-unknown",false);
|
||||
param.SetParameter(word_deletion_enabled, "phrase-drop-allowed", false);
|
||||
param.SetParameter(always_create_direct_transopt, "always-create-direct-transopt", false);
|
||||
param.SetParameter<std::string>(prefix,"unknown-word-prefix","UNK");
|
||||
param.SetParameter<std::string>(suffix,"unknown-word-suffix","");
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
bool
|
||||
OOVHandlingOptions::
|
||||
update(std::map<std::string,xmlrpc_c::value>const& param)
|
||||
{
|
||||
typedef std::map<std::string, xmlrpc_c::value> params_t;
|
||||
// params_t::const_iterator si = param.find("xml-input");
|
||||
// if (si != param.end())
|
||||
// xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
@ -1,27 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <string>
|
||||
#include "OptionsBaseClass.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
struct
|
||||
OOVHandlingOptions : public OptionsBaseClass
|
||||
{
|
||||
bool drop;
|
||||
bool mark;
|
||||
std::string prefix;
|
||||
std::string suffix;
|
||||
|
||||
bool word_deletion_enabled;
|
||||
bool always_create_direct_transopt;
|
||||
OOVHandlingOptions();
|
||||
|
||||
bool init(Parameter const& param);
|
||||
bool update(std::map<std::string,xmlrpc_c::value>const& param);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,30 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
|
||||
#include "OptionsBaseClass.h"
|
||||
#include "moses/Util.h"
|
||||
#include "../legacy/Parameter.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
bool
|
||||
OptionsBaseClass::
|
||||
update(std::map<std::string,xmlrpc_c::value>const& params)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
bool
|
||||
OptionsBaseClass::
|
||||
check(std::map<std::string, xmlrpc_c::value> const& param,
|
||||
std::string const key, bool dfltval)
|
||||
{
|
||||
std::map<std::string, xmlrpc_c::value>::const_iterator m;
|
||||
m = param.find(key);
|
||||
if (m == param.end()) return dfltval;
|
||||
return Scan<bool>(xmlrpc_c::value_string(m->second));
|
||||
}
|
||||
#endif
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include "moses/xmlrpc-c.h"
|
||||
#include <string>
|
||||
#include <map>
|
||||
namespace Moses2
|
||||
{
|
||||
class Parameter;
|
||||
|
||||
struct OptionsBaseClass
|
||||
{
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
virtual bool
|
||||
update(std::map<std::string,xmlrpc_c::value>const& params);
|
||||
#endif
|
||||
bool
|
||||
check(std::map<std::string, xmlrpc_c::value> const& param,
|
||||
std::string const key, bool dfltval);
|
||||
};
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#include "ReorderingOptions.h"
|
||||
#include "../legacy/Parameter.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
ReorderingOptions::
|
||||
ReorderingOptions()
|
||||
: max_distortion(-1)
|
||||
, monotone_at_punct(false)
|
||||
, use_early_distortion_cost(false)
|
||||
{}
|
||||
|
||||
|
||||
ReorderingOptions::
|
||||
ReorderingOptions(Parameter const& param)
|
||||
{
|
||||
init(param);
|
||||
}
|
||||
|
||||
bool
|
||||
ReorderingOptions::
|
||||
init(Parameter const& param)
|
||||
{
|
||||
param.SetParameter(max_distortion, "distortion-limit", -1);
|
||||
param.SetParameter(monotone_at_punct, "monotone-at-punctuation", false);
|
||||
param.SetParameter(use_early_distortion_cost, "early-distortion-cost", false);
|
||||
return true;
|
||||
}
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include "OptionsBaseClass.h"
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
struct
|
||||
ReorderingOptions : public OptionsBaseClass
|
||||
{
|
||||
int max_distortion;
|
||||
bool monotone_at_punct;
|
||||
bool use_early_distortion_cost;
|
||||
bool init(Parameter const& param);
|
||||
ReorderingOptions(Parameter const& param);
|
||||
ReorderingOptions();
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,152 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#include "ReportingOptions.h"
|
||||
#include "../legacy/Parameter.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
ReportingOptions::
|
||||
ReportingOptions()
|
||||
: start_translation_id(0)
|
||||
, ReportAllFactors(false)
|
||||
, ReportSegmentation(0)
|
||||
, PrintAlignmentInfo(false)
|
||||
, PrintAllDerivations(false)
|
||||
, PrintTranslationOptions(false)
|
||||
, WA_SortOrder(NoSort)
|
||||
, WordGraph(false)
|
||||
, DontPruneSearchGraph(false)
|
||||
, RecoverPath(false)
|
||||
, ReportHypoScore(false)
|
||||
, PrintID(false)
|
||||
, PrintPassThrough(false)
|
||||
, include_lhs_in_search_graph(false)
|
||||
, lattice_sample_size(0)
|
||||
{
|
||||
factor_order.assign(1,0);
|
||||
factor_delimiter = "|";
|
||||
}
|
||||
|
||||
bool
|
||||
ReportingOptions::
|
||||
init(Parameter const& param)
|
||||
{
|
||||
param.SetParameter<long>(start_translation_id, "start-translation-id", 0);
|
||||
|
||||
// including factors in the output
|
||||
param.SetParameter(ReportAllFactors, "report-all-factors", false);
|
||||
|
||||
// segmentation reporting
|
||||
ReportSegmentation = (param.GetParam("report-segmentation-enriched")
|
||||
? 2 : param.GetParam("report-segmentation")
|
||||
? 1 : 0);
|
||||
|
||||
// word alignment reporting
|
||||
param.SetParameter(PrintAlignmentInfo, "print-alignment-info", false);
|
||||
param.SetParameter(WA_SortOrder, "sort-word-alignment", NoSort);
|
||||
std::string e; // hack to save us param.SetParameter<string>(...)
|
||||
param.SetParameter(AlignmentOutputFile,"alignment-output-file", e);
|
||||
|
||||
|
||||
param.SetParameter(PrintAllDerivations, "print-all-derivations", false);
|
||||
param.SetParameter(PrintTranslationOptions, "print-translation-option", false);
|
||||
|
||||
// output a word graph
|
||||
PARAM_VEC const* params;
|
||||
params = param.GetParam("output-word-graph");
|
||||
WordGraph = (params && params->size() == 2); // what are the two options?
|
||||
|
||||
// dump the search graph
|
||||
param.SetParameter(SearchGraph, "output-search-graph", e);
|
||||
param.SetParameter(SearchGraphExtended, "output-search-graph-extended", e);
|
||||
param.SetParameter(SearchGraphSLF,"output-search-graph-slf", e);
|
||||
param.SetParameter(SearchGraphHG, "output-search-graph-hypergraph", e);
|
||||
#ifdef HAVE_PROTOBUF
|
||||
param.SetParameter(SearchGraphPB, "output-search-graph-pb", e);
|
||||
#endif
|
||||
|
||||
param.SetParameter(DontPruneSearchGraph, "unpruned-search-graph", false);
|
||||
param.SetParameter(include_lhs_in_search_graph,
|
||||
"include-lhs-in-search-graph", false );
|
||||
|
||||
|
||||
// miscellaneous
|
||||
param.SetParameter(RecoverPath, "recover-input-path",false);
|
||||
param.SetParameter(ReportHypoScore, "output-hypo-score",false);
|
||||
param.SetParameter(PrintID, "print-id",false);
|
||||
param.SetParameter(PrintPassThrough, "print-passthrough",false);
|
||||
param.SetParameter(detailed_all_transrep_filepath,
|
||||
"translation-all-details", e);
|
||||
param.SetParameter(detailed_transrep_filepath, "translation-details", e);
|
||||
param.SetParameter(detailed_tree_transrep_filepath,
|
||||
"tree-translation-details", e);
|
||||
|
||||
params = param.GetParam("lattice-samples");
|
||||
if (params) {
|
||||
if (params->size() ==2 ) {
|
||||
lattice_sample_filepath = params->at(0);
|
||||
lattice_sample_size = Scan<size_t>(params->at(1));
|
||||
} else {
|
||||
std::cerr <<"wrong format for switch -lattice-samples file size";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (ReportAllFactors) {
|
||||
factor_order.clear();
|
||||
for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
|
||||
factor_order.push_back(i);
|
||||
} else {
|
||||
params= param.GetParam("output-factors");
|
||||
if (params) factor_order = Scan<FactorType>(*params);
|
||||
if (factor_order.empty()) factor_order.assign(1,0);
|
||||
}
|
||||
|
||||
param.SetParameter(factor_delimiter, "factor-delimiter", std::string("|"));
|
||||
param.SetParameter(factor_delimiter, "output-factor-delimiter", factor_delimiter);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
bool
|
||||
ReportingOptions::
|
||||
update(std::map<std::string, xmlrpc_c::value>const& param)
|
||||
{
|
||||
ReportAllFactors = check(param, "report-all-factors", ReportAllFactors);
|
||||
|
||||
|
||||
std::map<std::string, xmlrpc_c::value>::const_iterator m;
|
||||
m = param.find("output-factors");
|
||||
if (m != param.end()) {
|
||||
factor_order=Tokenize<FactorType>(xmlrpc_c::value_string(m->second),",");
|
||||
}
|
||||
|
||||
if (ReportAllFactors) {
|
||||
factor_order.clear();
|
||||
for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
|
||||
factor_order.push_back(i);
|
||||
}
|
||||
|
||||
m = param.find("align");
|
||||
if (m != param.end() && Scan<bool>(xmlrpc_c::value_string(m->second)))
|
||||
ReportSegmentation = 1;
|
||||
|
||||
PrintAlignmentInfo = check(param,"word-align",PrintAlignmentInfo);
|
||||
|
||||
m = param.find("factor-delimiter");
|
||||
if (m != param.end()) {
|
||||
factor_delimiter = Trim(xmlrpc_c::value_string(m->second));
|
||||
}
|
||||
|
||||
m = param.find("output-factor-delimiter");
|
||||
if (m != param.end()) {
|
||||
factor_delimiter = Trim(xmlrpc_c::value_string(m->second));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
}
|
@ -1,70 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "OptionsBaseClass.h"
|
||||
#include "../TypeDef.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
struct
|
||||
ReportingOptions : public OptionsBaseClass
|
||||
{
|
||||
long start_translation_id;
|
||||
|
||||
std::vector<FactorType> factor_order;
|
||||
std::string factor_delimiter;
|
||||
|
||||
bool ReportAllFactors; // m_reportAllFactors;
|
||||
int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched
|
||||
|
||||
bool PrintAlignmentInfo; // m_PrintAlignmentInfo
|
||||
bool PrintAllDerivations;
|
||||
bool PrintTranslationOptions;
|
||||
|
||||
WordAlignmentSort WA_SortOrder; // 0: no, 1: target order
|
||||
std::string AlignmentOutputFile;
|
||||
|
||||
bool WordGraph;
|
||||
|
||||
std::string SearchGraph;
|
||||
std::string SearchGraphExtended;
|
||||
std::string SearchGraphSLF;
|
||||
std::string SearchGraphHG;
|
||||
std::string SearchGraphPB;
|
||||
bool DontPruneSearchGraph;
|
||||
|
||||
bool RecoverPath; // recover input path?
|
||||
bool ReportHypoScore;
|
||||
|
||||
bool PrintID;
|
||||
bool PrintPassThrough;
|
||||
|
||||
// transrep = translation reporting
|
||||
std::string detailed_transrep_filepath;
|
||||
std::string detailed_tree_transrep_filepath;
|
||||
std::string detailed_all_transrep_filepath;
|
||||
bool include_lhs_in_search_graph;
|
||||
|
||||
|
||||
std::string lattice_sample_filepath;
|
||||
size_t lattice_sample_size;
|
||||
|
||||
bool init(Parameter const& param);
|
||||
|
||||
/// do we need to keep the search graph from decoding?
|
||||
bool NeedSearchGraph() const {
|
||||
return !(SearchGraph.empty() && SearchGraphExtended.empty());
|
||||
}
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
bool update(std::map<std::string, xmlrpc_c::value>const& param);
|
||||
#endif
|
||||
|
||||
|
||||
ReportingOptions();
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,107 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#include "SearchOptions.h"
|
||||
#include "../legacy/Parameter.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
SearchOptions::
|
||||
SearchOptions()
|
||||
: algo(Normal)
|
||||
, stack_size(DEFAULT_MAX_HYPOSTACK_SIZE)
|
||||
, stack_diversity(0)
|
||||
, disable_discarding(false)
|
||||
, max_phrase_length(DEFAULT_MAX_PHRASE_LENGTH)
|
||||
, max_trans_opt_per_cov(DEFAULT_MAX_TRANS_OPT_SIZE)
|
||||
, max_partial_trans_opt(DEFAULT_MAX_PART_TRANS_OPT_SIZE)
|
||||
, beam_width(DEFAULT_BEAM_WIDTH)
|
||||
, timeout(0)
|
||||
, consensus(false)
|
||||
, early_discarding_threshold(DEFAULT_EARLY_DISCARDING_THRESHOLD)
|
||||
, trans_opt_threshold(DEFAULT_TRANSLATION_OPTION_THRESHOLD)
|
||||
{ }
|
||||
|
||||
SearchOptions::
|
||||
SearchOptions(Parameter const& param)
|
||||
: stack_diversity(0)
|
||||
{
|
||||
init(param);
|
||||
}
|
||||
|
||||
bool
|
||||
SearchOptions::
|
||||
init(Parameter const& param)
|
||||
{
|
||||
param.SetParameter(algo, "search-algorithm", Normal);
|
||||
param.SetParameter(stack_size, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);
|
||||
param.SetParameter(stack_diversity, "stack-diversity", size_t(0));
|
||||
param.SetParameter(beam_width, "beam-threshold", DEFAULT_BEAM_WIDTH);
|
||||
param.SetParameter(early_discarding_threshold, "early-discarding-threshold",
|
||||
DEFAULT_EARLY_DISCARDING_THRESHOLD);
|
||||
param.SetParameter(timeout, "time-out", 0);
|
||||
param.SetParameter(max_phrase_length, "max-phrase-length",
|
||||
DEFAULT_MAX_PHRASE_LENGTH);
|
||||
param.SetParameter(trans_opt_threshold, "translation-option-threshold",
|
||||
DEFAULT_TRANSLATION_OPTION_THRESHOLD);
|
||||
param.SetParameter(max_trans_opt_per_cov, "max-trans-opt-per-coverage",
|
||||
DEFAULT_MAX_TRANS_OPT_SIZE);
|
||||
param.SetParameter(max_partial_trans_opt, "max-partial-trans-opt",
|
||||
DEFAULT_MAX_PART_TRANS_OPT_SIZE);
|
||||
|
||||
param.SetParameter(consensus, "consensus-decoding", false);
|
||||
param.SetParameter(disable_discarding, "disable-discarding", false);
|
||||
|
||||
// transformation to log of a few scores
|
||||
beam_width = TransformScore(beam_width);
|
||||
trans_opt_threshold = TransformScore(trans_opt_threshold);
|
||||
early_discarding_threshold = TransformScore(early_discarding_threshold);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
is_syntax(SearchAlgorithm algo)
|
||||
{
|
||||
return (algo == CYKPlus || algo == ChartIncremental ||
|
||||
algo == SyntaxS2T || algo == SyntaxT2S ||
|
||||
algo == SyntaxF2S || algo == SyntaxT2S_SCFG);
|
||||
}
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
bool
|
||||
SearchOptions::
|
||||
update(std::map<std::string,xmlrpc_c::value>const& params)
|
||||
{
|
||||
typedef std::map<std::string, xmlrpc_c::value> params_t;
|
||||
|
||||
params_t::const_iterator si = params.find("search-algorithm");
|
||||
if (si != params.end())
|
||||
{
|
||||
// use named parameters
|
||||
std::string spec = xmlrpc_c::value_string(si->second);
|
||||
if (spec == "normal" || spec == "0") algo = Normal;
|
||||
else if (spec == "cube" || spec == "1") algo = CubePruning;
|
||||
else throw xmlrpc_c::fault("Unsupported search algorithm",
|
||||
xmlrpc_c::fault::CODE_PARSE);
|
||||
}
|
||||
|
||||
si = params.find("stack");
|
||||
if (si != params.end()) stack_size = xmlrpc_c::value_int(si->second);
|
||||
|
||||
si = params.find("stack-diversity");
|
||||
if (si != params.end()) stack_diversity = xmlrpc_c::value_int(si->second);
|
||||
|
||||
si = params.find("beam-threshold");
|
||||
if (si != params.end()) beam_width = xmlrpc_c::value_double(si->second);
|
||||
|
||||
si = params.find("time-out");
|
||||
if (si != params.end()) timeout = xmlrpc_c::value_int(si->second);
|
||||
|
||||
si = params.find("max-phrase-length");
|
||||
if (si != params.end()) max_phrase_length = xmlrpc_c::value_int(si->second);
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
@ -1,54 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <limits>
|
||||
#include "OptionsBaseClass.h"
|
||||
#include "../TypeDef.h"
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
|
||||
bool is_syntax(SearchAlgorithm algo);
|
||||
|
||||
struct
|
||||
SearchOptions : public OptionsBaseClass
|
||||
{
|
||||
SearchAlgorithm algo;
|
||||
|
||||
// stack decoding
|
||||
size_t stack_size; // maxHypoStackSize;
|
||||
size_t stack_diversity; // minHypoStackDiversity;
|
||||
bool disable_discarding;
|
||||
// Disable discarding of bad hypotheses from HypothesisStackNormal
|
||||
size_t max_phrase_length;
|
||||
size_t max_trans_opt_per_cov;
|
||||
size_t max_partial_trans_opt;
|
||||
// beam search
|
||||
float beam_width;
|
||||
|
||||
int timeout;
|
||||
|
||||
bool consensus; //! Use Consensus decoding (DeNero et al 2009)
|
||||
|
||||
// reordering options
|
||||
// bool reorderingConstraint; //! use additional reordering constraints
|
||||
// bool useEarlyDistortionCost;
|
||||
|
||||
float early_discarding_threshold;
|
||||
float trans_opt_threshold;
|
||||
|
||||
bool init(Parameter const& param);
|
||||
SearchOptions(Parameter const& param);
|
||||
SearchOptions();
|
||||
|
||||
bool
|
||||
UseEarlyDiscarding() const {
|
||||
return early_discarding_threshold != -std::numeric_limits<float>::infinity();
|
||||
}
|
||||
|
||||
bool
|
||||
update(std::map<std::string,xmlrpc_c::value>const& params);
|
||||
|
||||
};
|
||||
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <stdint.h>
|
||||
#include <xmlrpc-c/base.hpp>
|
||||
#include <xmlrpc-c/registry.hpp>
|
||||
#include <xmlrpc-c/server_abyss.hpp>
|
||||
|
||||
namespace Moses2
|
||||
{
|
||||
class Parameter;
|
||||
|
||||
struct
|
||||
ServerOptions
|
||||
{
|
||||
bool is_serial;
|
||||
uint32_t numThreads; // might not be used any more, actually
|
||||
|
||||
size_t sessionTimeout; // this is related to Moses translation sessions
|
||||
size_t sessionCacheSize; // this is related to Moses translation sessions
|
||||
|
||||
int port; // this is for the abyss server
|
||||
std::string logfile; // this is for the abyss server
|
||||
int maxConn; // this is for the abyss server
|
||||
int maxConnBacklog; // this is for the abyss server
|
||||
int keepaliveTimeout; // this is for the abyss server
|
||||
int keepaliveMaxConn; // this is for the abyss server
|
||||
int timeout; // this is for the abyss server
|
||||
|
||||
bool init(Parameter const& param);
|
||||
ServerOptions(Parameter const& param);
|
||||
ServerOptions();
|
||||
|
||||
bool
|
||||
update(std::map<std::string,xmlrpc_c::value>const& params)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1 +0,0 @@
|
||||
|
@ -5,36 +5,37 @@
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.602770742" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.602770742" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.602770742" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.602770742." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1436139469" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.622899770" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
|
||||
<builder buildPath="${workspace_loc:/CreateOnDiskPt}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1448999623" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.base.2139008298" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2008193341" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
|
||||
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.627728792" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1832148270" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1681469807" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.627728792" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1832148270" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1681469807" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../..""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/include""/>
|
||||
</option>
|
||||
<option id="gnu.cpp.compiler.option.preprocessor.def.425758466" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
|
||||
<option id="gnu.cpp.compiler.option.preprocessor.def.425758466" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
|
||||
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.285185442" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.587301391" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.2116328611" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.exe.debug.option.debugging.level.2129089003" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.2116328611" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.exe.debug.option.debugging.level.2129089003" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.dialect.std.1726327101" superClass="gnu.c.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.c.compiler.dialect.c11" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1464765114" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.606542044" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
|
||||
@ -65,12 +66,14 @@
|
||||
<listOptionValue builtIn="false" value="boost_program_options"/>
|
||||
<listOptionValue builtIn="false" value="pthread"/>
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="probingpt"/>
|
||||
<listOptionValue builtIn="false" value="bz2"/>
|
||||
<listOptionValue builtIn="false" value="dl"/>
|
||||
<listOptionValue builtIn="false" value="rt"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.paths.815001500" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../boost/lib64""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/probingpt/Debug""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../cmph/lib""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../xmlrpc-c/lib""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/search/Debug""/>
|
||||
@ -78,6 +81,7 @@
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/util/Debug""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/moses/Debug""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/lm/Debug""/>
|
||||
<listOptionValue builtIn="false" value="/opt/local/lib"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2077999464" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
@ -97,29 +101,29 @@
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.168814843" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GNU_ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.168814843" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.168814843" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.168814843." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.844577457" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1635721038" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
|
||||
<builder buildPath="${workspace_loc:/CreateOnDiskPt}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.361379130" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.base.799410017" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1404799808" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
|
||||
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.696270987" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.1052942304" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.696270987" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.1052942304" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2139553528" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1633770352" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1936692829" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.exe.release.option.debugging.level.2077864052" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1936692829" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.exe.release.option.debugging.level.2077864052" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1045097629" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.455462639" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user