Merge branch 'master' of http://github.com/moses-smt/mosesdecoder into ranked-sampling

This commit is contained in:
Ulrich Germann 2015-10-17 16:37:41 +01:00
commit 7a85126a92
108 changed files with 980 additions and 410 deletions

View File

@ -7,3 +7,4 @@ into the source tree from elsewhere:
* "bjam-files" is taken from Boost.
* "util" and "lm" are taken from KenLM: https://github.com/kpu/kenlm

View File

@ -25,6 +25,7 @@
#include "OnDiskWrapper.h"
#include "moses/Factor.h"
#include "util/exception.hh"
#include "util/string_stream.hh"
using namespace std;
@ -223,7 +224,8 @@ Word *OnDiskWrapper::ConvertFromMoses(const std::vector<Moses::FactorType> &fact
{
bool isNonTerminal = origWord.IsNonTerminal();
Word *newWord = new Word(isNonTerminal);
stringstream strme;
util::StringStream strme;
size_t factorType = factorsVec[0];
const Moses::Factor *factor = origWord.GetFactor(factorType);

View File

@ -3145,6 +3145,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/StringVector.h</locationURI>
</link>
<link>
<name>TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp</locationURI>
</link>
<link>
<name>TranslationModel/CompactPT/TargetPhraseCollectionCache.h</name>
<type>1</type>

View File

@ -1,5 +1,5 @@
SALMDIR=/Users/hieuhoang/workspace/salm
FLAVOR?=o64
FLAVOR?=o32
INC=-I$(SALMDIR)/Src/Shared -I$(SALMDIR)/Src/SuffixArrayApplications -I$(SALMDIR)/Src/SuffixArrayApplications/SuffixArraySearch
OBJS=$(SALMDIR)/Distribution/Linux/Objs/Search/_SuffixArrayApplicationBase.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Search/_SuffixArraySearchApplicationBase.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Shared/_String.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Shared/_IDVocabulary.$(FLAVOR)

View File

@ -33,10 +33,9 @@ template <class Stream> void WriteCounts(Stream &out, const std::vector<uint64_t
}
size_t SizeNeededForCounts(const std::vector<uint64_t> &number) {
std::string buf;
util::StringStream stream(buf);
util::StringStream stream;
WriteCounts(stream, number);
return buf.size();
return stream.str().size();
}
bool IsEntirelyWhiteSpace(const StringPiece &line) {

View File

@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* Moses main wrapper for executable for single-threaded and multi-threaded, simply calling decoder_main.
**/
#include "moses/ExportInterface.h"
#include "util/string_stream.hh"
/** main function of the command line version of the decoder **/
int main(int argc, char** argv)

View File

@ -181,32 +181,6 @@ void ChartHypothesis::GetOutputPhrase(size_t leftRightMost, size_t numWords, Phr
}
}
/** check, if two hypothesis can be recombined.
this is actually a sorting function that allows us to
keep an ordered list of hypotheses. This makes recombination
much quicker. Returns one of 3 possible values:
-1 = this < compare
+1 = this > compare
0 = this ==compare
\param compare the other hypo to compare to
*/
int ChartHypothesis::RecombineCompare(const ChartHypothesis &compare) const
{
int comp = 0;
for (unsigned i = 0; i < m_ffStates.size(); ++i) {
if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL)
comp = m_ffStates[i] - compare.m_ffStates[i];
else
comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]);
if (comp != 0)
return comp;
}
return 0;
}
/** calculate total score */
void ChartHypothesis::EvaluateWhenApplied()
{
@ -325,6 +299,33 @@ void ChartHypothesis::SetWinningHypo(const ChartHypothesis *hypo)
m_winningHypo = hypo;
}
size_t ChartHypothesis::hash() const
{
  // Combine the hashes of all feature-function states so that recombinable
  // hypotheses land in the same bucket of the recombination unordered_set.
  // Must stay consistent with operator== below.
  size_t seed = 0; // BUGFIX: was uninitialized; boost::hash_combine reads it,
                   // yielding nondeterministic hashes and broken recombination.
  // states
  for (size_t i = 0; i < m_ffStates.size(); ++i) {
    const FFState *state = m_ffStates[i];
    // NOTE(review): assumes every state pointer is non-NULL; the removed
    // RecombineCompare tolerated NULL states -- confirm callers guarantee this.
    size_t hash = state->hash();
    boost::hash_combine(seed, hash);
  }
  return seed;
}
bool ChartHypothesis::operator==(const ChartHypothesis& other) const
{
  // Two hypotheses are equal (i.e. recombinable) iff every feature-function
  // state compares equal. Assumes both hypotheses carry the same number of
  // states in the same order -- true when both come from the same decoder run.
  // states
  for (size_t i = 0; i < m_ffStates.size(); ++i) {
    const FFState &thisState = *m_ffStates[i];
    const FFState &otherState = *other.m_ffStates[i];
    if (thisState != otherState) {
      return false;
    }
  }
  return true;
}
TO_STRING_BODY(ChartHypothesis)
// friend

View File

@ -146,8 +146,6 @@ public:
// leftRightMost: 1=left, 2=right
void GetOutputPhrase(size_t leftRightMost, size_t numWords, Phrase &outPhrase) const;
int RecombineCompare(const ChartHypothesis &compare) const;
void EvaluateWhenApplied();
void AddArc(ChartHypothesis *loserHypo);
@ -214,6 +212,10 @@ public:
return m_winningHypo;
}
// for unordered_set in stack
size_t hash() const;
bool operator==(const ChartHypothesis& other) const;
TO_STRING();
}; // class ChartHypothesis

View File

@ -167,20 +167,6 @@ void ChartHypothesisCollection::Detach(const HCType::iterator &iter)
void ChartHypothesisCollection::Remove(const HCType::iterator &iter)
{
ChartHypothesis *h = *iter;
/*
stringstream strme("");
strme << h->GetOutputPhrase();
string toFind = "the goal of gene scientists is ";
size_t pos = toFind.find(strme.str());
if (pos == 0)
{
cerr << pos << " " << strme.str() << *h << endl;
cerr << *this << endl;
}
*/
Detach(iter);
ChartHypothesis::Delete(h);
}

View File

@ -42,18 +42,17 @@ public:
/** functor to compare (chart) hypotheses by feature function states.
* If 2 hypos are equal, according to this functor, then they can be recombined.
*/
class ChartHypothesisRecombinationOrderer
class ChartHypothesisRecombinationUnordered
{
public:
bool operator()(const ChartHypothesis* hypoA, const ChartHypothesis* hypoB) const {
// assert in same cell
assert(hypoA->GetCurrSourceRange() == hypoB->GetCurrSourceRange());
// shouldn't be mixing hypos with different lhs
assert(hypoA->GetTargetLHS() == hypoB->GetTargetLHS());
return (hypoA->RecombineCompare(*hypoB) < 0);
size_t operator()(const ChartHypothesis* hypo) const {
return hypo->hash();
}
bool operator()(const ChartHypothesis* hypoA, const ChartHypothesis* hypoB) const {
return (*hypoA) == (*hypoB);
}
};
/** Contains a set of unique hypos that have the same HS non-term.
@ -64,7 +63,8 @@ class ChartHypothesisCollection
friend std::ostream& operator<<(std::ostream&, const ChartHypothesisCollection&);
protected:
typedef std::set<ChartHypothesis*, ChartHypothesisRecombinationOrderer> HCType;
//typedef std::set<ChartHypothesis*, ChartHypothesisRecombinationOrderer> HCType;
typedef boost::unordered_set< ChartHypothesis*, ChartHypothesisRecombinationUnordered, ChartHypothesisRecombinationUnordered > HCType;
HCType m_hypos;
HypoList m_hyposOrdered;

View File

@ -70,7 +70,7 @@ ConfusionNet() : InputType()
if (SD.IsSyntax()) {
m_defaultLabelSet.insert(SD.GetInputDefaultNonTerminal());
}
UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
UTIL_THROW_IF2(InputFeature::InstancePtr() == NULL, "Input feature must be specified");
}
ConfusionNet::
@ -140,9 +140,9 @@ ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
Clear();
// const StaticData &staticData = StaticData::Instance();
const InputFeature &inputFeature = InputFeature::Instance();
size_t numInputScores = inputFeature.GetNumInputScores();
size_t numRealWordCount = inputFeature.GetNumRealWordsInInput();
const InputFeature *inputFeature = InputFeature::InstancePtr();
size_t numInputScores = inputFeature->GetNumInputScores();
size_t numRealWordCount = inputFeature->GetNumRealWordsInInput();
size_t totalCount = numInputScores + numRealWordCount;
bool addRealWordCount = (numRealWordCount > 0);

View File

@ -48,6 +48,32 @@ int BleuScoreState::Compare(const FFState& o) const
return 0;
}
size_t BleuScoreState::hash() const
{
  // In syntax mode all states hash alike (operator== also treats all states
  // as equal there), so the BLEU state never blocks recombination.
  if (StaticData::Instance().IsSyntax())
    return 0;
  // Phrase-based mode: hash the stored target-word context.
  size_t ret = hash_value(m_words);
  return ret;
}
bool BleuScoreState::operator==(const FFState& o) const
{
  // Identity short-circuit.
  if (&o == this)
    return true;
  // Syntax mode: the BLEU state does not discriminate hypotheses,
  // mirroring hash() which returns a constant there.
  if (StaticData::Instance().IsSyntax())
    return true;
  const BleuScoreState& other = static_cast<const BleuScoreState&>(o);
  // Equal iff the stored target-word contexts compare identical.
  return m_words.Compare(other.m_words) == 0;
}
std::ostream& operator<<(std::ostream& out, const BleuScoreState& state)
{
state.print(out);

View File

@ -26,6 +26,9 @@ public:
BleuScoreState();
virtual int Compare(const FFState& other) const;
size_t hash() const;
virtual bool operator==(const FFState& other) const;
void print(std::ostream& out) const;
private:

View File

@ -29,6 +29,19 @@ int ConstrainedDecodingState::Compare(const FFState& other) const
return ret;
}
size_t ConstrainedDecodingState::hash() const
{
  // Hash of the output phrase accumulated so far; paired with operator==
  // below so the recombination hash set sees consistent semantics.
  size_t ret = hash_value(m_outputPhrase);
  return ret;
}
bool ConstrainedDecodingState::operator==(const FFState& other) const
{
  // Equal iff the accumulated output phrases match -- the same field
  // hash() above hashes.
  const ConstrainedDecodingState &otherFF = static_cast<const ConstrainedDecodingState&>(other);
  bool ret = m_outputPhrase == otherFF.m_outputPhrase;
  return ret;
}
//////////////////////////////////////////////////////////////////
ConstrainedDecoding::ConstrainedDecoding(const std::string &line)
:StatefulFeatureFunction(1, line)

View File

@ -18,6 +18,8 @@ public:
ConstrainedDecodingState(const ChartHypothesis &hypo);
int Compare(const FFState& other) const;
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
const Phrase &GetPhrase() const {
return m_outputPhrase;

View File

@ -48,6 +48,33 @@ int ControlRecombinationState::Compare(const FFState& other) const
}
}
size_t ControlRecombinationState::hash() const
{
  // Two modes: SameOutput hashes the surface output so identical outputs
  // recombine; otherwise the hypothesis address itself is the hash, making
  // every hypothesis unique (recombination effectively disabled).
  size_t ret;
  if (m_ff.GetType() == SameOutput) {
    ret = hash_value(m_outputPhrase);
  } else {
    // compare hypo address. Won't be equal unless they're actually the same hypo
    ret = (size_t) m_hypo;
  }
  return ret;
}
bool ControlRecombinationState::operator==(const FFState& other) const
{
  // Equality mirrors hash() above: compare output phrases in SameOutput
  // mode, otherwise fall back to hypothesis identity.
  const ControlRecombinationState &otherFF = static_cast<const ControlRecombinationState&>(other);
  if (m_ff.GetType() == SameOutput) {
    return m_outputPhrase.Compare(otherFF.m_outputPhrase) == 0;
  } else {
    // compare hypo address. Won't be equal unless they're actually the same hypo
    // (original tested m_hypo == otherFF.m_hypo twice; the duplicate branch
    // was dead code and has been removed -- behavior is unchanged)
    return m_hypo == otherFF.m_hypo;
  }
}
std::vector<float> ControlRecombination::DefaultWeights() const
{
UTIL_THROW_IF2(m_numScoreComponents,

View File

@ -27,6 +27,8 @@ public:
ControlRecombinationState(const ChartHypothesis &hypo, const ControlRecombination &ff);
int Compare(const FFState& other) const;
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
const Phrase &GetPhrase() const {
return m_outputPhrase;

View File

@ -1,6 +1,8 @@
#include <boost/functional/hash.hpp>
#include <vector>
#include <algorithm>
#include <iterator>
#include <boost/foreach.hpp>
#include "CoveredReferenceFeature.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/Hypothesis.h"
@ -40,6 +42,17 @@ int CoveredReferenceState::Compare(const FFState& other) const
// return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
}
size_t CoveredReferenceState::hash() const
{
  // Unimplemented stub: recombination hashing for the covered-reference
  // state has not been defined yet; always throws.
  UTIL_THROW2("TODO:Haven't figure this out yet");
}
bool CoveredReferenceState::operator==(const FFState& other) const
{
  // Unimplemented stub, matching hash() above; always throws.
  UTIL_THROW2("TODO:Haven't figure this out yet");
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown

View File

@ -20,6 +20,9 @@ public:
std::multiset<std::string> m_coveredRef;
int Compare(const FFState& other) const;
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
};
class CoveredReferenceFeature : public StatefulFeatureFunction

View File

@ -20,6 +20,16 @@ struct DistortionState_traditional : public FFState {
if (range.GetEndPos() > o.range.GetEndPos()) return 1;
return 0;
}
size_t hash() const {
  // Hash only the end position of the previous source range -- the same
  // field operator== below compares, keeping hash/equality consistent.
  return range.GetEndPos();
}
virtual bool operator==(const FFState& other) const {
  // Equal iff the previous source ranges end at the same position.
  const DistortionState_traditional& o =
    static_cast<const DistortionState_traditional&>(other);
  return range.GetEndPos() == o.range.GetEndPos();
}
};
std::vector<const DistortionScoreProducer*> DistortionScoreProducer::s_staticColl;

View File

@ -2,7 +2,8 @@
#define moses_FFState_h
#include <vector>
#include <stddef.h>
#include "util/exception.hh"
namespace Moses
{
@ -11,7 +12,13 @@ class FFState
{
public:
virtual ~FFState();
virtual int Compare(const FFState& other) const = 0;
//virtual int Compare(const FFState& other) const = 0;
virtual size_t hash() const = 0;
virtual bool operator==(const FFState& other) const = 0;
virtual bool operator!=(const FFState& other) const {
return !(*this == other);
}
};
class DummyState : public FFState
@ -21,6 +28,15 @@ public:
int Compare(const FFState& other) const {
return 0;
}
virtual size_t hash() const {
return 0;
}
virtual bool operator==(const FFState& other) const {
return true;
}
};
}

View File

@ -112,12 +112,10 @@ void FeatureFunction::ParseLine(const std::string &line)
if (m_description == "") {
size_t index = description_counts.count(nameStub);
ostringstream dstream;
dstream << nameStub;
dstream << index;
string descr = SPrint(nameStub) + SPrint(index);
description_counts.insert(nameStub);
m_description = dstream.str();
m_description = descr;
}
}

View File

@ -5,6 +5,7 @@
#include "moses/Hypothesis.h"
#include "moses/TranslationTask.h"
#include "util/string_piece_hash.hh"
#include "util/string_stream.hh"
using namespace std;
@ -131,7 +132,7 @@ void GlobalLexicalModelUnlimited::EvaluateWhenApplied(const Hypothesis& cur_hypo
}
if (m_biasFeature) {
stringstream feature;
util::StringStream feature;
feature << "glm_";
feature << targetString;
feature << "~";
@ -165,7 +166,7 @@ void GlobalLexicalModelUnlimited::EvaluateWhenApplied(const Hypothesis& cur_hypo
if (m_sourceContext) {
if (sourceIndex == 0) {
// add <s> trigger feature for source
stringstream feature;
util::StringStream feature;
feature << "glm_";
feature << targetString;
feature << "~";
@ -183,7 +184,7 @@ void GlobalLexicalModelUnlimited::EvaluateWhenApplied(const Hypothesis& cur_hypo
contextExists = FindStringPiece(m_vocabSource, contextString ) != m_vocabSource.end();
if (m_unrestricted || contextExists) {
stringstream feature;
util::StringStream feature;
feature << "glm_";
feature << targetString;
feature << "~";
@ -304,7 +305,7 @@ void GlobalLexicalModelUnlimited::EvaluateWhenApplied(const Hypothesis& cur_hypo
}
}
} else {
stringstream feature;
util::StringStream feature;
feature << "glm_";
feature << targetString;
feature << "~";
@ -323,7 +324,7 @@ void GlobalLexicalModelUnlimited::AddFeature(ScoreComponentCollection* accumulat
StringPiece sourceTrigger, StringPiece sourceWord,
StringPiece targetTrigger, StringPiece targetWord) const
{
stringstream feature;
util::StringStream feature;
feature << "glm_";
feature << targetTrigger;
feature << ",";

View File

@ -16,8 +16,6 @@
#include "moses/FactorTypeSet.h"
#include "moses/Sentence.h"
#include "moses/FF/FFState.h"
#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#endif
@ -76,10 +74,6 @@ public:
void InitializeForInput(ttasksptr const& ttask);
const FFState* EmptyHypothesisState(const InputType &) const {
return new DummyState();
}
//TODO: This implements the old interface, but cannot be updated because
//it appears to be stateful
void EvaluateWhenApplied(const Hypothesis& cur_hypo,

View File

@ -17,11 +17,8 @@ protected:
bool m_legacy;
public:
static const InputFeature& Instance() {
return *s_instance;
}
static InputFeature& InstanceNonConst() {
return *s_instance;
static const InputFeature *InstancePtr() {
return s_instance;
}
InputFeature(const std::string &line);

View File

@ -143,7 +143,15 @@ public:
int Compare(const FFState& other) const {
return 0;
};
}
virtual size_t hash() const {
return 0;
}
virtual bool operator==(const FFState& other) const {
return true;
}
};
}
}

View File

@ -107,7 +107,7 @@ EvaluateWhenApplied(const Hypothesis& hypo,
{
VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START" << std::endl);
Scores score(GetNumScoreComponents(), 0);
const LRState *prev = dynamic_cast<const LRState *>(prev_state);
const LRState *prev = static_cast<const LRState *>(prev_state);
LRState *next_state = prev->Expand(hypo.GetTranslationOption(), hypo.GetInput(), out);
out->PlusEquals(this, score);

View File

@ -331,6 +331,32 @@ Compare(const FFState& o) const
return 1;
}
size_t PhraseBasedReorderingState::hash() const
{
  // Combine the previously translated source range with the reordering
  // direction (forward/backward).
  size_t ret = hash_value(m_prevRange);
  boost::hash_combine(ret, m_direction);
  return ret;
}
bool PhraseBasedReorderingState::operator==(const FFState& o) const
{
  // Identity short-circuit.
  if (&o == this) return true;
  const PhraseBasedReorderingState &other = static_cast<const PhraseBasedReorderingState&>(o);
  // NOTE(review): hash() mixes in m_direction but equality never compares
  // it -- presumably states of different directions are never compared in
  // the same set; confirm against the recombination callers.
  if (m_prevRange == other.m_prevRange) {
    if (m_direction == LRModel::Forward) {
      // Forward states additionally require matching previous-option scores.
      int compareScore = ComparePrevScores(other.m_prevOption);
      return compareScore == 0;
    } else {
      return true;
    }
  } else {
    return false;
  }
}
LRState*
PhraseBasedReorderingState::
Expand(const TranslationOption& topt, const InputType& input,
@ -356,6 +382,7 @@ int
BidirectionalReorderingState::
Compare(FFState const& o) const
{
/*
if (&o == this) return 0;
BidirectionalReorderingState const &other
@ -363,6 +390,25 @@ Compare(FFState const& o) const
int cmp = m_backward->Compare(*other.m_backward);
return (cmp < 0) ? -1 : cmp ? 1 : m_forward->Compare(*other.m_forward);
*/
}
size_t BidirectionalReorderingState::hash() const
{
  // Combine the hashes of the backward and forward component states.
  size_t ret = m_backward->hash();
  boost::hash_combine(ret, m_forward->hash());
  return ret;
}
bool BidirectionalReorderingState::operator==(const FFState& o) const
{
  // BUGFIX: the self-comparison guard returned 0 (i.e. false) from a bool
  // operator==, so a state compared unequal to itself -- breaking the
  // reflexivity the recombination unordered_set relies on.
  if (&o == this) return true;
  BidirectionalReorderingState const &other
  = static_cast<BidirectionalReorderingState const&>(o);
  // Equal iff both component states (backward and forward) are equal.
  bool ret = (*m_backward == *other.m_backward) && (*m_forward == *other.m_forward);
  return ret;
}
LRState*
@ -400,6 +446,20 @@ Compare(const FFState& o) const
return m_reoStack.Compare(other.m_reoStack);
}
size_t HReorderingBackwardState::hash() const
{
  // Delegate to the reordering stack's own hash.
  size_t ret = m_reoStack.hash();
  return ret;
}
bool HReorderingBackwardState::operator==(const FFState& o) const
{
  // Equal iff the reordering stacks match -- the same data hash() hashes.
  const HReorderingBackwardState& other
  = static_cast<const HReorderingBackwardState&>(o);
  bool ret = m_reoStack == other.m_reoStack;
  return ret;
}
LRState*
HReorderingBackwardState::
Expand(const TranslationOption& topt, const InputType& input,
@ -451,6 +511,26 @@ Compare(const FFState& o) const
: (m_prevRange < other.m_prevRange) ? -1 : 1);
}
size_t HReorderingForwardState::hash() const
{
  // Hash the previously translated source range.
  size_t ret = hash_value(m_prevRange);
  return ret;
}
bool HReorderingForwardState::operator==(const FFState& o) const
{
  // Identity short-circuit.
  if (&o == this) return true;
  HReorderingForwardState const& other
  = static_cast<HReorderingForwardState const&>(o);
  // States with different previous ranges can never be equal; with equal
  // ranges, equality additionally requires matching previous-option scores.
  // (Same truth table as the original +/-1 integer-compare formulation.)
  if (!(m_prevRange == other.m_prevRange)) return false;
  return ComparePrevScores(other.m_prevOption) == 0;
}
// For compatibility with the phrase-based reordering model, scoring is one
// step delayed.
// The forward model takes determines orientations heuristically as follows:

View File

@ -143,10 +143,6 @@ public:
typedef LRModel::ReorderingType ReorderingType;
virtual
int
Compare(const FFState& o) const = 0;
virtual
LRState*
Expand(const TranslationOption& hypo, const InputType& input,
@ -226,7 +222,9 @@ public:
int
Compare(const FFState& o) const;
virtual
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
LRState*
Expand(const TranslationOption& topt, const InputType& input,
ScoreComponentCollection* scores) const;
@ -253,6 +251,9 @@ public:
int
Compare(const FFState& o) const;
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
virtual
LRState*
Expand(const TranslationOption& topt,const InputType& input,
@ -278,6 +279,10 @@ public:
ReorderingStack reoStack);
virtual int Compare(const FFState& o) const;
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
virtual LRState* Expand(const TranslationOption& hypo, const InputType& input,
ScoreComponentCollection* scores) const;
@ -304,6 +309,10 @@ public:
const TranslationOption &topt);
virtual int Compare(const FFState& o) const;
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
virtual LRState* Expand(const TranslationOption& hypo,
const InputType& input,
ScoreComponentCollection* scores) const;

View File

@ -20,6 +20,18 @@ int ReorderingStack::Compare(const ReorderingStack& o) const
return 0;
}
size_t ReorderingStack::hash() const
{
  // Hash all stack elements in order.
  std::size_t ret = boost::hash_range(m_stack.begin(), m_stack.end());
  return ret;
}
bool ReorderingStack::operator==(const ReorderingStack& o) const
{
  // Equal iff the stack contents match element-wise. (The parameter is
  // already a ReorderingStack, so no cast is needed.)
  return m_stack == o.m_stack;
}
// Method to push (shift element into the stack and reduce if reqd)
int ReorderingStack::ShiftReduce(WordsRange input_span)
{

View File

@ -28,6 +28,9 @@ private:
public:
int Compare(const ReorderingStack& o) const;
size_t hash() const;
bool operator==(const ReorderingStack& other) const;
int ShiftReduce(WordsRange input_span);
private:

View File

@ -8,6 +8,7 @@
#include "util/file_piece.hh"
#include "util/string_piece.hh"
#include "util/string_stream.hh"
#include "util/tokenize_piece.hh"
#include "LexicalReordering.h"
@ -26,7 +27,7 @@ const std::string& SparseReorderingFeatureKey::Name (const string& wordListId)
{
static string kSep = "-";
static string name;
ostringstream buf;
util::StringStream buf;
// type side position id word reotype
if (type == Phrase) {
buf << "phr";
@ -88,7 +89,7 @@ SparseReordering::SparseReordering(const map<string,string>& config, const Lexic
ReadWeightMap(i->second);
m_useWeightMap = true;
for (int reoType=0; reoType<=LRModel::MAX; ++reoType) {
ostringstream buf;
util::StringStream buf;
buf << reoType;
m_featureMap2.push_back(m_producer->GetFeatureName(buf.str()));
}

View File

@ -39,6 +39,31 @@ int osmState::Compare(const FFState& otherBase) const
return 0;
}
size_t osmState::hash() const
{
  // Combine the same four fields operator== below inspects: coverage
  // position j, target position E, gap info, and the LM state length.
  size_t ret = j;
  boost::hash_combine(ret, E);
  boost::hash_combine(ret, gap);
  boost::hash_combine(ret, lmState.length);
  return ret;
}
bool osmState::operator==(const FFState& otherBase) const
{
  // Equal iff every hashed component matches (see hash() above):
  // coverage position j, target position E, gap info, and LM state length.
  const osmState &other = static_cast<const osmState&>(otherBase);
  return j == other.j
         && E == other.E
         && gap == other.gap
         && lmState.length == other.lmState.length;
}
std::string osmState :: getName() const
{
@ -157,11 +182,7 @@ int osmHypothesis :: firstOpenGap(vector <int> & coverageVector)
string osmHypothesis :: intToString(int num)
{
std::ostringstream stm;
stm<<num;
return stm.str();
return SPrint(num);
}

View File

@ -17,6 +17,9 @@ class osmState : public FFState
public:
osmState(const lm::ngram::State & val);
int Compare(const FFState& other) const;
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
void saveState(int jVal, int eVal, std::map <int , std::string> & gapVal);
int getJ()const {
return j;

View File

@ -3,6 +3,7 @@
#include "moses/Hypothesis.h"
#include "moses/TranslationOption.h"
#include "moses/InputPath.h"
#include "util/string_stream.hh"
using namespace std;
@ -17,6 +18,21 @@ int PhraseBoundaryState::Compare(const FFState& other) const
return Word::Compare(*m_sourceWord,*(rhs.m_sourceWord));
}
size_t PhraseBoundaryState::hash() const
{
  // Combine the hashes of the boundary target and source words -- the
  // same pair operator== below compares.
  size_t ret = hash_value(*m_targetWord);
  boost::hash_combine(ret, hash_value(*m_sourceWord));
  return ret;
}
bool PhraseBoundaryState::operator==(const FFState& other) const
{
  // static_cast (not dynamic_cast) for consistency with every other state
  // class in this changeset: states are only ever compared against states
  // of the same feature function, so the concrete type is known and the
  // per-comparison RTTI lookup of dynamic_cast is unnecessary overhead in
  // the recombination hot path.
  const PhraseBoundaryState& rhs = static_cast<const PhraseBoundaryState&>(other);
  // Equal iff both boundary words (target and source) match.
  bool ret = *m_targetWord == *rhs.m_targetWord && *m_sourceWord == *rhs.m_sourceWord;
  return ret;
}
/////////////////////////////////////////////////////////////////////////////////////
PhraseBoundaryFeature::PhraseBoundaryFeature(const std::string &line)
: StatefulFeatureFunction(0, line)
{
@ -46,7 +62,7 @@ void PhraseBoundaryFeature::AddFeatures(
ScoreComponentCollection* scores) const
{
for (size_t i = 0; i < factors.size(); ++i) {
ostringstream name;
util::StringStream name;
name << side << ":";
name << factors[i];
name << ":";

View File

@ -24,6 +24,8 @@ public:
return m_targetWord;
}
virtual int Compare(const FFState& other) const;
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
private:

View File

@ -3,6 +3,7 @@
#include "moses/Hypothesis.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/TranslationOption.h"
#include "util/string_stream.hh"
namespace Moses
{
@ -25,13 +26,13 @@ void PhraseLengthFeature::EvaluateInIsolation(const Phrase &source
size_t sourceLength = source.GetSize();
// create feature names
stringstream nameSource;
util::StringStream nameSource;
nameSource << "s" << sourceLength;
stringstream nameTarget;
util::StringStream nameTarget;
nameTarget << "t" << targetLength;
stringstream nameBoth;
util::StringStream nameBoth;
nameBoth << sourceLength << "," << targetLength;
// increase feature counts

View File

@ -6,7 +6,6 @@
#include <map>
#include "StatelessFeatureFunction.h"
#include "moses/FF/FFState.h"
#include "moses/Word.h"
#include "moses/FactorCollection.h"

View File

@ -21,8 +21,17 @@
namespace Moses
{
size_t PhraseOrientationFeatureState::hash() const
{
  // Unimplemented stub: hashing for this state is not yet defined; throws.
  UTIL_THROW2("TODO:Haven't figure this out yet");
}
bool PhraseOrientationFeatureState::operator==(const FFState& other) const
{
  // Unimplemented stub, matching hash() above; always throws.
  UTIL_THROW2("TODO:Haven't figure this out yet");
}
////////////////////////////////////////////////////////////////////////////////
const std::string PhraseOrientationFeature::MORIENT("M");
const std::string PhraseOrientationFeature::SORIENT("S");
const std::string PhraseOrientationFeature::DORIENT("D");

View File

@ -140,6 +140,9 @@ public:
return 0;
};
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
protected:
static int CompareLeftBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState, bool useSparseNT) {

View File

@ -7,6 +7,7 @@
#include "moses/TranslationOption.h"
#include "moses/InputPath.h"
#include "util/string_piece_hash.hh"
#include "util/string_stream.hh"
#include "util/exception.hh"
using namespace std;
@ -126,7 +127,8 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
const bool use_topicid_prob = isnt.GetUseTopicIdAndProb();
// compute pair
ostringstream pair;
util::StringStream pair;
pair << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() );
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
@ -145,7 +147,8 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
if(use_topicid) {
// use topicid as trigger
const long topicid = isnt.GetTopicId();
stringstream feature;
util::StringStream feature;
feature << m_description << "_";
if (topicid == -1)
feature << "unk";
@ -159,13 +162,13 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
// use topic probabilities
const vector<string> &topicid_prob = *(isnt.GetTopicIdAndProb());
if (atol(topicid_prob[0].c_str()) == -1) {
stringstream feature;
util::StringStream feature;
feature << m_description << "_unk_";
feature << pair.str();
scoreBreakdown.SparsePlusEquals(feature.str(), 1);
} else {
for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
stringstream feature;
util::StringStream feature;
feature << m_description << "_";
feature << topicid_prob[i];
feature << "_";
@ -179,7 +182,7 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
const long docid = isnt.GetDocumentId();
for (set<string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
string sourceTrigger = *p;
ostringstream namestr;
util::StringStream namestr;
namestr << m_description << "_";
namestr << sourceTrigger;
namestr << "_";
@ -207,7 +210,7 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger ) != m_vocabSource.end();
if (m_unrestricted || sourceTriggerExists) {
ostringstream namestr;
util::StringStream namestr;
namestr << m_description << "_";
namestr << sourceTrigger;
namestr << "~";
@ -237,7 +240,7 @@ void PhrasePairFeature::EvaluateInIsolation(const Phrase &source
, ScoreComponentCollection &estimatedFutureScore) const
{
if (m_simple) {
ostringstream namestr;
util::StringStream namestr;
namestr << m_description << "_";
namestr << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() );
for (size_t i = 1; i < source.GetSize(); ++i) {

View File

@ -4,7 +4,7 @@
#include "moses/ScoreComponentCollection.h"
#include "moses/FactorCollection.h"
#include <sstream>
#include "util/string_stream.hh"
using namespace std;
@ -58,7 +58,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
}
}
ostringstream namestr;
util::StringStream namestr;
for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) {
const Word &wordT = targetPhrase.GetWord(posT);

View File

@ -16,6 +16,15 @@ public:
}
int Compare(const FFState& other) const;
virtual size_t hash() const {
  // Hash on the stored target length -- the field operator== compares.
  return (size_t) m_targetLen;
}
virtual bool operator==(const FFState& o) const {
  // Equal iff the stored target lengths match.
  const SkeletonState& other = static_cast<const SkeletonState&>(o);
  return m_targetLen == other.m_targetLen;
}
};
class SkeletonStatefulFF : public StatefulFeatureFunction

View File

@ -4,7 +4,6 @@
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include "StatelessFeatureFunction.h"
#include "FFState.h"
#include "moses/Factor.h"
namespace Moses

View File

@ -6,6 +6,7 @@
#include "moses/Sentence.h"
#include "util/exception.hh"
#include "util/string_stream.hh"
#include "SparseHieroReorderingFeature.h"
@ -202,7 +203,7 @@ void SparseHieroReorderingFeature::EvaluateWhenApplied(
targetLeftRulePos < targetRightRulePos))) {
isMonotone = false;
}
stringstream buf;
util::StringStream buf;
buf << "h_"; //sparse reordering, Huck
if (m_type == SourceLeft || m_type == SourceCombined) {
buf << GetFactor(sourceLeftBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString();

View File

@ -10,7 +10,6 @@
#include "moses/Sentence.h"
#include "StatelessFeatureFunction.h"
#include "FFState.h"
namespace Moses
{

View File

@ -17,6 +17,19 @@ int TargetBigramState::Compare(const FFState& other) const
return Word::Compare(m_word,rhs.m_word);
}
size_t TargetBigramState::hash() const
{
  // Hash the single stored boundary word.
  std::size_t ret = hash_value(m_word);
  return ret;
}
bool TargetBigramState::operator==(const FFState& other) const
{
  // Equal iff the stored boundary words match.
  // NOTE(review): dynamic_cast is inconsistent with the static_cast used by
  // the sibling state classes and pays an RTTI lookup per comparison --
  // consider aligning, assuming states are only compared within one feature.
  const TargetBigramState& rhs = dynamic_cast<const TargetBigramState&>(other);
  return m_word == rhs.m_word;
}
////////////////////////////////////////////////////////////////////////////////
TargetBigramFeature::TargetBigramFeature(const std::string &line)
:StatefulFeatureFunction(0, line)
{

View File

@ -21,6 +21,8 @@ public:
return m_word;
}
virtual int Compare(const FFState& other) const;
size_t hash() const;
virtual bool operator==(const FFState& other) const;
private:
Word m_word;

View File

@ -37,6 +37,38 @@ int TargetNgramState::Compare(const FFState& other) const
}
}
size_t TargetNgramState::hash() const
{
  // Hash the full word context, element by element, in order.
  std::size_t ret = boost::hash_range(m_words.begin(), m_words.end());
  return ret;
}
bool TargetNgramState::operator==(const FFState& other) const
{
  // Compares only the overlapping prefix of the two word contexts: states of
  // different lengths are declared equal whenever the shorter context is a
  // prefix of the longer one.
  // NOTE(review): this is inconsistent with hash() above, which hashes ALL
  // words -- two "equal" states of different lengths will generally land in
  // different unordered_set buckets and never actually be recombined.
  // Presumably this mirrors the legacy Compare() semantics; confirm whether
  // unequal lengths should simply return false.
  const TargetNgramState& rhs = dynamic_cast<const TargetNgramState&>(other);
  int result;
  if (m_words.size() == rhs.m_words.size()) {
    for (size_t i = 0; i < m_words.size(); ++i) {
      result = Word::Compare(m_words[i],rhs.m_words[i]);
      if (result != 0) return false;
    }
    return true;
  } else if (m_words.size() < rhs.m_words.size()) {
    for (size_t i = 0; i < m_words.size(); ++i) {
      result = Word::Compare(m_words[i],rhs.m_words[i]);
      if (result != 0) return false;
    }
    return true;
  } else {
    for (size_t i = 0; i < rhs.m_words.size(); ++i) {
      result = Word::Compare(m_words[i],rhs.m_words[i]);
      if (result != 0) return false;
    }
    return true;
  }
}
////////////////////////////////////////////////////////////////////////////
TargetNgramFeature::TargetNgramFeature(const std::string &line)
:StatefulFeatureFunction(0, line)
{
@ -108,7 +140,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
// extract all ngrams from current hypothesis
vector<Word> prev_words(tnState->GetWords());
stringstream curr_ngram;
util::StringStream curr_ngram;
bool skip = false;
// include lower order ngrams?
@ -166,7 +198,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
if (cur_hypo.GetWordsBitmap().IsComplete()) {
for (size_t n = m_n; n >= smallest_n; --n) {
stringstream last_ngram;
util::StringStream last_ngram;
skip = false;
for (size_t i = cur_hypo.GetSize() - n + 1; i < cur_hypo.GetSize() && !skip; ++i)
appendNgram(cur_hypo.GetWord(i), skip, last_ngram);
@ -176,7 +208,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
accumulator->PlusEquals(this, last_ngram.str(), 1);
}
}
return NULL;
return new TargetNgramState();
}
// prepare new state
@ -196,7 +228,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
return new TargetNgramState(new_prev_words);
}
void TargetNgramFeature::appendNgram(const Word& word, bool& skip, stringstream &ngram) const
void TargetNgramFeature::appendNgram(const Word& word, bool& skip, util::StringStream &ngram) const
{
// const string& w = word.GetFactor(m_factorType)->GetString();
const StringPiece w = word.GetString(m_factorType);
@ -249,7 +281,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
suffixTerminals++;
// everything else
else {
stringstream ngram;
util::StringStream ngram;
ngram << m_baseName;
if (m_factorType == 0)
ngram << factorZero;
@ -360,7 +392,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
suffixTerminals = 0;
// remove duplicates
stringstream curr_ngram;
util::StringStream curr_ngram;
curr_ngram << m_baseName;
curr_ngram << (*contextFactor[m_n-2]).GetString(m_factorType);
curr_ngram << ":";
@ -386,7 +418,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
// remove duplicates
size_t size = contextFactor.size();
if (makePrefix && makeSuffix && (size <= m_n)) {
stringstream curr_ngram;
util::StringStream curr_ngram;
curr_ngram << m_baseName;
for (size_t i = 0; i < size; ++i) {
curr_ngram << (*contextFactor[i]).GetString(m_factorType);
@ -404,7 +436,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
void TargetNgramFeature::MakePrefixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfStartPos, size_t offset) const
{
stringstream ngram;
util::StringStream ngram;
size_t size = contextFactor.size();
for (size_t k = 0; k < numberOfStartPos; ++k) {
size_t max_end = (size < m_n+k+offset)? size: m_n+k+offset;
@ -429,7 +461,7 @@ void TargetNgramFeature::MakePrefixNgrams(std::vector<const Word*> &contextFacto
void TargetNgramFeature::MakeSuffixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfEndPos, size_t offset) const
{
stringstream ngram;
util::StringStream ngram;
for (size_t k = 0; k < numberOfEndPos; ++k) {
size_t end_pos = contextFactor.size()-1-k-offset;
for (int start_pos=end_pos-1; (start_pos >= 0) && (end_pos-start_pos < m_n); --start_pos) {

View File

@ -12,6 +12,7 @@
#include "moses/LM/SingleFactor.h"
#include "moses/ChartHypothesis.h"
#include "moses/ChartManager.h"
#include "util/string_stream.hh"
namespace Moses
{
@ -19,12 +20,17 @@ namespace Moses
class TargetNgramState : public FFState
{
public:
TargetNgramState(std::vector<Word> &words): m_words(words) {}
TargetNgramState() {}
TargetNgramState(const std::vector<Word> &words): m_words(words) {}
const std::vector<Word> GetWords() const {
return m_words;
}
virtual int Compare(const FFState& other) const;
size_t hash() const;
virtual bool operator==(const FFState& other) const;
private:
std::vector<Word> m_words;
};
@ -171,6 +177,45 @@ public:
}
return 0;
}
// Recombination hash for a chart span: combines the span coordinates with
// the target-side prefix/suffix phrases where they matter.  Must stay in
// sync with operator== below: states that compare equal must hash equally.
size_t hash() const {
// not sure if this is correct
size_t ret;
ret = m_startPos;
boost::hash_combine(ret, m_endPos);
boost::hash_combine(ret, m_inputSize);
// prefix — only relevant when the span does not start the sentence
if (m_startPos > 0) { // not for "<s> ..."
boost::hash_combine(ret, hash_value(GetPrefix()));
}
// suffix — only relevant when the span does not end the sentence
if (m_endPos < m_inputSize - 1) { // not for "... </s>"
boost::hash_combine(ret, hash_value(GetSuffix()));
}
return ret;
}
// Recombination equality: the spans must agree on the target-side prefix
// (unless the span starts the sentence) and the suffix (unless it ends it).
// NOTE(review): m_startPos/m_endPos/m_inputSize feed hash() but are not
// compared here; this presumably relies on recombination only ever
// comparing states from the same chart cell -- confirm.
virtual bool operator==(const FFState& o) const {
const TargetNgramChartState &other =
static_cast<const TargetNgramChartState &>( o );
// prefix
if (m_startPos > 0) { // not for "<s> ..."
int ret = GetPrefix().Compare(other.GetPrefix());
if (ret != 0)
return false;
}
// suffix
if (m_endPos < m_inputSize - 1) { // not for "... </s>"
int ret = GetSuffix().Compare(other.GetSuffix());
if (ret != 0)
return false;
}
return true;
}
};
/** Sets the features of observed ngrams.
@ -222,7 +267,7 @@ private:
std::string m_baseName;
void appendNgram(const Word& word, bool& skip, std::stringstream& ngram) const;
void appendNgram(const Word& word, bool& skip, util::StringStream& ngram) const;
void MakePrefixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,
size_t numberOfStartPos = 1, size_t offset = 0) const;
void MakeSuffixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,

View File

@ -179,7 +179,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input
if (m_simple) {
// construct feature name
stringstream featureName;
util::StringStream featureName;
featureName << m_description << "_";
featureName << sourceWord;
featureName << "~";
@ -193,7 +193,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input
if(use_topicid) {
// use topicid as trigger
const long topicid = sentence.GetTopicId();
stringstream feature;
util::StringStream feature;
feature << m_description << "_";
if (topicid == -1)
feature << "unk";
@ -209,7 +209,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input
// use topic probabilities
const vector<string> &topicid_prob = *(input.GetTopicIdAndProb());
if (atol(topicid_prob[0].c_str()) == -1) {
stringstream feature;
util::StringStream feature;
feature << m_description << "_unk_";
feature << sourceWord;
feature << "~";
@ -217,7 +217,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input
scoreBreakdown.SparsePlusEquals(feature.str(), 1);
} else {
for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
stringstream feature;
util::StringStream feature;
feature << m_description << "_";
feature << topicid_prob[i];
feature << "_";
@ -233,7 +233,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input
const long docid = input.GetDocumentId();
for (boost::unordered_set<std::string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
string sourceTrigger = *p;
stringstream feature;
util::StringStream feature;
feature << m_description << "_";
feature << sourceTrigger;
feature << "_";
@ -248,7 +248,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input
size_t globalSourceIndex = inputPath.GetWordsRange().GetStartPos() + sourceIndex;
if (!m_domainTrigger && globalSourceIndex == 0) {
// add <s> trigger feature for source
stringstream feature;
util::StringStream feature;
feature << m_description << "_";
feature << "<s>,";
feature << sourceWord;
@ -278,7 +278,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input
if (m_domainTrigger) {
if (sourceTriggerExists) {
stringstream feature;
util::StringStream feature;
feature << m_description << "_";
feature << sourceTrigger;
feature << "_";
@ -288,7 +288,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input
scoreBreakdown.SparsePlusEquals(feature.str(), 1);
}
} else if (m_unrestricted || sourceTriggerExists) {
stringstream feature;
util::StringStream feature;
feature << m_description << "_";
if (contextIndex < globalSourceIndex) {
feature << sourceTrigger;

View File

@ -5,7 +5,6 @@
#include "moses/FactorCollection.h"
#include "moses/Sentence.h"
#include "FFState.h"
#include "StatelessFeatureFunction.h"
namespace Moses
@ -43,10 +42,6 @@ public:
void Load();
const FFState* EmptyHypothesisState(const InputType &) const {
return new DummyState();
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase

View File

@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "FeatureVector.h"
#include "util/string_piece_hash.hh"
#include "util/string_stream.hh"
using namespace std;
@ -204,7 +205,7 @@ void FVector::save(const string& filename) const
{
ofstream out(filename.c_str());
if (!out) {
ostringstream msg;
util::StringStream msg;
msg << "Unable to open " << filename;
throw runtime_error(msg.str());
}

View File

@ -12,6 +12,7 @@
#include <sstream>
#include <vector>
#include "util/exception.hh"
#include "util/string_stream.hh"
#include "TypeDef.h"
#include "Util.h"
@ -147,7 +148,7 @@ inline OFF_T fTell(FILE* f)
inline void fSeek(FILE* f,OFF_T o)
{
if(FSEEKO(f,o,SEEK_SET)<0) {
std::stringstream strme;
util::StringStream strme;
strme << "ERROR: could not fseeko position " << o <<"\n";
if(o==InvalidOffT) strme << "You tried to seek for 'InvalidOffT'!\n";
UTIL_THROW2(strme.str());

View File

@ -28,6 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "InputFileStream.h"
#include "StaticData.h"
#include "util/exception.hh"
#include "util/string_stream.hh"
using namespace std;
@ -84,9 +85,9 @@ void GenerationDictionary::Load()
size_t numFeaturesInFile = token.size() - 2;
if (numFeaturesInFile < numFeatureValuesInConfig) {
stringstream strme;
util::StringStream strme;
strme << m_filePath << ":" << lineNum << ": expected " << numFeatureValuesInConfig
<< " feature values, but found " << numFeaturesInFile << std::endl;
<< " feature values, but found " << numFeaturesInFile << "\n";
throw strme.str();
}
std::vector<float> scores(numFeatureValuesInConfig, 0.0f);

View File

@ -195,35 +195,6 @@ Create(Manager& manager, InputType const& m_source,
#endif
}
/** check, if two hypothesis can be recombined.
this is actually a sorting function that allows us to
keep an ordered list of hypotheses. This makes recombination
much quicker.
*/
int
Hypothesis::
RecombineCompare(const Hypothesis &compare) const
{
// -1 = this < compare
// +1 = this > compare
// 0 = this ==compare
int comp = m_sourceCompleted.Compare(compare.m_sourceCompleted);
if (comp != 0)
return comp;
for (unsigned i = 0; i < m_ffStates.size(); ++i) {
if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL) {
// TODO: Can this situation actually occur?
comp = int(m_ffStates[i] != NULL) - int(compare.m_ffStates[i] != NULL);
} else {
comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]);
}
if (comp != 0) return comp;
}
return 0;
}
void
Hypothesis::
EvaluateWhenApplied(StatefulFeatureFunction const& sfff,
@ -647,6 +618,40 @@ GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
return ret;
}
size_t Hypothesis::hash() const
{
  // Recombination hash: start from the source-coverage bitmap, then fold in
  // the hash of every stateful feature-function state.
  size_t seed = m_sourceCompleted.hash();
  for (size_t idx = 0; idx < m_ffStates.size(); ++idx) {
    // NOTE(review): assumes no entry is NULL -- a null state would crash here.
    boost::hash_combine(seed, m_ffStates[idx]->hash());
  }
  return seed;
}
bool Hypothesis::operator==(const Hypothesis& other) const
{
  // Hypotheses recombine iff they cover the same source words and every
  // stateful feature-function state compares equal.
  if (m_sourceCompleted != other.m_sourceCompleted) return false;
  for (size_t idx = 0; idx < m_ffStates.size(); ++idx) {
    if (*m_ffStates[idx] != *other.m_ffStates[idx]) return false;
  }
  return true;
}
#ifdef HAVE_XMLRPC_C
void
Hypothesis::

View File

@ -197,8 +197,6 @@ public:
return m_sourceCompleted.IsComplete();
}
int RecombineCompare(const Hypothesis &compare) const;
void GetOutputPhrase(Phrase &out) const;
void ToStream(std::ostream& out) const {
@ -211,7 +209,7 @@ public:
if (m_prevHypo != NULL) {
m_prevHypo->ToStream(out);
}
out << (Phrase) GetCurrTargetPhrase();
out << (const Phrase&) GetCurrTargetPhrase();
}
std::string GetOutputString() const {
@ -288,13 +286,16 @@ public:
// creates a map of TARGET positions which should be replaced by word using placeholder
std::map<size_t, const Moses::Factor*> GetPlaceholders(const Moses::Hypothesis &hypo, Moses::FactorType placeholderFactor) const;
// for unordered_set in stack
size_t hash() const;
bool operator==(const Hypothesis& other) const;
#ifdef HAVE_XMLRPC_C
void OutputWordAlignment(std::vector<xmlrpc_c::value>& out) const;
void OutputLocalWordAlignment(std::vector<xmlrpc_c::value>& dest) const;
#endif
};
std::ostream& operator<<(std::ostream& out, const Hypothesis& hypothesis);
@ -318,21 +319,17 @@ struct CompareHypothesisTotalScore {
#define FREEHYPO(hypo) delete hypo
#endif
/** defines less-than relation on hypotheses.
* The particular order is not important for us, we need just to figure out
* which hypothesis are equal based on:
* the last n-1 target words are the same
* and the covers (source words translated) are the same
* Directly using RecombineCompare is unreliable because the Compare methods
* of some states are based on archictecture-dependent pointer comparisons.
* That's why we use the hypothesis IDs instead.
*/
class HypothesisRecombinationOrderer
class HypothesisRecombinationUnordered
{
public:
bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const {
return (hypoA->RecombineCompare(*hypoB) < 0);
size_t operator()(const Hypothesis* hypo) const {
return hypo->hash();
}
bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const {
return (*hypoA) == (*hypoB);
}
};
}

View File

@ -3,6 +3,7 @@
#include <vector>
#include <set>
#include <boost/unordered_set.hpp>
#include "Hypothesis.h"
#include "WordsBitmap.h"
@ -18,7 +19,7 @@ class HypothesisStack
{
protected:
typedef std::set< Hypothesis*, HypothesisRecombinationOrderer > _HCType;
typedef boost::unordered_set< Hypothesis*, HypothesisRecombinationUnordered, HypothesisRecombinationUnordered > _HCType;
_HCType m_hypos; /**< contains hypotheses */
Manager& m_manager;

View File

@ -31,4 +31,16 @@ int BackwardLMState::Compare(const FFState &o) const
return state.left.Compare(other.state.left);
}
size_t BackwardLMState::hash() const
{
  // Hash only the left-context half of the wrapped KenLM state.
  return hash_value(state.left);
}
bool BackwardLMState::operator==(const FFState& o) const
{
  // Equal iff the KenLM left contexts match.
  const BackwardLMState &rhs = static_cast<const BackwardLMState &>(o);
  return state.left == rhs.state.left;
}
}

View File

@ -47,14 +47,11 @@ class BackwardLMState : public FFState
public:
/*
int Compare(const FFState &o) const {
const BackwardLMState &other = static_cast<const BackwardLMState &>(o);
return state.left.Compare(other.state.left);
}
*/
int Compare(const FFState &o) const;
size_t hash() const;
virtual bool operator==(const FFState& other) const;
// Allow BackwardLanguageModel to access the private members of this class
template <class Model> friend class BackwardLanguageModel;

View File

@ -38,6 +38,15 @@ public:
}
int Compare(const FFState& other) const;
// The state's hash is precomputed and cached in m_hash; just return it.
virtual size_t hash() const {
return m_hash;
}
// Equality is delegated to the cached hash values; two states with equal
// hashes are treated as identical (hash collisions would merge states).
virtual bool operator==(const FFState& other) const {
const BilingualLMState &otherState = static_cast<const BilingualLMState&>(other);
return m_hash == otherState.m_hash;
}
};
class BilingualLM : public StatefulFeatureFunction

View File

@ -146,6 +146,7 @@ public:
}
int Compare(const FFState& o) const {
/*
const LanguageModelChartState &other =
dynamic_cast<const LanguageModelChartState &>( o );
@ -164,7 +165,49 @@ public:
return ret;
}
return 0;
*/
}
size_t hash() const {
size_t ret;
// prefix
ret = m_hypo.GetCurrSourceRange().GetStartPos() > 0;
if (m_hypo.GetCurrSourceRange().GetStartPos() > 0) { // not for "<s> ..."
size_t hash = hash_value(GetPrefix());
boost::hash_combine(ret, hash);
}
// suffix
size_t inputSize = m_hypo.GetManager().GetSource().GetSize();
boost::hash_combine(ret, m_hypo.GetCurrSourceRange().GetEndPos() < inputSize - 1);
if (m_hypo.GetCurrSourceRange().GetEndPos() < inputSize - 1) { // not for "... </s>"
size_t hash = m_lmRightContext->hash();
boost::hash_combine(ret, hash);
}
return ret;
}
virtual bool operator==(const FFState& o) const {
const LanguageModelChartState &other =
dynamic_cast<const LanguageModelChartState &>( o );
// prefix
if (m_hypo.GetCurrSourceRange().GetStartPos() > 0) { // not for "<s> ..."
bool ret = GetPrefix() == other.GetPrefix();
if (ret == false)
return false;
}
// suffix
size_t inputSize = m_hypo.GetManager().GetSource().GetSize();
if (m_hypo.GetCurrSourceRange().GetEndPos() < inputSize - 1) { // not for "... </s>"
bool ret = (*other.GetRightContext()) == (*m_lmRightContext);
return ret;
}
return true;
}
};
} // namespace

View File

@ -68,6 +68,13 @@ public:
else return state.compare(o.state);
}
// Hash-based recombination is not implemented for DALM states yet:
// always throws.
virtual size_t hash() const {
UTIL_THROW2("TODO:Haven't figure this out yet");
}
// Equality-based recombination is not implemented for DALM states yet:
// always throws.
virtual bool operator==(const FFState& other) const {
UTIL_THROW2("TODO:Haven't figure this out yet");
}
DALM::State &get_state() {
return state;
}
@ -178,6 +185,14 @@ public:
if(rightContext.get_count() > o.rightContext.get_count()) return 1;
return rightContext.compare(o.rightContext);
}
// Hash-based recombination is not implemented for DALM chart states yet:
// always throws.
virtual size_t hash() const {
UTIL_THROW2("TODO:Haven't figure this out yet");
}
// Equality-based recombination is not implemented for DALM chart states yet:
// always throws.
virtual bool operator==(const FFState& other) const {
UTIL_THROW2("TODO:Haven't figure this out yet");
}
};
LanguageModelDALM::LanguageModelDALM(const std::string &line)

View File

@ -32,6 +32,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "lm/model.hh"
#include "util/exception.hh"
#include "util/tokenize_piece.hh"
#include "util/string_stream.hh"
#include "Ken.h"
#include "Base.h"
@ -61,6 +62,17 @@ struct KenLMState : public FFState {
if (state.length > other.state.length) return 1;
return std::memcmp(state.words, other.state.words, sizeof(lm::WordIndex) * state.length);
}
virtual size_t hash() const {
  // Delegate to KenLM's hash_value over the full n-gram state.
  return hash_value(state);
}
virtual bool operator==(const FFState& o) const {
  // Equal iff KenLM considers the two n-gram states identical.
  const KenLMState &rhs = static_cast<const KenLMState &>(o);
  return state == rhs.state;
}
};
///*
@ -307,6 +319,16 @@ public:
return ret;
}
size_t hash() const {
  // Delegate to KenLM's hash_value over the chart state.
  return hash_value(m_state);
}
virtual bool operator==(const FFState& o) const {
  // Equal iff KenLM considers the two chart states identical.
  const LanguageModelChartStateKenLM &rhs =
    static_cast<const LanguageModelChartStateKenLM &>(o);
  return m_state == rhs.m_state;
}
private:
lm::ngram::ChartState m_state;
};
@ -383,7 +405,7 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(con
} else if (word.IsNonTerminal()) {
// Non-terminal is first so we can copy instead of rescoring.
const Syntax::SVertex *pred = hyperedge.tail[nonTermIndexMap[phrasePos]];
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(pred->state[featureID])->GetChartState();
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(pred->states[featureID])->GetChartState();
float prob = UntransformLMScore(
pred->best->label.scoreBreakdown.GetScoresForProducer(this)[0]);
ruleScore.BeginNonTerminal(prevState, prob);
@ -395,7 +417,7 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(con
const Word &word = target.GetWord(phrasePos);
if (word.IsNonTerminal()) {
const Syntax::SVertex *pred = hyperedge.tail[nonTermIndexMap[phrasePos]];
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(pred->state[featureID])->GetChartState();
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(pred->states[featureID])->GetChartState();
float prob = UntransformLMScore(
pred->best->label.scoreBreakdown.GetScoresForProducer(this)[0]);
ruleScore.NonTerminal(prevState, prob);
@ -466,7 +488,7 @@ LanguageModel *ConstructKenLM(const std::string &lineOrig)
util::TokenIter<util::SingleCharacter, true> argument(lineOrig, ' ');
++argument; // KENLM
stringstream line;
util::StringStream line;
line << "KENLM";
for (; argument; ++argument) {

View File

@ -16,6 +16,15 @@ struct PointerState : public FFState {
else if (other.lmstate < lmstate) return -1;
return 0;
}
virtual size_t hash() const {
  // Identity hash: the state is keyed by the raw LM state pointer itself.
  // reinterpret_cast replaces the original C-style cast (same semantics,
  // greppable and intent-revealing).
  return reinterpret_cast<size_t>(lmstate);
}
virtual bool operator==(const FFState& other) const {
  // Pointer identity defines state equality.
  const PointerState& rhs = static_cast<const PointerState&>(other);
  return lmstate == rhs.lmstate;
}
};
} // namespace

View File

@ -5,6 +5,7 @@
#include <sys/types.h>
#include "Remote.h"
#include "moses/Factor.h"
#include "util/string_stream.hh"
#if !defined(_WIN32) && !defined(_WIN64)
#include <arpa/inet.h>
@ -96,7 +97,7 @@ LMResult LanguageModelRemote::GetValue(const std::vector<const Word*> &contextFa
cur->boState = *reinterpret_cast<const State*>(&m_curId);
++m_curId;
std::ostringstream os;
util::StringStream os;
os << "prob ";
if (event_word == NULL) {
os << "</s>";
@ -111,9 +112,8 @@ LMResult LanguageModelRemote::GetValue(const std::vector<const Word*> &contextFa
os << ' ' << f->GetString();
}
}
os << std::endl;
std::string out = os.str();
write(sock, out.c_str(), out.size());
os << "\n";
write(sock, os.str().c_str(), os.str().size());
char res[6];
int r = read(sock, res, 6);
int errors = 0;

View File

@ -56,6 +56,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "util/exception.hh"
#include "util/random.hh"
#include "util/string_stream.hh"
using namespace std;
@ -1971,7 +1972,7 @@ void Manager::OutputSearchGraphSLF() const
// Output search graph in HTK standard lattice format (SLF)
bool slf = staticData.GetOutputSearchGraphSLF();
if (slf) {
stringstream fileName;
util::StringStream fileName;
string dir;
staticData.GetParameter().SetParameter<string>(dir, "output-search-graph-slf", "");

View File

@ -11,7 +11,7 @@ PDTAimp::PDTAimp(PhraseDictionaryTreeAdaptor *p)
distinctE(0)
{
m_numInputScores = 0;
m_inputFeature = &InputFeature::Instance();
m_inputFeature = InputFeature::InstancePtr();
if (m_inputFeature) {
const PhraseDictionary *firstPt = PhraseDictionary::GetColl()[0];

View File

@ -30,6 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Util.h"
#include "InputFileStream.h"
#include "StaticData.h"
#include "util/string_stream.hh"
#include "util/exception.hh"
#include "util/random.hh"
#include <boost/program_options.hpp>
@ -701,7 +702,7 @@ ConvertWeightArgsPhraseModel(const string &oldWeightName)
size_t currOldInd = 0;
for(size_t currDict = 0 ; currDict < translationVector.size(); currDict++) {
stringstream ptLine;
util::StringStream ptLine;
vector<string> token = Tokenize(translationVector[currDict]);
@ -860,7 +861,7 @@ ConvertWeightArgsDistortion()
}
SetWeight("LexicalReordering", indTable, weights);
stringstream strme;
util::StringStream strme;
strme << "LexicalReordering "
<< "type=" << toks[1] << " ";
@ -1007,7 +1008,7 @@ ConvertWeightArgsGeneration(const std::string &oldWeightName, const std::string
}
SetWeight(newWeightName, indTable, weights);
stringstream strme;
util::StringStream strme;
strme << "Generation "
<< "input-factor=" << modelToks[0] << " "
<< "output-factor=" << modelToks[1] << " "

View File

@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "StaticData.h" // GetMaxNumFactors
#include "util/string_piece.hh"
#include "util/string_stream.hh"
#include "util/tokenize_piece.hh"
using namespace std;
@ -117,7 +118,7 @@ std::string Phrase::GetStringRep(const vector<FactorType> factorsToPrint) const
{
bool markUnknown = StaticData::Instance().GetMarkUnknown();
stringstream strme;
util::StringStream strme;
for (size_t pos = 0 ; pos < GetSize() ; pos++) {
if (markUnknown && GetWord(pos).IsOOV()) {
strme << StaticData::Instance().GetUnknownWordPrefix();

View File

@ -4,6 +4,7 @@
#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include "util/exception.hh"
#include "util/string_stream.hh"
#include "ScoreComponentCollection.h"
#include "StaticData.h"
#include "moses/FF/StatelessFeatureFunction.h"
@ -88,9 +89,8 @@ void ScoreComponentCollection::MultiplyEquals(const FeatureFunction* sp, float s
{
std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
std::stringstream name;
name << i->first;
if (starts_with(name.str(), prefix))
const std::string &name = i->first.name();
if (starts_with(name, prefix))
m_scores[i->first] = i->second * scalar;
}
}
@ -101,9 +101,8 @@ size_t ScoreComponentCollection::GetNumberWeights(const FeatureFunction* sp)
std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
size_t weights = 0;
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
std::stringstream name;
name << i->first;
if (starts_with(name.str(), prefix))
const std::string &name = i->first.name();
if (starts_with(name, prefix))
weights++;
}
return weights;
@ -215,7 +214,7 @@ void ScoreComponentCollection::Save(const string& filename) const
{
ofstream out(filename.c_str());
if (!out) {
ostringstream msg;
util::StringStream msg;
msg << "Unable to open " << filename;
throw runtime_error(msg.str());
}

View File

@ -640,7 +640,7 @@ void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const
switch (decodeType) {
case Translate:
if(index>=pts.size()) {
stringstream strme;
util::StringStream strme;
strme << "No phrase dictionary with index "
<< index << " available!";
UTIL_THROW(util::Exception, strme.str());
@ -649,7 +649,7 @@ void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const
break;
case Generate:
if(index>=gens.size()) {
stringstream strme;
util::StringStream strme;
strme << "No generation dictionary with index "
<< index << " available!";
UTIL_THROW(util::Exception, strme.str());

View File

@ -93,7 +93,7 @@ SHyperedge *Cube::CreateHyperedge(const std::vector<int> &coordinates)
SVertex *head = new SVertex();
head->best = hyperedge;
head->pvertex = 0; // FIXME???
head->state.resize(
head->states.resize(
StatefulFeatureFunction::GetStatefulFeatureFunctions().size());
hyperedge->head = head;
@ -131,7 +131,7 @@ SHyperedge *Cube::CreateHyperedge(const std::vector<int> &coordinates)
StatefulFeatureFunction::GetStatefulFeatureFunctions();
for (unsigned i = 0; i < ffs.size(); ++i) {
if (!staticData.IsFeatureFunctionIgnored(*ffs[i])) {
head->state[i] =
head->states[i] =
ffs[i]->EvaluateWhenApplied(*hyperedge, i,
&hyperedge->label.scoreBreakdown);
}

View File

@ -4,6 +4,7 @@
#include "moses/FF/UnknownWordPenaltyProducer.h"
#include "moses/StaticData.h"
#include "util/string_stream.hh"
namespace Moses
{
@ -55,7 +56,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
TargetPhrase *targetPhrase = new TargetPhrase();
std::ostringstream alignmentSS;
util::StringStream alignmentSS;
for (std::size_t i = 0; i < e.tail.size(); ++i) {
const Word &symbol = e.tail[i]->pvertex.symbol;
if (symbol.IsNonTerminal()) {

View File

@ -285,7 +285,7 @@ void Manager<RuleMatcher>::RecombineAndSort(
// head pointers are updated to point to the vertex instances in the map and
// any 'duplicate' vertices are deleted.
// TODO Set?
typedef std::map<SVertex *, SVertex *, SVertexRecombinationOrderer> Map;
typedef boost::unordered_map<SVertex *, SVertex *, SVertexRecombinationUnordered, SVertexRecombinationUnordered> Map;
Map map;
for (std::vector<SHyperedge*>::const_iterator p = buffer.begin();
p != buffer.end(); ++p) {

View File

@ -349,7 +349,7 @@ void Manager<Parser>::RecombineAndSort(const std::vector<SHyperedge*> &buffer,
// head pointers are updated to point to the vertex instances in the map and
// any 'duplicate' vertices are deleted.
// TODO Set?
typedef std::map<SVertex *, SVertex *, SVertexRecombinationOrderer> Map;
typedef boost::unordered_map<SVertex *, SVertex *, SVertexRecombinationUnordered, SVertexRecombinationUnordered> Map;
Map map;
for (std::vector<SHyperedge*>::const_iterator p = buffer.begin();
p != buffer.end(); ++p) {

View File

@ -1,7 +1,5 @@
#include "SVertex.h"
#include "moses/FF/FFState.h"
#include "SHyperedge.h"
namespace Moses
@ -18,11 +16,38 @@ SVertex::~SVertex()
delete *p;
}
// Delete FFState objects.
for (std::vector<FFState*>::iterator p = state.begin();
p != state.end(); ++p) {
for (std::vector<FFState*>::iterator p = states.begin();
p != states.end(); ++p) {
delete *p;
}
}
size_t SVertex::hash() const
{
  // BUG FIX: 'seed' was declared but never initialized before being fed to
  // boost::hash_combine, so the hash was built on an indeterminate value
  // (undefined behaviour, nondeterministic recombination).  Start from zero.
  size_t seed = 0;
  // Fold in the hash of every feature-function state.
  for (size_t i = 0; i < states.size(); ++i) {
    boost::hash_combine(seed, states[i]->hash());
  }
  return seed;
}
bool SVertex::operator==(const SVertex& other) const
{
  // Vertices are interchangeable for recombination when every feature
  // state compares equal.  Assumes both vertices carry the same number of
  // states (both are sized from the same stateful feature-function list).
  for (size_t idx = 0; idx < states.size(); ++idx) {
    if (*states[idx] != *other.states[idx]) return false;
  }
  return true;
}
} // Syntax
} // Moses

View File

@ -1,6 +1,7 @@
#pragma once
#include <vector>
#include <stddef.h>
namespace Moses
{
@ -23,7 +24,12 @@ struct SVertex {
SHyperedge *best;
std::vector<SHyperedge*> recombined;
const PVertex *pvertex;
std::vector<FFState*> state;
std::vector<FFState*> states;
// for unordered_set in stack
size_t hash() const;
bool operator==(const SVertex& other) const;
};
} // Syntax

View File

@ -9,26 +9,18 @@ namespace Moses
namespace Syntax
{
struct SVertexRecombinationOrderer {
class SVertexRecombinationUnordered
{
public:
bool operator()(const SVertex &x, const SVertex &y) const {
int comp = 0;
for (std::size_t i = 0; i < x.state.size(); ++i) {
if (x.state[i] == NULL || y.state[i] == NULL) {
comp = x.state[i] - y.state[i];
} else {
comp = x.state[i]->Compare(*y.state[i]);
}
if (comp != 0) {
return comp < 0;
}
}
return false;
size_t operator()(const SVertex* hypo) const {
return hypo->hash();
}
bool operator()(const SVertex *x, const SVertex *y) const {
return operator()(*x, *y);
bool operator()(const SVertex* hypoA, const SVertex* hypoB) const {
return (*hypoA) == (*hypoB);
}
};
} // Syntax

View File

@ -47,7 +47,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
TargetPhrase *targetPhrase = new TargetPhrase();
std::ostringstream alignmentSS;
util::StringStream alignmentSS;
for (std::size_t i = 0; i < node.children.size(); ++i) {
const Word &symbol = node.children[i]->pvertex.symbol;
if (symbol.IsNonTerminal()) {

View File

@ -245,7 +245,7 @@ void Manager<RuleMatcher>::RecombineAndSort(
// head pointers are updated to point to the vertex instances in the map and
// any 'duplicate' vertices are deleted.
// TODO Set?
typedef std::map<SVertex *, SVertex *, SVertexRecombinationOrderer> Map;
typedef boost::unordered_map<SVertex *, SVertex *, SVertexRecombinationUnordered, SVertexRecombinationUnordered> Map;
Map map;
for (std::vector<SHyperedge*>::const_iterator p = buffer.begin();
p != buffer.end(); ++p) {

View File

@ -10,6 +10,7 @@
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include "moses/LM/Base.h"
#include "util/string_stream.hh"
using namespace Moses;
@ -40,8 +41,9 @@ void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
if (doLMStats)
lmAcc.resize((*tpi)->GetLMStats()->size(), 0);
for (; tpi != translationPath.end(); ++tpi) {
std::ostringstream sms;
std::ostringstream tms;
util::StringStream sms;
util::StringStream tms;
std::string target = (*tpi)->GetTargetPhraseStringRep();
std::string source = (*tpi)->GetSourcePhraseStringRep();
WordsRange twr = (*tpi)->GetCurrTargetWordsRange();

View File

@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "BlockHashIndex.h"
#include "CmphStringVectorAdapter.h"
#include "util/exception.hh"
#include "util/string_stream.hh"
#ifdef HAVE_CMPH
#include "cmph.h"
@ -98,11 +99,11 @@ size_t BlockHashIndex::GetFprint(const char* key) const
size_t BlockHashIndex::GetHash(size_t i, const char* key)
{
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#endif
if(m_hashes[i] == 0)
LoadRange(i);
//#ifdef WITH_THREADS
// boost::mutex::scoped_lock lock(m_mutex);
//#endif
//if(m_hashes[i] == 0)
//LoadRange(i);
#ifdef HAVE_CMPH
size_t idx = cmph_search((cmph_t*)m_hashes[i], key, (cmph_uint32) strlen(key));
#else
@ -322,9 +323,10 @@ size_t BlockHashIndex::GetSize() const
void BlockHashIndex::KeepNLastRanges(float ratio, float tolerance)
{
#ifdef WITH_THREADS
/*
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#endif
#endif
size_t n = m_hashes.size() * ratio;
size_t max = n * (1 + tolerance);
if(m_numLoadedRanges > max) {
@ -338,7 +340,7 @@ void BlockHashIndex::KeepNLastRanges(float ratio, float tolerance)
for(LastLoaded::reverse_iterator it = lastLoaded.rbegin() + size_t(n * (1 - tolerance));
it != lastLoaded.rend(); it++)
DropRange(it->second);
}
}*/
}
void BlockHashIndex::CalcHash(size_t current, void* source_void)
@ -366,10 +368,10 @@ void BlockHashIndex::CalcHash(size_t current, void* source_void)
if(lastKey > temp) {
if(source->nkeys != 2 || temp != "###DUMMY_KEY###") {
std::stringstream strme;
strme << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort" << std::endl;
strme << "1: " << lastKey << std::endl;
strme << "2: " << temp << std::endl;
util::StringStream strme;
strme << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort\n";
strme << "1: " << lastKey << "\n";
strme << "2: " << temp << "\n";
UTIL_THROW2(strme.str());
}
}

View File

@ -34,6 +34,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "StringVector.h"
#include "PackedArray.h"
#include "util/exception.hh"
#include "util/string_stream.hh"
#ifdef WITH_THREADS
#include "moses/ThreadPool.h"
@ -145,10 +146,10 @@ public:
size_t current = m_landmarks.size();
if(m_landmarks.size() && m_landmarks.back().str() >= keys[0]) {
std::stringstream strme;
strme << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort" << std::endl;
strme << "1: " << m_landmarks.back().str() << std::endl;
strme << "2: " << keys[0] << std::endl;
util::StringStream strme;
strme << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort\n";
strme << "1: " << m_landmarks.back().str() << "\n";
strme << "2: " << keys[0] << "\n";
UTIL_THROW2(strme.str());
}

View File

@ -155,10 +155,12 @@ LexicalReorderingTableCompact::
Load(std::string filePath)
{
std::FILE* pFile = std::fopen(filePath.c_str(), "r");
if(m_inMemory)
m_hash.Load(pFile);
else
m_hash.LoadIndex(pFile);
UTIL_THROW_IF2(pFile == NULL, "File " << filePath << " could not be opened");
//if(m_inMemory)
m_hash.Load(pFile);
//else
//m_hash.LoadIndex(pFile);
size_t read = 0;
read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, pFile);

View File

@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <algorithm>
#include <sys/stat.h>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/thread/tss.hpp>
#include "PhraseDictionaryCompact.h"
#include "moses/FactorCollection.h"
@ -43,6 +44,8 @@ using namespace boost::algorithm;
namespace Moses
{
typename PhraseDictionaryCompact::SentenceCache PhraseDictionaryCompact::m_sentenceCache;
PhraseDictionaryCompact::PhraseDictionaryCompact(const std::string &line)
:PhraseDictionary(line, true)
,m_inMemory(true)
@ -75,12 +78,12 @@ void PhraseDictionaryCompact::Load()
std::FILE* pFile = std::fopen(tFilePath.c_str() , "r");
size_t indexSize;
if(m_inMemory)
// Load source phrase index into memory
indexSize = m_hash.Load(pFile);
else
// Keep source phrase index on disk
indexSize = m_hash.LoadIndex(pFile);
//if(m_inMemory)
// Load source phrase index into memory
indexSize = m_hash.Load(pFile);
// else
// Keep source phrase index on disk
//indexSize = m_hash.LoadIndex(pFile);
size_t coderSize = m_phraseDecoder->Load(pFile);
@ -162,13 +165,9 @@ PhraseDictionaryCompact::~PhraseDictionaryCompact()
void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection* tpc)
{
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_sentenceMutex);
PhraseCache &ref = m_sentenceCache[boost::this_thread::get_id()];
#else
PhraseCache &ref = m_sentenceCache;
#endif
ref.push_back(tpc);
if(!m_sentenceCache.get())
m_sentenceCache.reset(new PhraseCache());
m_sentenceCache->push_back(tpc);
}
void PhraseDictionaryCompact::AddEquivPhrase(const Phrase &source,
@ -176,23 +175,16 @@ void PhraseDictionaryCompact::AddEquivPhrase(const Phrase &source,
void PhraseDictionaryCompact::CleanUpAfterSentenceProcessing(const InputType &source)
{
if(!m_inMemory)
m_hash.KeepNLastRanges(0.01, 0.2);
if(!m_sentenceCache.get())
m_sentenceCache.reset(new PhraseCache());
m_phraseDecoder->PruneCache();
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_sentenceMutex);
PhraseCache &ref = m_sentenceCache[boost::this_thread::get_id()];
#else
PhraseCache &ref = m_sentenceCache;
#endif
for(PhraseCache::iterator it = ref.begin(); it != ref.end(); it++)
for(PhraseCache::iterator it = m_sentenceCache->begin();
it != m_sentenceCache->end(); it++)
delete *it;
PhraseCache temp;
temp.swap(ref);
temp.swap(*m_sentenceCache);
ReduceCache();
}

View File

@ -52,13 +52,8 @@ protected:
bool m_useAlignmentInfo;
typedef std::vector<TargetPhraseCollection*> PhraseCache;
#ifdef WITH_THREADS
boost::mutex m_sentenceMutex;
typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
#else
typedef PhraseCache SentenceCache;
#endif
SentenceCache m_sentenceCache;
typedef boost::thread_specific_ptr<PhraseCache> SentenceCache;
static SentenceCache m_sentenceCache;
BlockHashIndex m_hash;
PhraseDecoder* m_phraseDecoder;

View File

@ -0,0 +1,32 @@
// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "TargetPhraseCollectionCache.h"
namespace Moses
{
boost::thread_specific_ptr<typename TargetPhraseCollectionCache::CacheMap>
TargetPhraseCollectionCache::m_phraseCache;
}

View File

@ -26,12 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <set>
#include <vector>
#ifdef WITH_THREADS
#ifdef BOOST_HAS_PTHREADS
#include <boost/thread/mutex.hpp>
#endif
#endif
#include <boost/thread/tss.hpp>
#include <boost/shared_ptr.hpp>
#include "moses/Phrase.h"
@ -63,12 +58,7 @@ private:
};
typedef std::map<Phrase, LastUsed> CacheMap;
CacheMap m_phraseCache;
#ifdef WITH_THREADS
boost::mutex m_mutex;
#endif
static boost::thread_specific_ptr<CacheMap> m_phraseCache;
public:
@ -80,31 +70,37 @@ public:
}
iterator Begin() {
return m_phraseCache.begin();
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
return m_phraseCache->begin();
}
const_iterator Begin() const {
return m_phraseCache.begin();
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
return m_phraseCache->begin();
}
iterator End() {
return m_phraseCache.end();
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
return m_phraseCache->end();
}
const_iterator End() const {
return m_phraseCache.end();
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
return m_phraseCache->end();
}
/** retrieve translations for source phrase from persistent cache **/
void Cache(const Phrase &sourcePhrase, TargetPhraseVectorPtr tpv,
size_t bitsLeft = 0, size_t maxRank = 0) {
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#endif
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
// check if source phrase is already in cache
iterator it = m_phraseCache.find(sourcePhrase);
if(it != m_phraseCache.end())
iterator it = m_phraseCache->find(sourcePhrase);
if(it != m_phraseCache->end())
// if found, just update clock
it->second.m_clock = clock();
else {
@ -113,19 +109,17 @@ public:
TargetPhraseVectorPtr tpv_temp(new TargetPhraseVector());
tpv_temp->resize(maxRank);
std::copy(tpv->begin(), tpv->begin() + maxRank, tpv_temp->begin());
m_phraseCache[sourcePhrase] = LastUsed(clock(), tpv_temp, bitsLeft);
(*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv_temp, bitsLeft);
} else
m_phraseCache[sourcePhrase] = LastUsed(clock(), tpv, bitsLeft);
(*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv, bitsLeft);
}
}
std::pair<TargetPhraseVectorPtr, size_t> Retrieve(const Phrase &sourcePhrase) {
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#endif
iterator it = m_phraseCache.find(sourcePhrase);
if(it != m_phraseCache.end()) {
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
iterator it = m_phraseCache->find(sourcePhrase);
if(it != m_phraseCache->end()) {
LastUsed &lu = it->second;
lu.m_clock = clock();
return std::make_pair(lu.m_tpv, lu.m_bitsLeft);
@ -135,34 +129,31 @@ public:
// if cache full, reduce
void Prune() {
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#endif
if(m_phraseCache.size() > m_max * (1 + m_tolerance)) {
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
if(m_phraseCache->size() > m_max * (1 + m_tolerance)) {
typedef std::set<std::pair<clock_t, Phrase> > Cands;
Cands cands;
for(CacheMap::iterator it = m_phraseCache.begin();
it != m_phraseCache.end(); it++) {
for(CacheMap::iterator it = m_phraseCache->begin();
it != m_phraseCache->end(); it++) {
LastUsed &lu = it->second;
cands.insert(std::make_pair(lu.m_clock, it->first));
}
for(Cands::iterator it = cands.begin(); it != cands.end(); it++) {
const Phrase& p = it->second;
m_phraseCache.erase(p);
m_phraseCache->erase(p);
if(m_phraseCache.size() < (m_max * (1 - m_tolerance)))
if(m_phraseCache->size() < (m_max * (1 - m_tolerance)))
break;
}
}
}
void CleanUp() {
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#endif
m_phraseCache.clear();
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
m_phraseCache->clear();
}
};

View File

@ -17,6 +17,7 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "util/exception.hh"
#include "util/string_stream.hh"
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
@ -38,7 +39,7 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
} else if (m_mode == "all" || m_mode == "all-restrict") {
UTIL_THROW2("Implementation has moved: use PhraseDictionaryGroup with restrict=true/false");
} else {
ostringstream msg;
util::StringStream msg;
msg << "combination mode unknown: " << m_mode;
throw runtime_error(msg.str());
}
@ -210,7 +211,7 @@ std::vector<std::vector<float> > PhraseDictionaryMultiModel::getWeights(size_t n
raw_weights.push_back(1.0/m_numModels); //uniform weights created online
}
} else if(weights_ptr->size() != m_numModels && weights_ptr->size() != m_numModels * numWeights) {
std::stringstream strme;
util::StringStream strme;
strme << "Must have either one multimodel weight per model (" << m_numModels << "), or one per weighted feature and model (" << numWeights << "*" << m_numModels << "). You have " << weights_ptr->size() << ".";
UTIL_THROW(util::Exception, strme.str());
} else {

View File

@ -18,6 +18,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "util/exception.hh"
#include "util/tokenize.hh"
#include "util/string_stream.hh"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
using namespace std;
@ -56,7 +57,7 @@ void PhraseDictionaryMultiModelCounts::SetParameter(const std::string& key, cons
else if (m_mode == "interpolate")
m_combineFunction = LinearInterpolationFromCounts;
else {
ostringstream msg;
util::StringStream msg;
msg << "combination mode unknown: " << m_mode;
throw runtime_error(msg.str());
}

View File

@ -15,6 +15,7 @@
#include "moses/PDTAimp.h"
#include "moses/TranslationTask.h"
#include "util/exception.hh"
#include "util/string_stream.hh"
using namespace std;
@ -52,7 +53,7 @@ void PhraseDictionaryTreeAdaptor::InitializeForInput(ttasksptr const& ttask)
vector<float> weight = staticData.GetWeights(this);
if(m_numScoreComponents!=weight.size()) {
std::stringstream strme;
util::StringStream strme;
UTIL_THROW2("ERROR: mismatch of number of scaling factors: " << weight.size()
<< " " << m_numScoreComponents);
}

View File

@ -126,14 +126,14 @@ void ReformatHieroRule(const string &lineOrig, string &out)
ReformatHieroRule(1, targetPhraseString, ntAlign);
ReformateHieroScore(scoreString);
stringstream align;
util::StringStream align;
map<size_t, pair<size_t, size_t> >::const_iterator iterAlign;
for (iterAlign = ntAlign.begin(); iterAlign != ntAlign.end(); ++iterAlign) {
const pair<size_t, size_t> &alignPoint = iterAlign->second;
align << alignPoint.first << "-" << alignPoint.second << " ";
}
stringstream ret;
util::StringStream ret;
ret << sourcePhraseString << " ||| "
<< targetPhraseString << " ||| "
<< scoreString << " ||| "

View File

@ -7,13 +7,14 @@
//
#include <iostream>
#include "util/string_stream.hh"
#include "SentenceAlignment.h"
namespace tmmt
{
std::string SentenceAlignment::getTargetString(const Vocabulary &vocab) const
{
std::stringstream strme;
util::StringStream strme;
for (size_t i = 0; i < target.size(); ++i) {
const WORD &word = vocab.GetWord(target[i]);
strme << word << " ";

View File

@ -12,6 +12,7 @@
#include <sstream>
#include <vector>
#include "Vocabulary.h"
#include "util/string_stream.hh"
namespace tmmt
{
@ -27,7 +28,7 @@ struct SentenceAlignment {
std::string getTargetString(const Vocabulary &vocab) const;
std::string getAlignmentString() const {
std::stringstream strme;
util::StringStream strme;
for (size_t i = 0; i < alignment.size(); ++i) {
const std::pair<int,int> &alignPair = alignment[i];
strme << alignPair.first << "-" << alignPair.second << " ";

View File

@ -483,14 +483,14 @@ SetInputScore(const InputPath &inputPath, PartialTranslOptColl &oldPtoc)
const ScorePair* inputScore = inputPath.GetInputScore();
if (inputScore == NULL) return;
const InputFeature &inputFeature = InputFeature::Instance();
const InputFeature *inputFeature = InputFeature::InstancePtr();
const std::vector<TranslationOption*> &transOpts = oldPtoc.GetList();
for (size_t i = 0; i < transOpts.size(); ++i) {
TranslationOption &transOpt = *transOpts[i];
ScoreComponentCollection &scores = transOpt.GetScoreBreakdown();
scores.PlusEquals(&inputFeature, *inputScore);
scores.PlusEquals(inputFeature, *inputScore);
}
}

View File

@ -35,8 +35,8 @@ TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
BOOST_FOREACH(PhraseDictionary* pd, PhraseDictionary::GetColl())
if (pd->ProvidesPrefixCheck()) prefixCheckers.push_back(pd);
const InputFeature &inputFeature = InputFeature::Instance();
UTIL_THROW_IF2(&inputFeature == NULL, "Input feature must be specified");
const InputFeature *inputFeature = InputFeature::InstancePtr();
UTIL_THROW_IF2(inputFeature == NULL, "Input feature must be specified");
size_t inputSize = input.GetSize();
m_inputPathMatrix.resize(inputSize);

View File

@ -28,8 +28,8 @@ TranslationOptionCollectionLattice
UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(),
"Not for models using the legqacy binary phrase table");
const InputFeature &inputFeature = InputFeature::Instance();
UTIL_THROW_IF2(&inputFeature == NULL, "Input feature must be specified");
const InputFeature *inputFeature = InputFeature::InstancePtr();
UTIL_THROW_IF2(inputFeature == NULL, "Input feature must be specified");
size_t maxPhraseLength = StaticData::Instance().GetMaxPhraseLength();
size_t size = input.GetSize();

View File

@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <cstdlib>
#include <cstring>
#include "util/exception.hh"
#include "util/string_stream.hh"
#include "TypeDef.h"
namespace Moses
@ -343,7 +344,7 @@ inline std::vector<std::string> TokenizeFirstOnly(const std::string& str,
template <typename T>
std::string Join(const std::string& delimiter, const std::vector<T>& items)
{
std::ostringstream outstr;
util::StringStream outstr;
if(items.size() == 0) return "";
outstr << items[0];
for(unsigned int i = 1; i < items.size(); i++)
@ -357,7 +358,7 @@ std::string Join(const std::string& delimiter, const std::vector<T>& items)
template<typename It>
std::string Join(const std::string &delim, It begin, It end)
{
std::ostringstream outstr;
util::StringStream outstr;
if (begin != end)
outstr << *begin++;
for ( ; begin != end; ++begin)

View File

@ -28,6 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "FactorCollection.h"
#include "StaticData.h" // needed to determine the FactorDelimiter
#include "util/exception.hh"
#include "util/string_stream.hh"
#include "util/tokenize_piece.hh"
using namespace std;
@ -79,7 +80,7 @@ void Word::Merge(const Word &sourceWord)
std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlank) const
{
stringstream strme;
util::StringStream strme;
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
bool firstPass = true;
unsigned int stop = min(max_fax(),factorType.size());
@ -195,7 +196,7 @@ TO_STRING_BODY(Word);
// friend
ostream& operator<<(ostream& out, const Word& word)
{
stringstream strme;
util::StringStream strme;
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
bool firstPass = true;
unsigned int stop = max_fax();
@ -208,7 +209,7 @@ ostream& operator<<(ostream& out, const Word& word)
} else {
strme << factorDelimiter;
}
strme << *factor;
strme << factor->GetString();
}
}
out << strme.str() << " ";

Some files were not shown because too many files have changed in this diff Show More