diff --git a/BUILD-INSTRUCTIONS.txt b/BUILD-INSTRUCTIONS.txt index 7b9bc3a8a..a41582bfa 100644 --- a/BUILD-INSTRUCTIONS.txt +++ b/BUILD-INSTRUCTIONS.txt @@ -7,3 +7,4 @@ into the source tree from elsewhere: * "bjam-files" is taken from Boost. * "util" and "lm" are taken from KenLM: https://github.com/kpu/kenlm + diff --git a/OnDiskPt/OnDiskWrapper.cpp b/OnDiskPt/OnDiskWrapper.cpp index 3bfb7d2b1..57fae5162 100644 --- a/OnDiskPt/OnDiskWrapper.cpp +++ b/OnDiskPt/OnDiskWrapper.cpp @@ -25,6 +25,7 @@ #include "OnDiskWrapper.h" #include "moses/Factor.h" #include "util/exception.hh" +#include "util/string_stream.hh" using namespace std; @@ -223,7 +224,8 @@ Word *OnDiskWrapper::ConvertFromMoses(const std::vector &fact { bool isNonTerminal = origWord.IsNonTerminal(); Word *newWord = new Word(isNonTerminal); - stringstream strme; + + util::StringStream strme; size_t factorType = factorsVec[0]; const Moses::Factor *factor = origWord.GetFactor(factorType); diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index 9f7c8b725..84de64d9c 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -3145,6 +3145,11 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/StringVector.h + + TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp + TranslationModel/CompactPT/TargetPhraseCollectionCache.h 1 diff --git a/contrib/sigtest-filter/Makefile b/contrib/sigtest-filter/Makefile index 55772929a..c7bbd1c58 100644 --- a/contrib/sigtest-filter/Makefile +++ b/contrib/sigtest-filter/Makefile @@ -1,5 +1,5 @@ SALMDIR=/Users/hieuhoang/workspace/salm -FLAVOR?=o64 +FLAVOR?=o32 INC=-I$(SALMDIR)/Src/Shared -I$(SALMDIR)/Src/SuffixArrayApplications -I$(SALMDIR)/Src/SuffixArrayApplications/SuffixArraySearch OBJS=$(SALMDIR)/Distribution/Linux/Objs/Search/_SuffixArrayApplicationBase.$(FLAVOR) 
$(SALMDIR)/Distribution/Linux/Objs/Search/_SuffixArraySearchApplicationBase.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Shared/_String.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Shared/_IDVocabulary.$(FLAVOR) diff --git a/lm/filter/arpa_io.cc b/lm/filter/arpa_io.cc index 2cae60f9a..cf2721ed5 100644 --- a/lm/filter/arpa_io.cc +++ b/lm/filter/arpa_io.cc @@ -33,10 +33,9 @@ template void WriteCounts(Stream &out, const std::vector &number) { - std::string buf; - util::StringStream stream(buf); + util::StringStream stream; WriteCounts(stream, number); - return buf.size(); + return stream.str().size(); } bool IsEntirelyWhiteSpace(const StringPiece &line) { diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp index 7de3206fd..5ee99e2db 100644 --- a/moses-cmd/Main.cpp +++ b/moses-cmd/Main.cpp @@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Moses main wrapper for executable for single-threaded and multi-threaded, simply calling decoder_main. **/ #include "moses/ExportInterface.h" +#include "util/string_stream.hh" /** main function of the command line version of the decoder **/ int main(int argc, char** argv) diff --git a/moses/ChartHypothesis.cpp b/moses/ChartHypothesis.cpp index 806222029..d06f5fe76 100644 --- a/moses/ChartHypothesis.cpp +++ b/moses/ChartHypothesis.cpp @@ -181,32 +181,6 @@ void ChartHypothesis::GetOutputPhrase(size_t leftRightMost, size_t numWords, Phr } } -/** check, if two hypothesis can be recombined. - this is actually a sorting function that allows us to - keep an ordered list of hypotheses. This makes recombination - much quicker. 
Returns one of 3 possible values: - -1 = this < compare - +1 = this > compare - 0 = this ==compare - \param compare the other hypo to compare to -*/ -int ChartHypothesis::RecombineCompare(const ChartHypothesis &compare) const -{ - int comp = 0; - - for (unsigned i = 0; i < m_ffStates.size(); ++i) { - if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL) - comp = m_ffStates[i] - compare.m_ffStates[i]; - else - comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]); - - if (comp != 0) - return comp; - } - - return 0; -} - /** calculate total score */ void ChartHypothesis::EvaluateWhenApplied() { @@ -325,6 +299,33 @@ void ChartHypothesis::SetWinningHypo(const ChartHypothesis *hypo) m_winningHypo = hypo; } +size_t ChartHypothesis::hash() const +{ + size_t seed = 0; // hash_combine reads seed before updating it, so it must start from a defined value + + // states + for (size_t i = 0; i < m_ffStates.size(); ++i) { + const FFState *state = m_ffStates[i]; + size_t hash = state->hash(); + boost::hash_combine(seed, hash); + } + return seed; + +} + +bool ChartHypothesis::operator==(const ChartHypothesis& other) const +{ + // states + for (size_t i = 0; i < m_ffStates.size(); ++i) { + const FFState &thisState = *m_ffStates[i]; + const FFState &otherState = *other.m_ffStates[i]; + if (thisState != otherState) { + return false; + } + } + return true; +} + TO_STRING_BODY(ChartHypothesis) // friend diff --git a/moses/ChartHypothesis.h b/moses/ChartHypothesis.h index 25216c04c..954a6fa3a 100644 --- a/moses/ChartHypothesis.h +++ b/moses/ChartHypothesis.h @@ -146,8 +146,6 @@ public: // leftRightMost: 1=left, 2=right void GetOutputPhrase(size_t leftRightMost, size_t numWords, Phrase &outPhrase) const; - int RecombineCompare(const ChartHypothesis &compare) const; - void EvaluateWhenApplied(); void AddArc(ChartHypothesis *loserHypo); @@ -214,6 +212,10 @@ public: return m_winningHypo; } + // for unordered_set in stack + size_t hash() const; + bool operator==(const ChartHypothesis& other) const; + TO_STRING(); }; // class ChartHypothesis diff --git 
a/moses/ChartHypothesisCollection.cpp b/moses/ChartHypothesisCollection.cpp index 42717c261..745f6f4d2 100644 --- a/moses/ChartHypothesisCollection.cpp +++ b/moses/ChartHypothesisCollection.cpp @@ -167,20 +167,6 @@ void ChartHypothesisCollection::Detach(const HCType::iterator &iter) void ChartHypothesisCollection::Remove(const HCType::iterator &iter) { ChartHypothesis *h = *iter; - - /* - stringstream strme(""); - strme << h->GetOutputPhrase(); - string toFind = "the goal of gene scientists is "; - size_t pos = toFind.find(strme.str()); - - if (pos == 0) - { - cerr << pos << " " << strme.str() << *h << endl; - cerr << *this << endl; - } - */ - Detach(iter); ChartHypothesis::Delete(h); } diff --git a/moses/ChartHypothesisCollection.h b/moses/ChartHypothesisCollection.h index b2464e151..c4297b9b3 100644 --- a/moses/ChartHypothesisCollection.h +++ b/moses/ChartHypothesisCollection.h @@ -42,18 +42,17 @@ public: /** functor to compare (chart) hypotheses by feature function states. * If 2 hypos are equal, according to this functor, then they can be recombined. */ -class ChartHypothesisRecombinationOrderer +class ChartHypothesisRecombinationUnordered { public: - bool operator()(const ChartHypothesis* hypoA, const ChartHypothesis* hypoB) const { - // assert in same cell - assert(hypoA->GetCurrSourceRange() == hypoB->GetCurrSourceRange()); - - // shouldn't be mixing hypos with different lhs - assert(hypoA->GetTargetLHS() == hypoB->GetTargetLHS()); - - return (hypoA->RecombineCompare(*hypoB) < 0); + size_t operator()(const ChartHypothesis* hypo) const { + return hypo->hash(); } + + bool operator()(const ChartHypothesis* hypoA, const ChartHypothesis* hypoB) const { + return (*hypoA) == (*hypoB); + } + }; /** Contains a set of unique hypos that have the same HS non-term. 
@@ -64,7 +63,8 @@ class ChartHypothesisCollection friend std::ostream& operator<<(std::ostream&, const ChartHypothesisCollection&); protected: - typedef std::set HCType; + //typedef std::set HCType; + typedef boost::unordered_set< ChartHypothesis*, ChartHypothesisRecombinationUnordered, ChartHypothesisRecombinationUnordered > HCType; HCType m_hypos; HypoList m_hyposOrdered; diff --git a/moses/ConfusionNet.cpp b/moses/ConfusionNet.cpp index 16db99c44..671f0d166 100644 --- a/moses/ConfusionNet.cpp +++ b/moses/ConfusionNet.cpp @@ -70,7 +70,7 @@ ConfusionNet() : InputType() if (SD.IsSyntax()) { m_defaultLabelSet.insert(SD.GetInputDefaultNonTerminal()); } - UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified"); + UTIL_THROW_IF2(InputFeature::InstancePtr() == NULL, "Input feature must be specified"); } ConfusionNet:: @@ -140,9 +140,9 @@ ReadFormat0(std::istream& in, const std::vector& factorOrder) Clear(); // const StaticData &staticData = StaticData::Instance(); - const InputFeature &inputFeature = InputFeature::Instance(); - size_t numInputScores = inputFeature.GetNumInputScores(); - size_t numRealWordCount = inputFeature.GetNumRealWordsInInput(); + const InputFeature *inputFeature = InputFeature::InstancePtr(); + size_t numInputScores = inputFeature->GetNumInputScores(); + size_t numRealWordCount = inputFeature->GetNumRealWordsInInput(); size_t totalCount = numInputScores + numRealWordCount; bool addRealWordCount = (numRealWordCount > 0); diff --git a/moses/FF/BleuScoreFeature.cpp b/moses/FF/BleuScoreFeature.cpp index a98964386..093af9ce0 100644 --- a/moses/FF/BleuScoreFeature.cpp +++ b/moses/FF/BleuScoreFeature.cpp @@ -48,6 +48,32 @@ int BleuScoreState::Compare(const FFState& o) const return 0; } + +size_t BleuScoreState::hash() const +{ + if (StaticData::Instance().IsSyntax()) + return 0; + + size_t ret = hash_value(m_words); + return ret; +} + +bool BleuScoreState::operator==(const FFState& o) const +{ + if (&o == this) + return true; 
+ + if (StaticData::Instance().IsSyntax()) + return true; + + const BleuScoreState& other = static_cast(o); + int c = m_words.Compare(other.m_words); + if (c == 0) + return true; + + return false; +} + std::ostream& operator<<(std::ostream& out, const BleuScoreState& state) { state.print(out); diff --git a/moses/FF/BleuScoreFeature.h b/moses/FF/BleuScoreFeature.h index e1a7f09c7..b6c9f766d 100644 --- a/moses/FF/BleuScoreFeature.h +++ b/moses/FF/BleuScoreFeature.h @@ -26,6 +26,9 @@ public: BleuScoreState(); virtual int Compare(const FFState& other) const; + size_t hash() const; + virtual bool operator==(const FFState& other) const; + void print(std::ostream& out) const; private: diff --git a/moses/FF/ConstrainedDecoding.cpp b/moses/FF/ConstrainedDecoding.cpp index 5485c401a..cd60d93b3 100644 --- a/moses/FF/ConstrainedDecoding.cpp +++ b/moses/FF/ConstrainedDecoding.cpp @@ -29,6 +29,19 @@ int ConstrainedDecodingState::Compare(const FFState& other) const return ret; } +size_t ConstrainedDecodingState::hash() const +{ + size_t ret = hash_value(m_outputPhrase); + return ret; +} + +bool ConstrainedDecodingState::operator==(const FFState& other) const +{ + const ConstrainedDecodingState &otherFF = static_cast(other); + bool ret = m_outputPhrase == otherFF.m_outputPhrase; + return ret; +} + ////////////////////////////////////////////////////////////////// ConstrainedDecoding::ConstrainedDecoding(const std::string &line) :StatefulFeatureFunction(1, line) diff --git a/moses/FF/ConstrainedDecoding.h b/moses/FF/ConstrainedDecoding.h index 67833a1b4..5c61eff38 100644 --- a/moses/FF/ConstrainedDecoding.h +++ b/moses/FF/ConstrainedDecoding.h @@ -18,6 +18,8 @@ public: ConstrainedDecodingState(const ChartHypothesis &hypo); int Compare(const FFState& other) const; + virtual size_t hash() const; + virtual bool operator==(const FFState& other) const; const Phrase &GetPhrase() const { return m_outputPhrase; diff --git a/moses/FF/ControlRecombination.cpp 
b/moses/FF/ControlRecombination.cpp index f7231d9b0..3ed4df36b 100644 --- a/moses/FF/ControlRecombination.cpp +++ b/moses/FF/ControlRecombination.cpp @@ -48,6 +48,31 @@ int ControlRecombinationState::Compare(const FFState& other) const } } +size_t ControlRecombinationState::hash() const +{ + size_t ret; + if (m_ff.GetType() == SameOutput) { + ret = hash_value(m_outputPhrase); + } else { + // hash the hypo address. States are only equal when they're actually the same hypo + ret = (size_t) m_hypo; + } + return ret; +} + +bool ControlRecombinationState::operator==(const FFState& other) const +{ + const ControlRecombinationState &otherFF = static_cast(other); + + if (m_ff.GetType() == SameOutput) { + int ret = m_outputPhrase.Compare(otherFF.m_outputPhrase); + return ret == 0; + } else { + // compare hypo address. Won't be equal unless they're actually the same hypo + return (m_hypo == otherFF.m_hypo); + } +} + std::vector ControlRecombination::DefaultWeights() const { UTIL_THROW_IF2(m_numScoreComponents, diff --git a/moses/FF/ControlRecombination.h b/moses/FF/ControlRecombination.h index f221f772f..bed671534 100644 --- a/moses/FF/ControlRecombination.h +++ b/moses/FF/ControlRecombination.h @@ -27,6 +27,8 @@ public: ControlRecombinationState(const ChartHypothesis &hypo, const ControlRecombination &ff); int Compare(const FFState& other) const; + virtual size_t hash() const; + virtual bool operator==(const FFState& other) const; const Phrase &GetPhrase() const { return m_outputPhrase; diff --git a/moses/FF/CoveredReferenceFeature.cpp b/moses/FF/CoveredReferenceFeature.cpp index dd2c890d7..7f2d2017a 100644 --- a/moses/FF/CoveredReferenceFeature.cpp +++ b/moses/FF/CoveredReferenceFeature.cpp @@ -1,6 +1,8 @@ +#include #include #include #include +#include #include "CoveredReferenceFeature.h" #include "moses/ScoreComponentCollection.h" #include "moses/Hypothesis.h" @@ -40,6 +42,17 @@ int CoveredReferenceState::Compare(const FFState& 
other) const // return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1; } +size_t CoveredReferenceState::hash() const +{ + UTIL_THROW2("TODO:Haven't figure this out yet"); +} + +bool CoveredReferenceState::operator==(const FFState& other) const +{ + UTIL_THROW2("TODO:Haven't figure this out yet"); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown diff --git a/moses/FF/CoveredReferenceFeature.h b/moses/FF/CoveredReferenceFeature.h index d5873f33e..6265b0a74 100644 --- a/moses/FF/CoveredReferenceFeature.h +++ b/moses/FF/CoveredReferenceFeature.h @@ -20,6 +20,9 @@ public: std::multiset m_coveredRef; int Compare(const FFState& other) const; + virtual size_t hash() const; + virtual bool operator==(const FFState& other) const; + }; class CoveredReferenceFeature : public StatefulFeatureFunction diff --git a/moses/FF/DistortionScoreProducer.cpp b/moses/FF/DistortionScoreProducer.cpp index e1571d2a9..9b696e9ce 100644 --- a/moses/FF/DistortionScoreProducer.cpp +++ b/moses/FF/DistortionScoreProducer.cpp @@ -20,6 +20,16 @@ struct DistortionState_traditional : public FFState { if (range.GetEndPos() > o.range.GetEndPos()) return 1; return 0; } + + size_t hash() const { + return range.GetEndPos(); + } + virtual bool operator==(const FFState& other) const { + const DistortionState_traditional& o = + static_cast(other); + return range.GetEndPos() == o.range.GetEndPos(); + } + }; std::vector DistortionScoreProducer::s_staticColl; diff --git a/moses/FF/FFState.h b/moses/FF/FFState.h index 8d9c61b4e..271dce377 100644 --- a/moses/FF/FFState.h +++ b/moses/FF/FFState.h @@ -2,7 +2,8 @@ #define moses_FFState_h #include - +#include +#include "util/exception.hh" namespace Moses { @@ -11,7 +12,13 @@ class FFState { public: virtual ~FFState(); - virtual int 
Compare(const FFState& other) const = 0; + //virtual int Compare(const FFState& other) const = 0; + virtual size_t hash() const = 0; + virtual bool operator==(const FFState& other) const = 0; + + virtual bool operator!=(const FFState& other) const { + return !(*this == other); + } }; class DummyState : public FFState @@ -21,6 +28,15 @@ public: int Compare(const FFState& other) const { return 0; } + + virtual size_t hash() const { + return 0; + } + + virtual bool operator==(const FFState& other) const { + return true; + } + }; } diff --git a/moses/FF/FeatureFunction.cpp b/moses/FF/FeatureFunction.cpp index 08ad26db8..9b1ea473b 100644 --- a/moses/FF/FeatureFunction.cpp +++ b/moses/FF/FeatureFunction.cpp @@ -112,12 +112,10 @@ void FeatureFunction::ParseLine(const std::string &line) if (m_description == "") { size_t index = description_counts.count(nameStub); - ostringstream dstream; - dstream << nameStub; - dstream << index; + string descr = SPrint(nameStub) + SPrint(index); description_counts.insert(nameStub); - m_description = dstream.str(); + m_description = descr; } } diff --git a/moses/FF/GlobalLexicalModelUnlimited.cpp b/moses/FF/GlobalLexicalModelUnlimited.cpp index 675af2b6b..15eec019c 100644 --- a/moses/FF/GlobalLexicalModelUnlimited.cpp +++ b/moses/FF/GlobalLexicalModelUnlimited.cpp @@ -5,6 +5,7 @@ #include "moses/Hypothesis.h" #include "moses/TranslationTask.h" #include "util/string_piece_hash.hh" +#include "util/string_stream.hh" using namespace std; @@ -131,7 +132,7 @@ void GlobalLexicalModelUnlimited::EvaluateWhenApplied(const Hypothesis& cur_hypo } if (m_biasFeature) { - stringstream feature; + util::StringStream feature; feature << "glm_"; feature << targetString; feature << "~"; @@ -165,7 +166,7 @@ void GlobalLexicalModelUnlimited::EvaluateWhenApplied(const Hypothesis& cur_hypo if (m_sourceContext) { if (sourceIndex == 0) { // add trigger feature for source - stringstream feature; + util::StringStream feature; feature << "glm_"; feature << 
targetString; feature << "~"; @@ -183,7 +184,7 @@ void GlobalLexicalModelUnlimited::EvaluateWhenApplied(const Hypothesis& cur_hypo contextExists = FindStringPiece(m_vocabSource, contextString ) != m_vocabSource.end(); if (m_unrestricted || contextExists) { - stringstream feature; + util::StringStream feature; feature << "glm_"; feature << targetString; feature << "~"; @@ -304,7 +305,7 @@ void GlobalLexicalModelUnlimited::EvaluateWhenApplied(const Hypothesis& cur_hypo } } } else { - stringstream feature; + util::StringStream feature; feature << "glm_"; feature << targetString; feature << "~"; @@ -323,7 +324,7 @@ void GlobalLexicalModelUnlimited::AddFeature(ScoreComponentCollection* accumulat StringPiece sourceTrigger, StringPiece sourceWord, StringPiece targetTrigger, StringPiece targetWord) const { - stringstream feature; + util::StringStream feature; feature << "glm_"; feature << targetTrigger; feature << ","; diff --git a/moses/FF/GlobalLexicalModelUnlimited.h b/moses/FF/GlobalLexicalModelUnlimited.h index 3507da352..473a3c0e0 100644 --- a/moses/FF/GlobalLexicalModelUnlimited.h +++ b/moses/FF/GlobalLexicalModelUnlimited.h @@ -16,8 +16,6 @@ #include "moses/FactorTypeSet.h" #include "moses/Sentence.h" -#include "moses/FF/FFState.h" - #ifdef WITH_THREADS #include #endif @@ -76,10 +74,6 @@ public: void InitializeForInput(ttasksptr const& ttask); - const FFState* EmptyHypothesisState(const InputType &) const { - return new DummyState(); - } - //TODO: This implements the old interface, but cannot be updated because //it appears to be stateful void EvaluateWhenApplied(const Hypothesis& cur_hypo, diff --git a/moses/FF/InputFeature.h b/moses/FF/InputFeature.h index c7b7237aa..1815cd8f4 100644 --- a/moses/FF/InputFeature.h +++ b/moses/FF/InputFeature.h @@ -17,11 +17,8 @@ protected: bool m_legacy; public: - static const InputFeature& Instance() { - return *s_instance; - } - static InputFeature& InstanceNonConst() { - return *s_instance; + static const InputFeature 
*InstancePtr() { + return s_instance; } InputFeature(const std::string &line); diff --git a/moses/FF/InternalTree.h b/moses/FF/InternalTree.h index a3db3487e..1dc7973f5 100644 --- a/moses/FF/InternalTree.h +++ b/moses/FF/InternalTree.h @@ -143,7 +143,15 @@ public: int Compare(const FFState& other) const { return 0; - }; + } + + virtual size_t hash() const { + return 0; + } + virtual bool operator==(const FFState& other) const { + return true; + } + }; -} \ No newline at end of file +} diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp index fc8258631..4ff88bd15 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses/FF/LexicalReordering/LexicalReordering.cpp @@ -107,7 +107,7 @@ EvaluateWhenApplied(const Hypothesis& hypo, { VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START" << std::endl); Scores score(GetNumScoreComponents(), 0); - const LRState *prev = dynamic_cast(prev_state); + const LRState *prev = static_cast(prev_state); LRState *next_state = prev->Expand(hypo.GetTranslationOption(), hypo.GetInput(), out); out->PlusEquals(this, score); diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp index 90de3ad9c..60d7a670c 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp +++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp @@ -331,6 +331,32 @@ Compare(const FFState& o) const return 1; } +size_t PhraseBasedReorderingState::hash() const +{ + size_t ret; + ret = hash_value(m_prevRange); + boost::hash_combine(ret, m_direction); + + return ret; +} + +bool PhraseBasedReorderingState::operator==(const FFState& o) const +{ + if (&o == this) return true; + + const PhraseBasedReorderingState &other = static_cast(o); + if (m_prevRange == other.m_prevRange) { + if (m_direction == LRModel::Forward) { + int compareScore = ComparePrevScores(other.m_prevOption); + return compareScore == 0; 
+ } else { + return true; + } + } else { + return false; + } +} + LRState* PhraseBasedReorderingState:: Expand(const TranslationOption& topt, const InputType& input, @@ -356,6 +382,7 @@ int BidirectionalReorderingState:: Compare(FFState const& o) const { + /* if (&o == this) return 0; BidirectionalReorderingState const &other @@ -363,6 +390,26 @@ Compare(FFState const& o) const int cmp = m_backward->Compare(*other.m_backward); return (cmp < 0) ? -1 : cmp ? 1 : m_forward->Compare(*other.m_forward); + */ + UTIL_THROW2("BidirectionalReorderingState::Compare() is obsolete; use hash() and operator==()"); +} + +size_t BidirectionalReorderingState::hash() const +{ + size_t ret = m_backward->hash(); + boost::hash_combine(ret, m_forward->hash()); + return ret; +} + +bool BidirectionalReorderingState::operator==(const FFState& o) const +{ + if (&o == this) return true; // identity shortcut: an object always equals itself + + BidirectionalReorderingState const &other + = static_cast(o); + + bool ret = (*m_backward == *other.m_backward) && (*m_forward == *other.m_forward); + return ret; } LRState* @@ -400,6 +447,20 @@ Compare(const FFState& o) const return m_reoStack.Compare(other.m_reoStack); } +size_t HReorderingBackwardState::hash() const +{ + size_t ret = m_reoStack.hash(); + return ret; +} + +bool HReorderingBackwardState::operator==(const FFState& o) const +{ + const HReorderingBackwardState& other + = static_cast(o); + bool ret = m_reoStack == other.m_reoStack; + return ret; +} + LRState* HReorderingBackwardState:: Expand(const TranslationOption& topt, const InputType& input, @@ -451,6 +512,26 @@ Compare(const FFState& o) const : (m_prevRange < other.m_prevRange) ? -1 : 1); } +size_t HReorderingForwardState::hash() const +{ + size_t ret; + ret = hash_value(m_prevRange); + return ret; +} + +bool HReorderingForwardState::operator==(const FFState& o) const +{ + if (&o == this) return true; + + HReorderingForwardState const& other + = static_cast(o); + + int compareScores = ((m_prevRange == other.m_prevRange) + ? ComparePrevScores(other.m_prevOption) + : (m_prevRange < other.m_prevRange) ? 
-1 : 1); + return compareScores == 0; +} + // For compatibility with the phrase-based reordering model, scoring is one // step delayed. // The forward model takes determines orientations heuristically as follows: diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h index 96b226a4e..be288deb9 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.h +++ b/moses/FF/LexicalReordering/LexicalReorderingState.h @@ -143,10 +143,6 @@ public: typedef LRModel::ReorderingType ReorderingType; - virtual - int - Compare(const FFState& o) const = 0; - virtual LRState* Expand(const TranslationOption& hypo, const InputType& input, @@ -226,7 +222,9 @@ public: int Compare(const FFState& o) const; - virtual + virtual size_t hash() const; + virtual bool operator==(const FFState& other) const; + LRState* Expand(const TranslationOption& topt, const InputType& input, ScoreComponentCollection* scores) const; @@ -253,6 +251,9 @@ public: int Compare(const FFState& o) const; + virtual size_t hash() const; + virtual bool operator==(const FFState& other) const; + virtual LRState* Expand(const TranslationOption& topt,const InputType& input, @@ -278,6 +279,10 @@ public: ReorderingStack reoStack); virtual int Compare(const FFState& o) const; + + virtual size_t hash() const; + virtual bool operator==(const FFState& other) const; + virtual LRState* Expand(const TranslationOption& hypo, const InputType& input, ScoreComponentCollection* scores) const; @@ -304,6 +309,10 @@ public: const TranslationOption &topt); virtual int Compare(const FFState& o) const; + + virtual size_t hash() const; + virtual bool operator==(const FFState& other) const; + virtual LRState* Expand(const TranslationOption& hypo, const InputType& input, ScoreComponentCollection* scores) const; diff --git a/moses/FF/LexicalReordering/ReorderingStack.cpp b/moses/FF/LexicalReordering/ReorderingStack.cpp index 49a723a36..84337b0e8 100644 --- 
a/moses/FF/LexicalReordering/ReorderingStack.cpp +++ b/moses/FF/LexicalReordering/ReorderingStack.cpp @@ -20,6 +20,18 @@ int ReorderingStack::Compare(const ReorderingStack& o) const return 0; } +size_t ReorderingStack::hash() const +{ + std::size_t ret = boost::hash_range(m_stack.begin(), m_stack.end()); + return ret; +} + +bool ReorderingStack::operator==(const ReorderingStack& o) const +{ + const ReorderingStack& other = static_cast(o); + return m_stack == other.m_stack; +} + // Method to push (shift element into the stack and reduce if reqd) int ReorderingStack::ShiftReduce(WordsRange input_span) { diff --git a/moses/FF/LexicalReordering/ReorderingStack.h b/moses/FF/LexicalReordering/ReorderingStack.h index 5a5b80d16..9d837e65d 100644 --- a/moses/FF/LexicalReordering/ReorderingStack.h +++ b/moses/FF/LexicalReordering/ReorderingStack.h @@ -28,6 +28,9 @@ private: public: int Compare(const ReorderingStack& o) const; + size_t hash() const; + bool operator==(const ReorderingStack& other) const; + int ShiftReduce(WordsRange input_span); private: diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp index 6c81ca414..f8416a284 100644 --- a/moses/FF/LexicalReordering/SparseReordering.cpp +++ b/moses/FF/LexicalReordering/SparseReordering.cpp @@ -8,6 +8,7 @@ #include "util/file_piece.hh" #include "util/string_piece.hh" +#include "util/string_stream.hh" #include "util/tokenize_piece.hh" #include "LexicalReordering.h" @@ -26,7 +27,7 @@ const std::string& SparseReorderingFeatureKey::Name (const string& wordListId) { static string kSep = "-"; static string name; - ostringstream buf; + util::StringStream buf; // type side position id word reotype if (type == Phrase) { buf << "phr"; @@ -88,7 +89,7 @@ SparseReordering::SparseReordering(const map& config, const Lexic ReadWeightMap(i->second); m_useWeightMap = true; for (int reoType=0; reoType<=LRModel::MAX; ++reoType) { - ostringstream buf; + util::StringStream buf; buf << 
reoType; m_featureMap2.push_back(m_producer->GetFeatureName(buf.str())); } diff --git a/moses/FF/OSM-Feature/osmHyp.cpp b/moses/FF/OSM-Feature/osmHyp.cpp index f971bbe8c..f7afbc782 100644 --- a/moses/FF/OSM-Feature/osmHyp.cpp +++ b/moses/FF/OSM-Feature/osmHyp.cpp @@ -39,6 +39,31 @@ int osmState::Compare(const FFState& otherBase) const return 0; } +size_t osmState::hash() const +{ + size_t ret = j; + + boost::hash_combine(ret, E); + boost::hash_combine(ret, gap); + boost::hash_combine(ret, lmState.length); + + return ret; +} + +bool osmState::operator==(const FFState& otherBase) const +{ + const osmState &other = static_cast(otherBase); + if (j != other.j) + return false; + if (E != other.E) + return false; + if (gap != other.gap) + return false; + if (lmState.length != other.lmState.length) + return false; + + return true; +} std::string osmState :: getName() const { @@ -157,11 +182,7 @@ int osmHypothesis :: firstOpenGap(vector & coverageVector) string osmHypothesis :: intToString(int num) { - - std::ostringstream stm; - stm< & gapVal); int getJ()const { return j; diff --git a/moses/FF/PhraseBoundaryFeature.cpp b/moses/FF/PhraseBoundaryFeature.cpp index 3fdcf27f9..b4a00b1ca 100644 --- a/moses/FF/PhraseBoundaryFeature.cpp +++ b/moses/FF/PhraseBoundaryFeature.cpp @@ -3,6 +3,7 @@ #include "moses/Hypothesis.h" #include "moses/TranslationOption.h" #include "moses/InputPath.h" +#include "util/string_stream.hh" using namespace std; @@ -17,6 +18,21 @@ int PhraseBoundaryState::Compare(const FFState& other) const return Word::Compare(*m_sourceWord,*(rhs.m_sourceWord)); } +size_t PhraseBoundaryState::hash() const +{ + size_t ret = hash_value(*m_targetWord); + boost::hash_combine(ret, hash_value(*m_sourceWord)); + + return ret; +} +bool PhraseBoundaryState::operator==(const FFState& other) const +{ + const PhraseBoundaryState& rhs = dynamic_cast(other); + bool ret = *m_targetWord == *rhs.m_targetWord && *m_sourceWord == *rhs.m_sourceWord; + return ret; +} + 
+///////////////////////////////////////////////////////////////////////////////////// PhraseBoundaryFeature::PhraseBoundaryFeature(const std::string &line) : StatefulFeatureFunction(0, line) { @@ -46,7 +62,7 @@ void PhraseBoundaryFeature::AddFeatures( ScoreComponentCollection* scores) const { for (size_t i = 0; i < factors.size(); ++i) { - ostringstream name; + util::StringStream name; name << side << ":"; name << factors[i]; name << ":"; diff --git a/moses/FF/PhraseBoundaryFeature.h b/moses/FF/PhraseBoundaryFeature.h index a5b55e1ef..9d12303e9 100644 --- a/moses/FF/PhraseBoundaryFeature.h +++ b/moses/FF/PhraseBoundaryFeature.h @@ -24,6 +24,8 @@ public: return m_targetWord; } virtual int Compare(const FFState& other) const; + virtual size_t hash() const; + virtual bool operator==(const FFState& other) const; private: diff --git a/moses/FF/PhraseLengthFeature.cpp b/moses/FF/PhraseLengthFeature.cpp index 0eb0740b8..9dbb0235e 100644 --- a/moses/FF/PhraseLengthFeature.cpp +++ b/moses/FF/PhraseLengthFeature.cpp @@ -3,6 +3,7 @@ #include "moses/Hypothesis.h" #include "moses/ScoreComponentCollection.h" #include "moses/TranslationOption.h" +#include "util/string_stream.hh" namespace Moses { @@ -25,13 +26,13 @@ void PhraseLengthFeature::EvaluateInIsolation(const Phrase &source size_t sourceLength = source.GetSize(); // create feature names - stringstream nameSource; + util::StringStream nameSource; nameSource << "s" << sourceLength; - stringstream nameTarget; + util::StringStream nameTarget; nameTarget << "t" << targetLength; - stringstream nameBoth; + util::StringStream nameBoth; nameBoth << sourceLength << "," << targetLength; // increase feature counts diff --git a/moses/FF/PhraseLengthFeature.h b/moses/FF/PhraseLengthFeature.h index 9e576946f..9ff36750c 100644 --- a/moses/FF/PhraseLengthFeature.h +++ b/moses/FF/PhraseLengthFeature.h @@ -6,7 +6,6 @@ #include #include "StatelessFeatureFunction.h" -#include "moses/FF/FFState.h" #include "moses/Word.h" #include 
"moses/FactorCollection.h" diff --git a/moses/FF/PhraseOrientationFeature.cpp b/moses/FF/PhraseOrientationFeature.cpp index 0865dcac5..f20663459 100644 --- a/moses/FF/PhraseOrientationFeature.cpp +++ b/moses/FF/PhraseOrientationFeature.cpp @@ -21,8 +21,17 @@ namespace Moses { +size_t PhraseOrientationFeatureState::hash() const +{ + UTIL_THROW2("TODO:Haven't figure this out yet"); +} +bool PhraseOrientationFeatureState::operator==(const FFState& other) const +{ + UTIL_THROW2("TODO:Haven't figure this out yet"); +} +//////////////////////////////////////////////////////////////////////////////// const std::string PhraseOrientationFeature::MORIENT("M"); const std::string PhraseOrientationFeature::SORIENT("S"); const std::string PhraseOrientationFeature::DORIENT("D"); diff --git a/moses/FF/PhraseOrientationFeature.h b/moses/FF/PhraseOrientationFeature.h index ad5b5a15e..0ed40927e 100644 --- a/moses/FF/PhraseOrientationFeature.h +++ b/moses/FF/PhraseOrientationFeature.h @@ -140,6 +140,9 @@ public: return 0; }; + virtual size_t hash() const; + virtual bool operator==(const FFState& other) const; + protected: static int CompareLeftBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState, bool useSparseNT) { diff --git a/moses/FF/PhrasePairFeature.cpp b/moses/FF/PhrasePairFeature.cpp index 1e343877c..d531d9a54 100644 --- a/moses/FF/PhrasePairFeature.cpp +++ b/moses/FF/PhrasePairFeature.cpp @@ -7,6 +7,7 @@ #include "moses/TranslationOption.h" #include "moses/InputPath.h" #include "util/string_piece_hash.hh" +#include "util/string_stream.hh" #include "util/exception.hh" using namespace std; @@ -126,7 +127,8 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input const bool use_topicid_prob = isnt.GetUseTopicIdAndProb(); // compute pair - ostringstream pair; + util::StringStream pair; + pair << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() ); for (size_t i = 1; i < 
source.GetSize(); ++i) { const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId); @@ -145,7 +147,8 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input if(use_topicid) { // use topicid as trigger const long topicid = isnt.GetTopicId(); - stringstream feature; + util::StringStream feature; + feature << m_description << "_"; if (topicid == -1) feature << "unk"; @@ -159,13 +162,13 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input // use topic probabilities const vector &topicid_prob = *(isnt.GetTopicIdAndProb()); if (atol(topicid_prob[0].c_str()) == -1) { - stringstream feature; + util::StringStream feature; feature << m_description << "_unk_"; feature << pair.str(); scoreBreakdown.SparsePlusEquals(feature.str(), 1); } else { for (size_t i=0; i+1 < topicid_prob.size(); i+=2) { - stringstream feature; + util::StringStream feature; feature << m_description << "_"; feature << topicid_prob[i]; feature << "_"; @@ -179,7 +182,7 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input const long docid = isnt.GetDocumentId(); for (set::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) { string sourceTrigger = *p; - ostringstream namestr; + util::StringStream namestr; namestr << m_description << "_"; namestr << sourceTrigger; namestr << "_"; @@ -207,7 +210,7 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger ) != m_vocabSource.end(); if (m_unrestricted || sourceTriggerExists) { - ostringstream namestr; + util::StringStream namestr; namestr << m_description << "_"; namestr << sourceTrigger; namestr << "~"; @@ -237,7 +240,7 @@ void PhrasePairFeature::EvaluateInIsolation(const Phrase &source , ScoreComponentCollection &estimatedFutureScore) const { if (m_simple) { - ostringstream namestr; + util::StringStream namestr; namestr << m_description << "_"; namestr << 
ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() ); for (size_t i = 1; i < source.GetSize(); ++i) { diff --git a/moses/FF/RulePairUnlexicalizedSource.cpp b/moses/FF/RulePairUnlexicalizedSource.cpp index d65810af8..fe1a4f648 100644 --- a/moses/FF/RulePairUnlexicalizedSource.cpp +++ b/moses/FF/RulePairUnlexicalizedSource.cpp @@ -4,7 +4,7 @@ #include "moses/ScoreComponentCollection.h" #include "moses/FactorCollection.h" #include - +#include "util/string_stream.hh" using namespace std; @@ -58,7 +58,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source } } - ostringstream namestr; + util::StringStream namestr; for (size_t posT=0; posT(o); + return m_targetLen == other.m_targetLen; + } + }; class SkeletonStatefulFF : public StatefulFeatureFunction diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.h b/moses/FF/SoftSourceSyntacticConstraintsFeature.h index 9bd7ffb70..7540dad25 100644 --- a/moses/FF/SoftSourceSyntacticConstraintsFeature.h +++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.h @@ -4,7 +4,6 @@ #include #include #include "StatelessFeatureFunction.h" -#include "FFState.h" #include "moses/Factor.h" namespace Moses diff --git a/moses/FF/SparseHieroReorderingFeature.cpp b/moses/FF/SparseHieroReorderingFeature.cpp index ee9d4b719..a6d3cea8e 100644 --- a/moses/FF/SparseHieroReorderingFeature.cpp +++ b/moses/FF/SparseHieroReorderingFeature.cpp @@ -6,6 +6,7 @@ #include "moses/Sentence.h" #include "util/exception.hh" +#include "util/string_stream.hh" #include "SparseHieroReorderingFeature.h" @@ -202,7 +203,7 @@ void SparseHieroReorderingFeature::EvaluateWhenApplied( targetLeftRulePos < targetRightRulePos))) { isMonotone = false; } - stringstream buf; + util::StringStream buf; buf << "h_"; //sparse reordering, Huck if (m_type == SourceLeft || m_type == SourceCombined) { buf << GetFactor(sourceLeftBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString(); diff --git a/moses/FF/SparseHieroReorderingFeature.h 
b/moses/FF/SparseHieroReorderingFeature.h index 945402412..132af58b1 100644 --- a/moses/FF/SparseHieroReorderingFeature.h +++ b/moses/FF/SparseHieroReorderingFeature.h @@ -10,7 +10,6 @@ #include "moses/Sentence.h" #include "StatelessFeatureFunction.h" -#include "FFState.h" namespace Moses { diff --git a/moses/FF/TargetBigramFeature.cpp b/moses/FF/TargetBigramFeature.cpp index 6816410f8..d85044817 100644 --- a/moses/FF/TargetBigramFeature.cpp +++ b/moses/FF/TargetBigramFeature.cpp @@ -17,6 +17,19 @@ int TargetBigramState::Compare(const FFState& other) const return Word::Compare(m_word,rhs.m_word); } +size_t TargetBigramState::hash() const +{ + std::size_t ret = hash_value(m_word); + return ret; +} + +bool TargetBigramState::operator==(const FFState& other) const +{ + const TargetBigramState& rhs = dynamic_cast(other); + return m_word == rhs.m_word; +} + +//////////////////////////////////////////////////////////////////////////////// TargetBigramFeature::TargetBigramFeature(const std::string &line) :StatefulFeatureFunction(0, line) { diff --git a/moses/FF/TargetBigramFeature.h b/moses/FF/TargetBigramFeature.h index f6e965808..7a3e97faf 100644 --- a/moses/FF/TargetBigramFeature.h +++ b/moses/FF/TargetBigramFeature.h @@ -21,6 +21,8 @@ public: return m_word; } virtual int Compare(const FFState& other) const; + size_t hash() const; + virtual bool operator==(const FFState& other) const; private: Word m_word; diff --git a/moses/FF/TargetNgramFeature.cpp b/moses/FF/TargetNgramFeature.cpp index 8414e1bc2..c1863bf67 100644 --- a/moses/FF/TargetNgramFeature.cpp +++ b/moses/FF/TargetNgramFeature.cpp @@ -37,6 +37,38 @@ int TargetNgramState::Compare(const FFState& other) const } } +size_t TargetNgramState::hash() const +{ + std::size_t ret = boost::hash_range(m_words.begin(), m_words.end()); + return ret; +} + +bool TargetNgramState::operator==(const FFState& other) const +{ + const TargetNgramState& rhs = dynamic_cast(other); + int result; + if (m_words.size() == 
rhs.m_words.size()) { + for (size_t i = 0; i < m_words.size(); ++i) { + result = Word::Compare(m_words[i],rhs.m_words[i]); + if (result != 0) return false; + } + return true; + } else if (m_words.size() < rhs.m_words.size()) { + for (size_t i = 0; i < m_words.size(); ++i) { + result = Word::Compare(m_words[i],rhs.m_words[i]); + if (result != 0) return false; + } + return true; + } else { + for (size_t i = 0; i < rhs.m_words.size(); ++i) { + result = Word::Compare(m_words[i],rhs.m_words[i]); + if (result != 0) return false; + } + return true; + } +} + +//////////////////////////////////////////////////////////////////////////// TargetNgramFeature::TargetNgramFeature(const std::string &line) :StatefulFeatureFunction(0, line) { @@ -108,7 +140,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo, // extract all ngrams from current hypothesis vector prev_words(tnState->GetWords()); - stringstream curr_ngram; + util::StringStream curr_ngram; bool skip = false; // include lower order ngrams? 
@@ -166,7 +198,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo, if (cur_hypo.GetWordsBitmap().IsComplete()) { for (size_t n = m_n; n >= smallest_n; --n) { - stringstream last_ngram; + util::StringStream last_ngram; skip = false; for (size_t i = cur_hypo.GetSize() - n + 1; i < cur_hypo.GetSize() && !skip; ++i) appendNgram(cur_hypo.GetWord(i), skip, last_ngram); @@ -176,7 +208,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo, accumulator->PlusEquals(this, last_ngram.str(), 1); } } - return NULL; + return new TargetNgramState(); } // prepare new state @@ -196,7 +228,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo, return new TargetNgramState(new_prev_words); } -void TargetNgramFeature::appendNgram(const Word& word, bool& skip, stringstream &ngram) const +void TargetNgramFeature::appendNgram(const Word& word, bool& skip, util::StringStream &ngram) const { // const string& w = word.GetFactor(m_factorType)->GetString(); const StringPiece w = word.GetString(m_factorType); @@ -249,7 +281,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo suffixTerminals++; // everything else else { - stringstream ngram; + util::StringStream ngram; ngram << m_baseName; if (m_factorType == 0) ngram << factorZero; @@ -360,7 +392,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo suffixTerminals = 0; // remove duplicates - stringstream curr_ngram; + util::StringStream curr_ngram; curr_ngram << m_baseName; curr_ngram << (*contextFactor[m_n-2]).GetString(m_factorType); curr_ngram << ":"; @@ -386,7 +418,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo // remove duplicates size_t size = contextFactor.size(); if (makePrefix && makeSuffix && (size <= m_n)) { - stringstream curr_ngram; + util::StringStream curr_ngram; curr_ngram << m_baseName; for (size_t i = 0; i < size; ++i) { curr_ngram << 
(*contextFactor[i]).GetString(m_factorType); @@ -404,7 +436,7 @@ FFState* TargetNgramFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo void TargetNgramFeature::MakePrefixNgrams(std::vector &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfStartPos, size_t offset) const { - stringstream ngram; + util::StringStream ngram; size_t size = contextFactor.size(); for (size_t k = 0; k < numberOfStartPos; ++k) { size_t max_end = (size < m_n+k+offset)? size: m_n+k+offset; @@ -429,7 +461,7 @@ void TargetNgramFeature::MakePrefixNgrams(std::vector &contextFacto void TargetNgramFeature::MakeSuffixNgrams(std::vector &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfEndPos, size_t offset) const { - stringstream ngram; + util::StringStream ngram; for (size_t k = 0; k < numberOfEndPos; ++k) { size_t end_pos = contextFactor.size()-1-k-offset; for (int start_pos=end_pos-1; (start_pos >= 0) && (end_pos-start_pos < m_n); --start_pos) { diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h index 2e9e71db0..ed279397c 100644 --- a/moses/FF/TargetNgramFeature.h +++ b/moses/FF/TargetNgramFeature.h @@ -12,6 +12,7 @@ #include "moses/LM/SingleFactor.h" #include "moses/ChartHypothesis.h" #include "moses/ChartManager.h" +#include "util/string_stream.hh" namespace Moses { @@ -19,12 +20,17 @@ namespace Moses class TargetNgramState : public FFState { public: - TargetNgramState(std::vector &words): m_words(words) {} + TargetNgramState() {} + + TargetNgramState(const std::vector &words): m_words(words) {} const std::vector GetWords() const { return m_words; } virtual int Compare(const FFState& other) const; + size_t hash() const; + virtual bool operator==(const FFState& other) const; + private: std::vector m_words; }; @@ -171,6 +177,45 @@ public: } return 0; } + + size_t hash() const { + // not sure if this is correct + size_t ret; + + ret = m_startPos; + boost::hash_combine(ret, m_endPos); + boost::hash_combine(ret, m_inputSize); 
+ + // prefix + if (m_startPos > 0) { // not for " ..." + boost::hash_combine(ret, hash_value(GetPrefix())); + } + + if (m_endPos < m_inputSize - 1) { // not for "... " + boost::hash_combine(ret, hash_value(GetSuffix())); + } + + return ret; + } + virtual bool operator==(const FFState& o) const { + const TargetNgramChartState &other = + static_cast( o ); + + // prefix + if (m_startPos > 0) { // not for " ..." + int ret = GetPrefix().Compare(other.GetPrefix()); + if (ret != 0) + return false; + } + + if (m_endPos < m_inputSize - 1) { // not for "... " + int ret = GetSuffix().Compare(other.GetSuffix()); + if (ret != 0) + return false; + } + return true; + } + }; /** Sets the features of observed ngrams. @@ -222,7 +267,7 @@ private: std::string m_baseName; - void appendNgram(const Word& word, bool& skip, std::stringstream& ngram) const; + void appendNgram(const Word& word, bool& skip, util::StringStream& ngram) const; void MakePrefixNgrams(std::vector &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfStartPos = 1, size_t offset = 0) const; void MakeSuffixNgrams(std::vector &contextFactor, ScoreComponentCollection* accumulator, diff --git a/moses/FF/WordTranslationFeature.cpp b/moses/FF/WordTranslationFeature.cpp index 1059e34de..88d0ef3a4 100644 --- a/moses/FF/WordTranslationFeature.cpp +++ b/moses/FF/WordTranslationFeature.cpp @@ -179,7 +179,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input if (m_simple) { // construct feature name - stringstream featureName; + util::StringStream featureName; featureName << m_description << "_"; featureName << sourceWord; featureName << "~"; @@ -193,7 +193,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input if(use_topicid) { // use topicid as trigger const long topicid = sentence.GetTopicId(); - stringstream feature; + util::StringStream feature; feature << m_description << "_"; if (topicid == -1) feature << "unk"; @@ -209,7 +209,7 @@ void 
WordTranslationFeature::EvaluateWithSourceContext(const InputType &input // use topic probabilities const vector &topicid_prob = *(input.GetTopicIdAndProb()); if (atol(topicid_prob[0].c_str()) == -1) { - stringstream feature; + util::StringStream feature; feature << m_description << "_unk_"; feature << sourceWord; feature << "~"; @@ -217,7 +217,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input scoreBreakdown.SparsePlusEquals(feature.str(), 1); } else { for (size_t i=0; i+1 < topicid_prob.size(); i+=2) { - stringstream feature; + util::StringStream feature; feature << m_description << "_"; feature << topicid_prob[i]; feature << "_"; @@ -233,7 +233,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input const long docid = input.GetDocumentId(); for (boost::unordered_set::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) { string sourceTrigger = *p; - stringstream feature; + util::StringStream feature; feature << m_description << "_"; feature << sourceTrigger; feature << "_"; @@ -248,7 +248,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input size_t globalSourceIndex = inputPath.GetWordsRange().GetStartPos() + sourceIndex; if (!m_domainTrigger && globalSourceIndex == 0) { // add trigger feature for source - stringstream feature; + util::StringStream feature; feature << m_description << "_"; feature << ","; feature << sourceWord; @@ -278,7 +278,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input if (m_domainTrigger) { if (sourceTriggerExists) { - stringstream feature; + util::StringStream feature; feature << m_description << "_"; feature << sourceTrigger; feature << "_"; @@ -288,7 +288,7 @@ void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input scoreBreakdown.SparsePlusEquals(feature.str(), 1); } } else if (m_unrestricted || sourceTriggerExists) { - stringstream feature; + util::StringStream 
feature; feature << m_description << "_"; if (contextIndex < globalSourceIndex) { feature << sourceTrigger; diff --git a/moses/FF/WordTranslationFeature.h b/moses/FF/WordTranslationFeature.h index 9ca41da46..fee9cb668 100644 --- a/moses/FF/WordTranslationFeature.h +++ b/moses/FF/WordTranslationFeature.h @@ -5,7 +5,6 @@ #include "moses/FactorCollection.h" #include "moses/Sentence.h" -#include "FFState.h" #include "StatelessFeatureFunction.h" namespace Moses @@ -43,10 +42,6 @@ public: void Load(); - const FFState* EmptyHypothesisState(const InputType &) const { - return new DummyState(); - } - void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase diff --git a/moses/FeatureVector.cpp b/moses/FeatureVector.cpp index 4195c5636..45a198c84 100644 --- a/moses/FeatureVector.cpp +++ b/moses/FeatureVector.cpp @@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "FeatureVector.h" #include "util/string_piece_hash.hh" +#include "util/string_stream.hh" using namespace std; @@ -204,7 +205,7 @@ void FVector::save(const string& filename) const { ofstream out(filename.c_str()); if (!out) { - ostringstream msg; + util::StringStream msg; msg << "Unable to open " << filename; throw runtime_error(msg.str()); } diff --git a/moses/File.h b/moses/File.h index fbf666ef9..43eacee83 100644 --- a/moses/File.h +++ b/moses/File.h @@ -12,6 +12,7 @@ #include #include #include "util/exception.hh" +#include "util/string_stream.hh" #include "TypeDef.h" #include "Util.h" @@ -147,7 +148,7 @@ inline OFF_T fTell(FILE* f) inline void fSeek(FILE* f,OFF_T o) { if(FSEEKO(f,o,SEEK_SET)<0) { - std::stringstream strme; + util::StringStream strme; strme << "ERROR: could not fseeko position " << o <<"\n"; if(o==InvalidOffT) strme << "You tried to seek for 'InvalidOffT'!\n"; UTIL_THROW2(strme.str()); diff --git a/moses/GenerationDictionary.cpp b/moses/GenerationDictionary.cpp index ddb1428d9..40ff28177 
100644 --- a/moses/GenerationDictionary.cpp +++ b/moses/GenerationDictionary.cpp @@ -28,6 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "InputFileStream.h" #include "StaticData.h" #include "util/exception.hh" +#include "util/string_stream.hh" using namespace std; @@ -84,9 +85,9 @@ void GenerationDictionary::Load() size_t numFeaturesInFile = token.size() - 2; if (numFeaturesInFile < numFeatureValuesInConfig) { - stringstream strme; + util::StringStream strme; strme << m_filePath << ":" << lineNum << ": expected " << numFeatureValuesInConfig - << " feature values, but found " << numFeaturesInFile << std::endl; + << " feature values, but found " << numFeaturesInFile << "\n"; throw strme.str(); } std::vector scores(numFeatureValuesInConfig, 0.0f); diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp index a75d68049..399a4b821 100644 --- a/moses/Hypothesis.cpp +++ b/moses/Hypothesis.cpp @@ -195,35 +195,6 @@ Create(Manager& manager, InputType const& m_source, #endif } -/** check, if two hypothesis can be recombined. - this is actually a sorting function that allows us to - keep an ordered list of hypotheses. This makes recombination - much quicker. -*/ -int -Hypothesis:: -RecombineCompare(const Hypothesis &compare) const -{ - // -1 = this < compare - // +1 = this > compare - // 0 = this ==compare - int comp = m_sourceCompleted.Compare(compare.m_sourceCompleted); - if (comp != 0) - return comp; - - for (unsigned i = 0; i < m_ffStates.size(); ++i) { - if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL) { - // TODO: Can this situation actually occur? 
- comp = int(m_ffStates[i] != NULL) - int(compare.m_ffStates[i] != NULL); - } else { - comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]); - } - if (comp != 0) return comp; - } - - return 0; -} - void Hypothesis:: EvaluateWhenApplied(StatefulFeatureFunction const& sfff, @@ -647,6 +618,40 @@ GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const return ret; } +size_t Hypothesis::hash() const +{ + size_t seed; + + // coverage + seed = m_sourceCompleted.hash(); + + // states + for (size_t i = 0; i < m_ffStates.size(); ++i) { + const FFState *state = m_ffStates[i]; + size_t hash = state->hash(); + boost::hash_combine(seed, hash); + } + return seed; +} + +bool Hypothesis::operator==(const Hypothesis& other) const +{ + // coverage + if (m_sourceCompleted != other.m_sourceCompleted) { + return false; + } + + // states + for (size_t i = 0; i < m_ffStates.size(); ++i) { + const FFState &thisState = *m_ffStates[i]; + const FFState &otherState = *other.m_ffStates[i]; + if (thisState != otherState) { + return false; + } + } + return true; +} + #ifdef HAVE_XMLRPC_C void Hypothesis:: diff --git a/moses/Hypothesis.h b/moses/Hypothesis.h index 8c198f2da..ba03cb663 100644 --- a/moses/Hypothesis.h +++ b/moses/Hypothesis.h @@ -197,8 +197,6 @@ public: return m_sourceCompleted.IsComplete(); } - int RecombineCompare(const Hypothesis &compare) const; - void GetOutputPhrase(Phrase &out) const; void ToStream(std::ostream& out) const { @@ -211,7 +209,7 @@ public: if (m_prevHypo != NULL) { m_prevHypo->ToStream(out); } - out << (Phrase) GetCurrTargetPhrase(); + out << (const Phrase&) GetCurrTargetPhrase(); } std::string GetOutputString() const { @@ -288,13 +286,16 @@ public: // creates a map of TARGET positions which should be replaced by word using placeholder std::map GetPlaceholders(const Moses::Hypothesis &hypo, Moses::FactorType placeholderFactor) const; + // for unordered_set in stack + size_t hash() const; + bool operator==(const Hypothesis& other) const; + 
#ifdef HAVE_XMLRPC_C void OutputWordAlignment(std::vector& out) const; void OutputLocalWordAlignment(std::vector& dest) const; #endif - }; std::ostream& operator<<(std::ostream& out, const Hypothesis& hypothesis); @@ -318,21 +319,17 @@ struct CompareHypothesisTotalScore { #define FREEHYPO(hypo) delete hypo #endif -/** defines less-than relation on hypotheses. -* The particular order is not important for us, we need just to figure out -* which hypothesis are equal based on: -* the last n-1 target words are the same -* and the covers (source words translated) are the same -* Directly using RecombineCompare is unreliable because the Compare methods -* of some states are based on archictecture-dependent pointer comparisons. -* That's why we use the hypothesis IDs instead. -*/ -class HypothesisRecombinationOrderer +class HypothesisRecombinationUnordered { public: - bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const { - return (hypoA->RecombineCompare(*hypoB) < 0); + size_t operator()(const Hypothesis* hypo) const { + return hypo->hash(); } + + bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const { + return (*hypoA) == (*hypoB); + } + }; } diff --git a/moses/HypothesisStack.h b/moses/HypothesisStack.h index 0c3d4198f..5c242e596 100644 --- a/moses/HypothesisStack.h +++ b/moses/HypothesisStack.h @@ -3,6 +3,7 @@ #include #include +#include #include "Hypothesis.h" #include "WordsBitmap.h" @@ -18,7 +19,7 @@ class HypothesisStack { protected: - typedef std::set< Hypothesis*, HypothesisRecombinationOrderer > _HCType; + typedef boost::unordered_set< Hypothesis*, HypothesisRecombinationUnordered, HypothesisRecombinationUnordered > _HCType; _HCType m_hypos; /**< contains hypotheses */ Manager& m_manager; diff --git a/moses/LM/BackwardLMState.cpp b/moses/LM/BackwardLMState.cpp index 466c4b655..22e7b2801 100644 --- a/moses/LM/BackwardLMState.cpp +++ b/moses/LM/BackwardLMState.cpp @@ -31,4 +31,16 @@ int BackwardLMState::Compare(const FFState 
&o) const return state.left.Compare(other.state.left); } +size_t BackwardLMState::hash() const +{ + size_t ret = hash_value(state.left); + return ret; +} +bool BackwardLMState::operator==(const FFState& o) const +{ + const BackwardLMState &other = static_cast(o); + bool ret = state.left == other.state.left; + return ret; +} + } diff --git a/moses/LM/BackwardLMState.h b/moses/LM/BackwardLMState.h index 09a768462..765a8350d 100644 --- a/moses/LM/BackwardLMState.h +++ b/moses/LM/BackwardLMState.h @@ -47,14 +47,11 @@ class BackwardLMState : public FFState public: - /* - int Compare(const FFState &o) const { - const BackwardLMState &other = static_cast(o); - return state.left.Compare(other.state.left); - } - */ int Compare(const FFState &o) const; + size_t hash() const; + virtual bool operator==(const FFState& other) const; + // Allow BackwardLanguageModel to access the private members of this class template friend class BackwardLanguageModel; diff --git a/moses/LM/BilingualLM.h b/moses/LM/BilingualLM.h index 67a6c2ea1..d9b068870 100644 --- a/moses/LM/BilingualLM.h +++ b/moses/LM/BilingualLM.h @@ -38,6 +38,15 @@ public: } int Compare(const FFState& other) const; + + virtual size_t hash() const { + return m_hash; + } + virtual bool operator==(const FFState& other) const { + const BilingualLMState &otherState = static_cast(other); + return m_hash == otherState.m_hash; + } + }; class BilingualLM : public StatefulFeatureFunction diff --git a/moses/LM/ChartState.h b/moses/LM/ChartState.h index f5afbca1c..210006763 100644 --- a/moses/LM/ChartState.h +++ b/moses/LM/ChartState.h @@ -146,6 +146,7 @@ public: } int Compare(const FFState& o) const { + /* const LanguageModelChartState &other = dynamic_cast( o ); @@ -164,7 +165,49 @@ public: return ret; } return 0; + */ } + + size_t hash() const { + size_t ret; + + // prefix + ret = m_hypo.GetCurrSourceRange().GetStartPos() > 0; + if (m_hypo.GetCurrSourceRange().GetStartPos() > 0) { // not for " ..." 
+ size_t hash = hash_value(GetPrefix()); + boost::hash_combine(ret, hash); + } + + // suffix + size_t inputSize = m_hypo.GetManager().GetSource().GetSize(); + boost::hash_combine(ret, m_hypo.GetCurrSourceRange().GetEndPos() < inputSize - 1); + if (m_hypo.GetCurrSourceRange().GetEndPos() < inputSize - 1) { // not for "... " + size_t hash = m_lmRightContext->hash(); + boost::hash_combine(ret, hash); + } + + return ret; + } + virtual bool operator==(const FFState& o) const { + const LanguageModelChartState &other = + dynamic_cast( o ); + + // prefix + if (m_hypo.GetCurrSourceRange().GetStartPos() > 0) { // not for " ..." + bool ret = GetPrefix() == other.GetPrefix(); + if (ret == false) + return false; + } + + // suffix + size_t inputSize = m_hypo.GetManager().GetSource().GetSize(); + if (m_hypo.GetCurrSourceRange().GetEndPos() < inputSize - 1) { // not for "... " + bool ret = (*other.GetRightContext()) == (*m_lmRightContext); + return ret; + } + return true; + } + }; } // namespace diff --git a/moses/LM/DALMWrapper.cpp b/moses/LM/DALMWrapper.cpp index 638060b0e..d10f46ebd 100644 --- a/moses/LM/DALMWrapper.cpp +++ b/moses/LM/DALMWrapper.cpp @@ -68,6 +68,13 @@ public: else return state.compare(o.state); } + virtual size_t hash() const { + UTIL_THROW2("TODO:Haven't figure this out yet"); + } + virtual bool operator==(const FFState& other) const { + UTIL_THROW2("TODO:Haven't figure this out yet"); + } + DALM::State &get_state() { return state; } @@ -178,6 +185,14 @@ public: if(rightContext.get_count() > o.rightContext.get_count()) return 1; return rightContext.compare(o.rightContext); } + + virtual size_t hash() const { + UTIL_THROW2("TODO:Haven't figure this out yet"); + } + virtual bool operator==(const FFState& other) const { + UTIL_THROW2("TODO:Haven't figure this out yet"); + } + }; LanguageModelDALM::LanguageModelDALM(const std::string &line) diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp index e86275050..f982ffcff 100644 --- a/moses/LM/Ken.cpp +++ 
b/moses/LM/Ken.cpp @@ -32,6 +32,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "lm/model.hh" #include "util/exception.hh" #include "util/tokenize_piece.hh" +#include "util/string_stream.hh" #include "Ken.h" #include "Base.h" @@ -61,6 +62,17 @@ struct KenLMState : public FFState { if (state.length > other.state.length) return 1; return std::memcmp(state.words, other.state.words, sizeof(lm::WordIndex) * state.length); } + + virtual size_t hash() const { + size_t ret = hash_value(state); + return ret; + } + virtual bool operator==(const FFState& o) const { + const KenLMState &other = static_cast(o); + bool ret = state == other.state; + return ret; + } + }; ///* @@ -307,6 +319,16 @@ public: return ret; } + size_t hash() const { + size_t ret = hash_value(m_state); + return ret; + } + virtual bool operator==(const FFState& o) const { + const LanguageModelChartStateKenLM &other = static_cast(o); + bool ret = m_state == other.m_state; + return ret; + } + private: lm::ngram::ChartState m_state; }; @@ -383,7 +405,7 @@ template FFState *LanguageModelKen::EvaluateWhenApplied(con } else if (word.IsNonTerminal()) { // Non-terminal is first so we can copy instead of rescoring. 
const Syntax::SVertex *pred = hyperedge.tail[nonTermIndexMap[phrasePos]]; - const lm::ngram::ChartState &prevState = static_cast(pred->state[featureID])->GetChartState(); + const lm::ngram::ChartState &prevState = static_cast(pred->states[featureID])->GetChartState(); float prob = UntransformLMScore( pred->best->label.scoreBreakdown.GetScoresForProducer(this)[0]); ruleScore.BeginNonTerminal(prevState, prob); @@ -395,7 +417,7 @@ template FFState *LanguageModelKen::EvaluateWhenApplied(con const Word &word = target.GetWord(phrasePos); if (word.IsNonTerminal()) { const Syntax::SVertex *pred = hyperedge.tail[nonTermIndexMap[phrasePos]]; - const lm::ngram::ChartState &prevState = static_cast(pred->state[featureID])->GetChartState(); + const lm::ngram::ChartState &prevState = static_cast(pred->states[featureID])->GetChartState(); float prob = UntransformLMScore( pred->best->label.scoreBreakdown.GetScoresForProducer(this)[0]); ruleScore.NonTerminal(prevState, prob); @@ -466,7 +488,7 @@ LanguageModel *ConstructKenLM(const std::string &lineOrig) util::TokenIter argument(lineOrig, ' '); ++argument; // KENLM - stringstream line; + util::StringStream line; line << "KENLM"; for (; argument; ++argument) { diff --git a/moses/LM/PointerState.h b/moses/LM/PointerState.h index c6c425198..8b2268f18 100644 --- a/moses/LM/PointerState.h +++ b/moses/LM/PointerState.h @@ -16,6 +16,15 @@ struct PointerState : public FFState { else if (other.lmstate < lmstate) return -1; return 0; } + + virtual size_t hash() const { + return (size_t) lmstate; + } + virtual bool operator==(const FFState& other) const { + const PointerState& o = static_cast(other); + return lmstate == o.lmstate; + } + }; } // namespace diff --git a/moses/LM/Remote.cpp b/moses/LM/Remote.cpp index 33946442a..4214c3d63 100644 --- a/moses/LM/Remote.cpp +++ b/moses/LM/Remote.cpp @@ -5,6 +5,7 @@ #include #include "Remote.h" #include "moses/Factor.h" +#include "util/string_stream.hh" #if !defined(_WIN32) && !defined(_WIN64) #include 
@@ -96,7 +97,7 @@ LMResult LanguageModelRemote::GetValue(const std::vector &contextFa cur->boState = *reinterpret_cast(&m_curId); ++m_curId; - std::ostringstream os; + util::StringStream os; os << "prob "; if (event_word == NULL) { os << ""; @@ -111,9 +112,8 @@ LMResult LanguageModelRemote::GetValue(const std::vector &contextFa os << ' ' << f->GetString(); } } - os << std::endl; - std::string out = os.str(); - write(sock, out.c_str(), out.size()); + os << "\n"; + write(sock, os.str().c_str(), os.str().size()); char res[6]; int r = read(sock, res, 6); int errors = 0; diff --git a/moses/Manager.cpp b/moses/Manager.cpp index f5168f244..f8733d7dd 100644 --- a/moses/Manager.cpp +++ b/moses/Manager.cpp @@ -56,6 +56,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "util/exception.hh" #include "util/random.hh" +#include "util/string_stream.hh" using namespace std; @@ -1971,7 +1972,7 @@ void Manager::OutputSearchGraphSLF() const // Output search graph in HTK standard lattice format (SLF) bool slf = staticData.GetOutputSearchGraphSLF(); if (slf) { - stringstream fileName; + util::StringStream fileName; string dir; staticData.GetParameter().SetParameter(dir, "output-search-graph-slf", ""); diff --git a/moses/PDTAimp.cpp b/moses/PDTAimp.cpp index 6770a5c17..b8bafeb3e 100644 --- a/moses/PDTAimp.cpp +++ b/moses/PDTAimp.cpp @@ -11,7 +11,7 @@ PDTAimp::PDTAimp(PhraseDictionaryTreeAdaptor *p) distinctE(0) { m_numInputScores = 0; - m_inputFeature = &InputFeature::Instance(); + m_inputFeature = InputFeature::InstancePtr(); if (m_inputFeature) { const PhraseDictionary *firstPt = PhraseDictionary::GetColl()[0]; diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp index 564b2e506..f6058cbd8 100644 --- a/moses/Parameter.cpp +++ b/moses/Parameter.cpp @@ -30,6 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "Util.h" #include "InputFileStream.h" #include "StaticData.h" +#include "util/string_stream.hh" 
#include "util/exception.hh" #include "util/random.hh" #include @@ -701,7 +702,7 @@ ConvertWeightArgsPhraseModel(const string &oldWeightName) size_t currOldInd = 0; for(size_t currDict = 0 ; currDict < translationVector.size(); currDict++) { - stringstream ptLine; + util::StringStream ptLine; vector token = Tokenize(translationVector[currDict]); @@ -860,7 +861,7 @@ ConvertWeightArgsDistortion() } SetWeight("LexicalReordering", indTable, weights); - stringstream strme; + util::StringStream strme; strme << "LexicalReordering " << "type=" << toks[1] << " "; @@ -1007,7 +1008,7 @@ ConvertWeightArgsGeneration(const std::string &oldWeightName, const std::string } SetWeight(newWeightName, indTable, weights); - stringstream strme; + util::StringStream strme; strme << "Generation " << "input-factor=" << modelToks[0] << " " << "output-factor=" << modelToks[1] << " " diff --git a/moses/Phrase.cpp b/moses/Phrase.cpp index 7a9e847ba..e47e0b4b9 100644 --- a/moses/Phrase.cpp +++ b/moses/Phrase.cpp @@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "StaticData.h" // GetMaxNumFactors #include "util/string_piece.hh" +#include "util/string_stream.hh" #include "util/tokenize_piece.hh" using namespace std; @@ -117,7 +118,7 @@ std::string Phrase::GetStringRep(const vector factorsToPrint) const { bool markUnknown = StaticData::Instance().GetMarkUnknown(); - stringstream strme; + util::StringStream strme; for (size_t pos = 0 ; pos < GetSize() ; pos++) { if (markUnknown && GetWord(pos).IsOOV()) { strme << StaticData::Instance().GetUnknownWordPrefix(); diff --git a/moses/ScoreComponentCollection.cpp b/moses/ScoreComponentCollection.cpp index 4d79d5565..d9810224e 100644 --- a/moses/ScoreComponentCollection.cpp +++ b/moses/ScoreComponentCollection.cpp @@ -4,6 +4,7 @@ #include #include #include "util/exception.hh" +#include "util/string_stream.hh" #include "ScoreComponentCollection.h" #include "StaticData.h" #include 
"moses/FF/StatelessFeatureFunction.h" @@ -88,9 +89,8 @@ void ScoreComponentCollection::MultiplyEquals(const FeatureFunction* sp, float s { std::string prefix = sp->GetScoreProducerDescription() + FName::SEP; for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) { - std::stringstream name; - name << i->first; - if (starts_with(name.str(), prefix)) + const std::string &name = i->first.name(); + if (starts_with(name, prefix)) m_scores[i->first] = i->second * scalar; } } @@ -101,9 +101,8 @@ size_t ScoreComponentCollection::GetNumberWeights(const FeatureFunction* sp) std::string prefix = sp->GetScoreProducerDescription() + FName::SEP; size_t weights = 0; for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) { - std::stringstream name; - name << i->first; - if (starts_with(name.str(), prefix)) + const std::string &name = i->first.name(); + if (starts_with(name, prefix)) weights++; } return weights; @@ -215,7 +214,7 @@ void ScoreComponentCollection::Save(const string& filename) const { ofstream out(filename.c_str()); if (!out) { - ostringstream msg; + util::StringStream msg; msg << "Unable to open " << filename; throw runtime_error(msg.str()); } diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp index 53b60e850..341e4d0cc 100644 --- a/moses/StaticData.cpp +++ b/moses/StaticData.cpp @@ -640,7 +640,7 @@ void StaticData::LoadDecodeGraphsOld(const vector &mappingVector, const switch (decodeType) { case Translate: if(index>=pts.size()) { - stringstream strme; + util::StringStream strme; strme << "No phrase dictionary with index " << index << " available!"; UTIL_THROW(util::Exception, strme.str()); @@ -649,7 +649,7 @@ void StaticData::LoadDecodeGraphsOld(const vector &mappingVector, const break; case Generate: if(index>=gens.size()) { - stringstream strme; + util::StringStream strme; strme << "No generation dictionary with index " << index << " available!"; UTIL_THROW(util::Exception, strme.str()); diff --git 
a/moses/Syntax/Cube.cpp b/moses/Syntax/Cube.cpp index 6cbf72903..0493c448e 100644 --- a/moses/Syntax/Cube.cpp +++ b/moses/Syntax/Cube.cpp @@ -93,7 +93,7 @@ SHyperedge *Cube::CreateHyperedge(const std::vector &coordinates) SVertex *head = new SVertex(); head->best = hyperedge; head->pvertex = 0; // FIXME??? - head->state.resize( + head->states.resize( StatefulFeatureFunction::GetStatefulFeatureFunctions().size()); hyperedge->head = head; @@ -131,7 +131,7 @@ SHyperedge *Cube::CreateHyperedge(const std::vector &coordinates) StatefulFeatureFunction::GetStatefulFeatureFunctions(); for (unsigned i = 0; i < ffs.size(); ++i) { if (!staticData.IsFeatureFunctionIgnored(*ffs[i])) { - head->state[i] = + head->states[i] = ffs[i]->EvaluateWhenApplied(*hyperedge, i, &hyperedge->label.scoreBreakdown); } diff --git a/moses/Syntax/F2S/GlueRuleSynthesizer.cpp b/moses/Syntax/F2S/GlueRuleSynthesizer.cpp index 09423f5d3..1e00c594a 100644 --- a/moses/Syntax/F2S/GlueRuleSynthesizer.cpp +++ b/moses/Syntax/F2S/GlueRuleSynthesizer.cpp @@ -4,6 +4,7 @@ #include "moses/FF/UnknownWordPenaltyProducer.h" #include "moses/StaticData.h" +#include "util/string_stream.hh" namespace Moses { @@ -55,7 +56,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase( TargetPhrase *targetPhrase = new TargetPhrase(); - std::ostringstream alignmentSS; + util::StringStream alignmentSS; for (std::size_t i = 0; i < e.tail.size(); ++i) { const Word &symbol = e.tail[i]->pvertex.symbol; if (symbol.IsNonTerminal()) { diff --git a/moses/Syntax/F2S/Manager-inl.h b/moses/Syntax/F2S/Manager-inl.h index 5c1b41295..19446c5cd 100644 --- a/moses/Syntax/F2S/Manager-inl.h +++ b/moses/Syntax/F2S/Manager-inl.h @@ -285,7 +285,7 @@ void Manager::RecombineAndSort( // head pointers are updated to point to the vertex instances in the map and // any 'duplicate' vertices are deleted. // TODO Set? 
- typedef std::map Map; + typedef boost::unordered_map Map; Map map; for (std::vector::const_iterator p = buffer.begin(); p != buffer.end(); ++p) { diff --git a/moses/Syntax/S2T/Manager-inl.h b/moses/Syntax/S2T/Manager-inl.h index 4963ec788..67588eb94 100644 --- a/moses/Syntax/S2T/Manager-inl.h +++ b/moses/Syntax/S2T/Manager-inl.h @@ -349,7 +349,7 @@ void Manager::RecombineAndSort(const std::vector &buffer, // head pointers are updated to point to the vertex instances in the map and // any 'duplicate' vertices are deleted. // TODO Set? - typedef std::map Map; + typedef boost::unordered_map Map; Map map; for (std::vector::const_iterator p = buffer.begin(); p != buffer.end(); ++p) { diff --git a/moses/Syntax/SVertex.cpp b/moses/Syntax/SVertex.cpp index 32650b2a8..cd4c1c666 100644 --- a/moses/Syntax/SVertex.cpp +++ b/moses/Syntax/SVertex.cpp @@ -1,7 +1,5 @@ #include "SVertex.h" - #include "moses/FF/FFState.h" - #include "SHyperedge.h" namespace Moses @@ -18,11 +16,38 @@ SVertex::~SVertex() delete *p; } // Delete FFState objects. 
- for (std::vector::iterator p = state.begin(); - p != state.end(); ++p) { + for (std::vector::iterator p = states.begin(); + p != states.end(); ++p) { delete *p; } } +size_t SVertex::hash() const +{ + size_t seed = 0; // hash_combine reads seed before mixing, so it must start from a defined value + + // states + for (size_t i = 0; i < states.size(); ++i) { + const FFState *state = states[i]; + size_t hash = state ? state->hash() : 0; // entries may be NULL (Cube only fills non-ignored feature functions) + boost::hash_combine(seed, hash); + } + return seed; + +} + +bool SVertex::operator==(const SVertex& other) const +{ + // states: NULL entries compare by pointer, as the old SVertexRecombinationOrderer did + for (size_t i = 0; i < states.size(); ++i) { + const FFState *thisState = states[i]; + const FFState *otherState = other.states[i]; + if ((thisState == NULL || otherState == NULL) ? (thisState != otherState) : (*thisState != *otherState)) { + return false; + } + } + return true; +} + } // Syntax } // Moses diff --git a/moses/Syntax/SVertex.h b/moses/Syntax/SVertex.h index e596cb442..9a5392d30 100644 --- a/moses/Syntax/SVertex.h +++ b/moses/Syntax/SVertex.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace Moses { @@ -23,7 +24,12 @@ struct SVertex { SHyperedge *best; std::vector recombined; const PVertex *pvertex; - std::vector state; + std::vector states; + + // for unordered_set in stack + size_t hash() const; + bool operator==(const SVertex& other) const; + }; } // Syntax diff --git a/moses/Syntax/SVertexRecombinationOrderer.h b/moses/Syntax/SVertexRecombinationOrderer.h index fcabed04a..a91a3b125 100644 --- a/moses/Syntax/SVertexRecombinationOrderer.h +++ b/moses/Syntax/SVertexRecombinationOrderer.h @@ -9,26 +9,18 @@ namespace Moses namespace Syntax { -struct SVertexRecombinationOrderer { + +class SVertexRecombinationUnordered +{ public: - bool operator()(const SVertex &x, const SVertex &y) const { - int comp = 0; - for (std::size_t i = 0; i < x.state.size(); ++i) { - if (x.state[i] == NULL || y.state[i] == NULL) { - comp = x.state[i] - y.state[i]; - } else { - comp = x.state[i]->Compare(*y.state[i]); - } - if (comp != 0) { - return comp < 0; - } - } - return false; + size_t operator()(const SVertex* hypo) const { + return hypo->hash(); } - bool
operator()(const SVertex *x, const SVertex *y) const { - return operator()(*x, *y); + bool operator()(const SVertex* hypoA, const SVertex* hypoB) const { + return (*hypoA) == (*hypoB); } + }; } // Syntax diff --git a/moses/Syntax/T2S/GlueRuleSynthesizer.cpp b/moses/Syntax/T2S/GlueRuleSynthesizer.cpp index 9c6dd91ab..0a0c07eea 100644 --- a/moses/Syntax/T2S/GlueRuleSynthesizer.cpp +++ b/moses/Syntax/T2S/GlueRuleSynthesizer.cpp @@ -47,7 +47,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase( TargetPhrase *targetPhrase = new TargetPhrase(); - std::ostringstream alignmentSS; + util::StringStream alignmentSS; for (std::size_t i = 0; i < node.children.size(); ++i) { const Word &symbol = node.children[i]->pvertex.symbol; if (symbol.IsNonTerminal()) { diff --git a/moses/Syntax/T2S/Manager-inl.h b/moses/Syntax/T2S/Manager-inl.h index 344d804e7..46d8b7177 100644 --- a/moses/Syntax/T2S/Manager-inl.h +++ b/moses/Syntax/T2S/Manager-inl.h @@ -245,7 +245,7 @@ void Manager::RecombineAndSort( // head pointers are updated to point to the vertex instances in the map and // any 'duplicate' vertices are deleted. // TODO Set? 
- typedef std::map Map; + typedef boost::unordered_map Map; Map map; for (std::vector::const_iterator p = buffer.begin(); p != buffer.end(); ++p) { diff --git a/moses/TranslationAnalysis.cpp b/moses/TranslationAnalysis.cpp index ed948f6b6..ebca957ce 100644 --- a/moses/TranslationAnalysis.cpp +++ b/moses/TranslationAnalysis.cpp @@ -10,6 +10,7 @@ #include "moses/FF/StatefulFeatureFunction.h" #include "moses/FF/StatelessFeatureFunction.h" #include "moses/LM/Base.h" +#include "util/string_stream.hh" using namespace Moses; @@ -40,8 +41,9 @@ void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo) if (doLMStats) lmAcc.resize((*tpi)->GetLMStats()->size(), 0); for (; tpi != translationPath.end(); ++tpi) { - std::ostringstream sms; - std::ostringstream tms; + util::StringStream sms; + + util::StringStream tms; std::string target = (*tpi)->GetTargetPhraseStringRep(); std::string source = (*tpi)->GetSourcePhraseStringRep(); WordsRange twr = (*tpi)->GetCurrTargetWordsRange(); diff --git a/moses/TranslationModel/CompactPT/BlockHashIndex.cpp b/moses/TranslationModel/CompactPT/BlockHashIndex.cpp index 27209f5bc..4130bd6a4 100644 --- a/moses/TranslationModel/CompactPT/BlockHashIndex.cpp +++ b/moses/TranslationModel/CompactPT/BlockHashIndex.cpp @@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "BlockHashIndex.h" #include "CmphStringVectorAdapter.h" #include "util/exception.hh" +#include "util/string_stream.hh" #ifdef HAVE_CMPH #include "cmph.h" @@ -98,11 +99,11 @@ size_t BlockHashIndex::GetFprint(const char* key) const size_t BlockHashIndex::GetHash(size_t i, const char* key) { -#ifdef WITH_THREADS - boost::mutex::scoped_lock lock(m_mutex); -#endif - if(m_hashes[i] == 0) - LoadRange(i); +//#ifdef WITH_THREADS +// boost::mutex::scoped_lock lock(m_mutex); +//#endif + //if(m_hashes[i] == 0) + //LoadRange(i); #ifdef HAVE_CMPH size_t idx = cmph_search((cmph_t*)m_hashes[i], key, (cmph_uint32) strlen(key)); #else @@ 
-322,9 +323,10 @@ size_t BlockHashIndex::GetSize() const void BlockHashIndex::KeepNLastRanges(float ratio, float tolerance) { -#ifdef WITH_THREADS + /* + #ifdef WITH_THREADS boost::mutex::scoped_lock lock(m_mutex); -#endif + #endif size_t n = m_hashes.size() * ratio; size_t max = n * (1 + tolerance); if(m_numLoadedRanges > max) { @@ -338,7 +340,7 @@ void BlockHashIndex::KeepNLastRanges(float ratio, float tolerance) for(LastLoaded::reverse_iterator it = lastLoaded.rbegin() + size_t(n * (1 - tolerance)); it != lastLoaded.rend(); it++) DropRange(it->second); - } + }*/ } void BlockHashIndex::CalcHash(size_t current, void* source_void) @@ -366,10 +368,10 @@ void BlockHashIndex::CalcHash(size_t current, void* source_void) if(lastKey > temp) { if(source->nkeys != 2 || temp != "###DUMMY_KEY###") { - std::stringstream strme; - strme << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort" << std::endl; - strme << "1: " << lastKey << std::endl; - strme << "2: " << temp << std::endl; + util::StringStream strme; + strme << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort\n"; + strme << "1: " << lastKey << "\n"; + strme << "2: " << temp << "\n"; UTIL_THROW2(strme.str()); } } diff --git a/moses/TranslationModel/CompactPT/BlockHashIndex.h b/moses/TranslationModel/CompactPT/BlockHashIndex.h index 0f20fa1b2..3de46272a 100644 --- a/moses/TranslationModel/CompactPT/BlockHashIndex.h +++ b/moses/TranslationModel/CompactPT/BlockHashIndex.h @@ -34,6 +34,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "StringVector.h" #include "PackedArray.h" #include "util/exception.hh" +#include "util/string_stream.hh" #ifdef WITH_THREADS #include "moses/ThreadPool.h" @@ -145,10 +146,10 @@ public: size_t current = m_landmarks.size(); if(m_landmarks.size() && m_landmarks.back().str() >= keys[0]) { - std::stringstream strme; - strme << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort" << std::endl; - strme << "1: 
" << m_landmarks.back().str() << std::endl; - strme << "2: " << keys[0] << std::endl; + util::StringStream strme; + strme << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort\n"; + strme << "1: " << m_landmarks.back().str() << "\n"; + strme << "2: " << keys[0] << "\n"; UTIL_THROW2(strme.str()); } diff --git a/moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp b/moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp index cd71b1776..729df525b 100644 --- a/moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp +++ b/moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp @@ -155,10 +155,12 @@ LexicalReorderingTableCompact:: Load(std::string filePath) { std::FILE* pFile = std::fopen(filePath.c_str(), "r"); - if(m_inMemory) - m_hash.Load(pFile); - else - m_hash.LoadIndex(pFile); + UTIL_THROW_IF2(pFile == NULL, "File " << filePath << " could not be opened"); + + //if(m_inMemory) + m_hash.Load(pFile); + //else + //m_hash.LoadIndex(pFile); size_t read = 0; read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, pFile); diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp index afed99057..efa015140 100644 --- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp +++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp @@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include #include #include +#include #include "PhraseDictionaryCompact.h" #include "moses/FactorCollection.h" @@ -43,6 +44,8 @@ using namespace boost::algorithm; namespace Moses { +typename PhraseDictionaryCompact::SentenceCache PhraseDictionaryCompact::m_sentenceCache; + PhraseDictionaryCompact::PhraseDictionaryCompact(const std::string &line) :PhraseDictionary(line, true) ,m_inMemory(true) @@ -75,12 +78,12 @@ void PhraseDictionaryCompact::Load() std::FILE* pFile = 
std::fopen(tFilePath.c_str() , "r"); size_t indexSize; - if(m_inMemory) - // Load source phrase index into memory - indexSize = m_hash.Load(pFile); - else - // Keep source phrase index on disk - indexSize = m_hash.LoadIndex(pFile); + //if(m_inMemory) + // Load source phrase index into memory + indexSize = m_hash.Load(pFile); +// else + // Keep source phrase index on disk + //indexSize = m_hash.LoadIndex(pFile); size_t coderSize = m_phraseDecoder->Load(pFile); @@ -162,13 +165,9 @@ PhraseDictionaryCompact::~PhraseDictionaryCompact() void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection* tpc) { -#ifdef WITH_THREADS - boost::mutex::scoped_lock lock(m_sentenceMutex); - PhraseCache &ref = m_sentenceCache[boost::this_thread::get_id()]; -#else - PhraseCache &ref = m_sentenceCache; -#endif - ref.push_back(tpc); + if(!m_sentenceCache.get()) + m_sentenceCache.reset(new PhraseCache()); + m_sentenceCache->push_back(tpc); } void PhraseDictionaryCompact::AddEquivPhrase(const Phrase &source, @@ -176,23 +175,16 @@ void PhraseDictionaryCompact::AddEquivPhrase(const Phrase &source, void PhraseDictionaryCompact::CleanUpAfterSentenceProcessing(const InputType &source) { - if(!m_inMemory) - m_hash.KeepNLastRanges(0.01, 0.2); + if(!m_sentenceCache.get()) + m_sentenceCache.reset(new PhraseCache()); m_phraseDecoder->PruneCache(); - -#ifdef WITH_THREADS - boost::mutex::scoped_lock lock(m_sentenceMutex); - PhraseCache &ref = m_sentenceCache[boost::this_thread::get_id()]; -#else - PhraseCache &ref = m_sentenceCache; -#endif - - for(PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) + for(PhraseCache::iterator it = m_sentenceCache->begin(); + it != m_sentenceCache->end(); it++) delete *it; PhraseCache temp; - temp.swap(ref); + temp.swap(*m_sentenceCache); ReduceCache(); } diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h index 379bbe844..665ded3fc 100644 --- 
a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h +++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h @@ -52,13 +52,8 @@ protected: bool m_useAlignmentInfo; typedef std::vector PhraseCache; -#ifdef WITH_THREADS - boost::mutex m_sentenceMutex; - typedef std::map SentenceCache; -#else - typedef PhraseCache SentenceCache; -#endif - SentenceCache m_sentenceCache; + typedef boost::thread_specific_ptr SentenceCache; + static SentenceCache m_sentenceCache; BlockHashIndex m_hash; PhraseDecoder* m_phraseDecoder; diff --git a/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp b/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp new file mode 100644 index 000000000..964ab4528 --- /dev/null +++ b/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp @@ -0,0 +1,32 @@ +// $Id$ +// vim:tabstop=2 +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2006 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. 
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#include "TargetPhraseCollectionCache.h" + +namespace Moses +{ + + +boost::thread_specific_ptr +TargetPhraseCollectionCache::m_phraseCache; + +} + diff --git a/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.h b/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.h index 1d5ed0da3..e017a3c19 100644 --- a/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.h +++ b/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.h @@ -26,12 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include #include -#ifdef WITH_THREADS -#ifdef BOOST_HAS_PTHREADS -#include -#endif -#endif - +#include #include #include "moses/Phrase.h" @@ -63,12 +58,7 @@ private: }; typedef std::map CacheMap; - - CacheMap m_phraseCache; - -#ifdef WITH_THREADS - boost::mutex m_mutex; -#endif + static boost::thread_specific_ptr m_phraseCache; public: @@ -80,31 +70,37 @@ public: } iterator Begin() { - return m_phraseCache.begin(); + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + return m_phraseCache->begin(); } const_iterator Begin() const { - return m_phraseCache.begin(); + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + return m_phraseCache->begin(); } iterator End() { - return m_phraseCache.end(); + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + return m_phraseCache->end(); } const_iterator End() const { - return m_phraseCache.end(); + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + return m_phraseCache->end(); } /** retrieve translations for source phrase from persistent cache **/ void Cache(const Phrase &sourcePhrase, TargetPhraseVectorPtr tpv, size_t bitsLeft = 0, 
size_t maxRank = 0) { -#ifdef WITH_THREADS - boost::mutex::scoped_lock lock(m_mutex); -#endif - + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); // check if source phrase is already in cache - iterator it = m_phraseCache.find(sourcePhrase); - if(it != m_phraseCache.end()) + iterator it = m_phraseCache->find(sourcePhrase); + if(it != m_phraseCache->end()) // if found, just update clock it->second.m_clock = clock(); else { @@ -113,19 +109,17 @@ public: TargetPhraseVectorPtr tpv_temp(new TargetPhraseVector()); tpv_temp->resize(maxRank); std::copy(tpv->begin(), tpv->begin() + maxRank, tpv_temp->begin()); - m_phraseCache[sourcePhrase] = LastUsed(clock(), tpv_temp, bitsLeft); + (*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv_temp, bitsLeft); } else - m_phraseCache[sourcePhrase] = LastUsed(clock(), tpv, bitsLeft); + (*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv, bitsLeft); } } std::pair Retrieve(const Phrase &sourcePhrase) { -#ifdef WITH_THREADS - boost::mutex::scoped_lock lock(m_mutex); -#endif - - iterator it = m_phraseCache.find(sourcePhrase); - if(it != m_phraseCache.end()) { + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + iterator it = m_phraseCache->find(sourcePhrase); + if(it != m_phraseCache->end()) { LastUsed &lu = it->second; lu.m_clock = clock(); return std::make_pair(lu.m_tpv, lu.m_bitsLeft); @@ -135,34 +129,31 @@ public: // if cache full, reduce void Prune() { -#ifdef WITH_THREADS - boost::mutex::scoped_lock lock(m_mutex); -#endif - - if(m_phraseCache.size() > m_max * (1 + m_tolerance)) { + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + if(m_phraseCache->size() > m_max * (1 + m_tolerance)) { typedef std::set > Cands; Cands cands; - for(CacheMap::iterator it = m_phraseCache.begin(); - it != m_phraseCache.end(); it++) { + for(CacheMap::iterator it = m_phraseCache->begin(); + it != m_phraseCache->end(); it++) { LastUsed &lu = it->second; cands.insert(std::make_pair(lu.m_clock, it->first)); } 
for(Cands::iterator it = cands.begin(); it != cands.end(); it++) { const Phrase& p = it->second; - m_phraseCache.erase(p); + m_phraseCache->erase(p); - if(m_phraseCache.size() < (m_max * (1 - m_tolerance))) + if(m_phraseCache->size() < (m_max * (1 - m_tolerance))) break; } } } void CleanUp() { -#ifdef WITH_THREADS - boost::mutex::scoped_lock lock(m_mutex); -#endif - m_phraseCache.clear(); + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + m_phraseCache->clear(); } }; diff --git a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp index 8fc891979..e3ccaaf65 100644 --- a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp +++ b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp @@ -17,6 +17,7 @@ License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #include "util/exception.hh" +#include "util/string_stream.hh" #include "moses/TranslationModel/PhraseDictionaryMultiModel.h" @@ -38,7 +39,7 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line) } else if (m_mode == "all" || m_mode == "all-restrict") { UTIL_THROW2("Implementation has moved: use PhraseDictionaryGroup with restrict=true/false"); } else { - ostringstream msg; + util::StringStream msg; msg << "combination mode unknown: " << m_mode; throw runtime_error(msg.str()); } @@ -210,7 +211,7 @@ std::vector > PhraseDictionaryMultiModel::getWeights(size_t n raw_weights.push_back(1.0/m_numModels); //uniform weights created online } } else if(weights_ptr->size() != m_numModels && weights_ptr->size() != m_numModels * numWeights) { - std::stringstream strme; + util::StringStream strme; strme << "Must have either one multimodel weight per model (" << m_numModels << "), or one per weighted feature and model (" << numWeights << "*" << m_numModels << "). 
You have " << weights_ptr->size() << "."; UTIL_THROW(util::Exception, strme.str()); } else { diff --git a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp index 773e027cc..769e6410f 100644 --- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp +++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp @@ -18,6 +18,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #include "util/exception.hh" #include "util/tokenize.hh" +#include "util/string_stream.hh" #include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h" using namespace std; @@ -56,7 +57,7 @@ void PhraseDictionaryMultiModelCounts::SetParameter(const std::string& key, cons else if (m_mode == "interpolate") m_combineFunction = LinearInterpolationFromCounts; else { - ostringstream msg; + util::StringStream msg; msg << "combination mode unknown: " << m_mode; throw runtime_error(msg.str()); } diff --git a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp index ca1638708..f0c744155 100644 --- a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp +++ b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp @@ -15,6 +15,7 @@ #include "moses/PDTAimp.h" #include "moses/TranslationTask.h" #include "util/exception.hh" +#include "util/string_stream.hh" using namespace std; @@ -52,7 +53,7 @@ void PhraseDictionaryTreeAdaptor::InitializeForInput(ttasksptr const& ttask) vector weight = staticData.GetWeights(this); if(m_numScoreComponents!=weight.size()) { - std::stringstream strme; + util::StringStream strme; UTIL_THROW2("ERROR: mismatch of number of scaling factors: " << weight.size() << " " << m_numScoreComponents); } diff --git a/moses/TranslationModel/RuleTable/LoaderStandard.cpp b/moses/TranslationModel/RuleTable/LoaderStandard.cpp index 
f9e6ac6fd..9386f8d81 100644 --- a/moses/TranslationModel/RuleTable/LoaderStandard.cpp +++ b/moses/TranslationModel/RuleTable/LoaderStandard.cpp @@ -126,14 +126,14 @@ void ReformatHieroRule(const string &lineOrig, string &out) ReformatHieroRule(1, targetPhraseString, ntAlign); ReformateHieroScore(scoreString); - stringstream align; + util::StringStream align; map >::const_iterator iterAlign; for (iterAlign = ntAlign.begin(); iterAlign != ntAlign.end(); ++iterAlign) { const pair &alignPoint = iterAlign->second; align << alignPoint.first << "-" << alignPoint.second << " "; } - stringstream ret; + util::StringStream ret; ret << sourcePhraseString << " ||| " << targetPhraseString << " ||| " << scoreString << " ||| " diff --git a/moses/TranslationModel/fuzzy-match/SentenceAlignment.cpp b/moses/TranslationModel/fuzzy-match/SentenceAlignment.cpp index 9003817d8..eabdd8753 100644 --- a/moses/TranslationModel/fuzzy-match/SentenceAlignment.cpp +++ b/moses/TranslationModel/fuzzy-match/SentenceAlignment.cpp @@ -7,13 +7,14 @@ // #include +#include "util/string_stream.hh" #include "SentenceAlignment.h" namespace tmmt { std::string SentenceAlignment::getTargetString(const Vocabulary &vocab) const { - std::stringstream strme; + util::StringStream strme; for (size_t i = 0; i < target.size(); ++i) { const WORD &word = vocab.GetWord(target[i]); strme << word << " "; diff --git a/moses/TranslationModel/fuzzy-match/SentenceAlignment.h b/moses/TranslationModel/fuzzy-match/SentenceAlignment.h index a777c1eb0..4d6dc430c 100644 --- a/moses/TranslationModel/fuzzy-match/SentenceAlignment.h +++ b/moses/TranslationModel/fuzzy-match/SentenceAlignment.h @@ -12,6 +12,7 @@ #include #include #include "Vocabulary.h" +#include "util/string_stream.hh" namespace tmmt { @@ -27,7 +28,7 @@ struct SentenceAlignment { std::string getTargetString(const Vocabulary &vocab) const; std::string getAlignmentString() const { - std::stringstream strme; + util::StringStream strme; for (size_t i = 0; i < 
alignment.size(); ++i) { const std::pair &alignPair = alignment[i]; strme << alignPair.first << "-" << alignPair.second << " "; diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp index 07544b88d..2355b8913 100644 --- a/moses/TranslationOptionCollection.cpp +++ b/moses/TranslationOptionCollection.cpp @@ -483,14 +483,14 @@ SetInputScore(const InputPath &inputPath, PartialTranslOptColl &oldPtoc) const ScorePair* inputScore = inputPath.GetInputScore(); if (inputScore == NULL) return; - const InputFeature &inputFeature = InputFeature::Instance(); + const InputFeature *inputFeature = InputFeature::InstancePtr(); const std::vector &transOpts = oldPtoc.GetList(); for (size_t i = 0; i < transOpts.size(); ++i) { TranslationOption &transOpt = *transOpts[i]; ScoreComponentCollection &scores = transOpt.GetScoreBreakdown(); - scores.PlusEquals(&inputFeature, *inputScore); + scores.PlusEquals(inputFeature, *inputScore); } } diff --git a/moses/TranslationOptionCollectionConfusionNet.cpp b/moses/TranslationOptionCollectionConfusionNet.cpp index 86d045b3f..aba2a451d 100644 --- a/moses/TranslationOptionCollectionConfusionNet.cpp +++ b/moses/TranslationOptionCollectionConfusionNet.cpp @@ -35,8 +35,8 @@ TranslationOptionCollectionConfusionNet(ttasksptr const& ttask, BOOST_FOREACH(PhraseDictionary* pd, PhraseDictionary::GetColl()) if (pd->ProvidesPrefixCheck()) prefixCheckers.push_back(pd); - const InputFeature &inputFeature = InputFeature::Instance(); - UTIL_THROW_IF2(&inputFeature == NULL, "Input feature must be specified"); + const InputFeature *inputFeature = InputFeature::InstancePtr(); + UTIL_THROW_IF2(inputFeature == NULL, "Input feature must be specified"); size_t inputSize = input.GetSize(); m_inputPathMatrix.resize(inputSize); diff --git a/moses/TranslationOptionCollectionLattice.cpp b/moses/TranslationOptionCollectionLattice.cpp index 7fadc2542..b43ef81df 100644 --- a/moses/TranslationOptionCollectionLattice.cpp +++ 
b/moses/TranslationOptionCollectionLattice.cpp @@ -28,8 +28,8 @@ TranslationOptionCollectionLattice UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(), "Not for models using the legqacy binary phrase table"); - const InputFeature &inputFeature = InputFeature::Instance(); - UTIL_THROW_IF2(&inputFeature == NULL, "Input feature must be specified"); + const InputFeature *inputFeature = InputFeature::InstancePtr(); + UTIL_THROW_IF2(inputFeature == NULL, "Input feature must be specified"); size_t maxPhraseLength = StaticData::Instance().GetMaxPhraseLength(); size_t size = input.GetSize(); diff --git a/moses/Util.h b/moses/Util.h index 58152f7ce..d7dd8af2c 100644 --- a/moses/Util.h +++ b/moses/Util.h @@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include #include #include "util/exception.hh" +#include "util/string_stream.hh" #include "TypeDef.h" namespace Moses @@ -343,7 +344,7 @@ inline std::vector TokenizeFirstOnly(const std::string& str, template std::string Join(const std::string& delimiter, const std::vector& items) { - std::ostringstream outstr; + util::StringStream outstr; if(items.size() == 0) return ""; outstr << items[0]; for(unsigned int i = 1; i < items.size(); i++) @@ -357,7 +358,7 @@ std::string Join(const std::string& delimiter, const std::vector& items) template std::string Join(const std::string &delim, It begin, It end) { - std::ostringstream outstr; + util::StringStream outstr; if (begin != end) outstr << *begin++; for ( ; begin != end; ++begin) diff --git a/moses/Word.cpp b/moses/Word.cpp index f55be5ee8..679d8c05d 100644 --- a/moses/Word.cpp +++ b/moses/Word.cpp @@ -28,6 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "FactorCollection.h" #include "StaticData.h" // needed to determine the FactorDelimiter #include "util/exception.hh" +#include "util/string_stream.hh" #include "util/tokenize_piece.hh" using namespace std; @@ -79,7 +80,7 @@ void 
Word::Merge(const Word &sourceWord) std::string Word::GetString(const vector factorType,bool endWithBlank) const { - stringstream strme; + util::StringStream strme; const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter(); bool firstPass = true; unsigned int stop = min(max_fax(),factorType.size()); @@ -195,7 +196,7 @@ TO_STRING_BODY(Word); // friend ostream& operator<<(ostream& out, const Word& word) { - stringstream strme; + util::StringStream strme; const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter(); bool firstPass = true; unsigned int stop = max_fax(); @@ -208,7 +209,7 @@ ostream& operator<<(ostream& out, const Word& word) } else { strme << factorDelimiter; } - strme << *factor; + strme << factor->GetString(); } } out << strme.str() << " "; diff --git a/moses/WordLattice.cpp b/moses/WordLattice.cpp index 7804c9a58..c68d182b0 100644 --- a/moses/WordLattice.cpp +++ b/moses/WordLattice.cpp @@ -12,7 +12,7 @@ namespace Moses { WordLattice::WordLattice() : ConfusionNet() { - UTIL_THROW_IF2(&InputFeature::Instance() == NULL, + UTIL_THROW_IF2(InputFeature::InstancePtr() == NULL, "Input feature must be specified"); } @@ -57,9 +57,9 @@ InitializeFromPCNDataType const std::string& debug_line) { // const StaticData &staticData = StaticData::Instance(); - const InputFeature &inputFeature = InputFeature::Instance(); - size_t numInputScores = inputFeature.GetNumInputScores(); - size_t numRealWordCount = inputFeature.GetNumRealWordsInInput(); + const InputFeature *inputFeature = InputFeature::InstancePtr(); + size_t numInputScores = inputFeature->GetNumInputScores(); + size_t numRealWordCount = inputFeature->GetNumRealWordsInInput(); size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); diff --git a/moses/WordsBitmap.cpp b/moses/WordsBitmap.cpp index 17340ffac..d332b1243 100644 --- a/moses/WordsBitmap.cpp +++ b/moses/WordsBitmap.cpp @@ -19,6 +19,7 @@ License along with this library; if not, write to the Free 
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ +#include #include "WordsBitmap.h" namespace Moses @@ -34,4 +35,27 @@ bool WordsBitmap::IsAdjacent(size_t startPos, size_t endPos) const endPos == GetLastGapPos(); } +// for unordered_set in stack +size_t WordsBitmap::hash() const +{ + size_t ret = boost::hash_value(m_bitmap); + return ret; } + +bool WordsBitmap::operator==(const WordsBitmap& other) const +{ + return m_bitmap == other.m_bitmap; +} + +// friend +std::ostream& operator<<(std::ostream& out, const WordsBitmap& wordsBitmap) +{ + for (size_t i = 0 ; i < wordsBitmap.m_bitmap.size() ; i++) { + out << int(wordsBitmap.GetValue(i)); + } + return out; +} + +} // namespace + + diff --git a/moses/WordsBitmap.h b/moses/WordsBitmap.h index c1dcb8acf..feb56929f 100644 --- a/moses/WordsBitmap.h +++ b/moses/WordsBitmap.h @@ -261,17 +261,15 @@ public: return id + (1<<16) * start; } + // for unordered_set in stack + size_t hash() const; + bool operator==(const WordsBitmap& other) const; + bool operator!=(const WordsBitmap& other) const { + return !(*this == other); + } + TO_STRING(); }; -// friend -inline std::ostream& operator<<(std::ostream& out, const WordsBitmap& wordsBitmap) -{ - for (size_t i = 0 ; i < wordsBitmap.m_bitmap.size() ; i++) { - out << int(wordsBitmap.GetValue(i)); - } - return out; -} - } #endif diff --git a/moses/WordsRange.h b/moses/WordsRange.h index 4a38ecde7..98f96313f 100644 --- a/moses/WordsRange.h +++ b/moses/WordsRange.h @@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #define moses_WordsRange_h #include +#include #include "TypeDef.h" #include "Util.h" #include "util/exception.hh" @@ -94,6 +95,12 @@ public: TO_STRING(); }; +inline size_t hash_value(const WordsRange& range) +{ + size_t seed = range.GetStartPos(); + boost::hash_combine(seed, range.GetEndPos()); + return seed; +} } #endif diff 
--git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta index 664fa67a4..562cf8ec2 100644 --- a/scripts/ems/experiment.meta +++ b/scripts/ems/experiment.meta @@ -947,6 +947,21 @@ parse-input-devtest pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval ignore-unless: use-mira template: $input-parser < IN > OUT +parse-relax-input + in: split-input + out: input + default-name: tuning/input.parse-relaxed + pass-unless: input-parse-relaxer + pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval + template: $input-parse-relaxer < IN > OUT +parse-relax-input-devtest + in: split-input-devtest + out: input-devtest + default-name: tuning/input.devtest.parse-relaxed + pass-unless: input-parse-relaxer + pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval + ignore-unless: use-mira + template: $input-parse-relaxer < IN > OUT factorize-input in: parsed-input out: factorized-input @@ -1008,35 +1023,20 @@ truecase-input-devtest ignore-unless: AND input-truecaser use-mira template: $input-truecaser -model IN1.$input-extension < IN > OUT split-input - in: truecased-input + in: truecased-input SPLITTER:splitter-model out: split-input - rerun-on-change: input-splitter SPLITTER:splitter-model + rerun-on-change: input-splitter default-name: tuning/input.split pass-unless: input-splitter - template: $input-splitter -model SPLITTER:splitter-model.$input-extension < IN > OUT + template: $input-splitter -model IN1.$input-extension < IN > OUT split-input-devtest - in: truecased-input-devtest + in: truecased-input-devtest SPLITTER:splitter-model out: split-input-devtest rerun-on-change: input-splitter default-name: tuning/input.devtest.split pass-unless: input-splitter ignore-unless: use-mira - template: $input-splitter -model SPLITTER:splitter-model.$input-extension < IN > OUT -parse-relax-input - in: split-input - out: input - default-name: tuning/input.parse-relaxed - pass-unless: input-parse-relaxer - pass-if: 
skip-parse-input-devtesteval mock-input-parser-devtesteval - template: $input-parse-relaxer < IN > OUT -parse-relax-input-devtest - in: split-input-devtest - out: input-devtest - default-name: tuning/input.devtest.parse-relaxed - pass-unless: input-parse-relaxer - pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval - ignore-unless: use-mira - template: $input-parse-relaxer < IN > OUT + template: $input-splitter -model IN1.$input-extension < IN > OUT reference-from-sgm in: reference-sgm input-sgm out: raw-reference @@ -1276,12 +1276,11 @@ truecase-input ignore-unless: input-truecaser template: $input-truecaser -model IN1.$input-extension < IN > OUT split-input - in: truecased-input + in: truecased-input SPLITTER:splitter-model out: split-input - rerun-on-change: input-splitter SPLITTER:splitter-model default-name: evaluation/input.split pass-unless: input-splitter - template: $input-splitter -model SPLITTER:splitter-model.$input-extension < IN > OUT + template: $input-splitter -model IN1.$input-extension < IN > OUT filter in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains TRAINING:transliteration-table out: filtered-dir diff --git a/util/exception.cc b/util/exception.cc index e644d2cb7..5ba06f065 100644 --- a/util/exception.cc +++ b/util/exception.cc @@ -25,7 +25,8 @@ void Exception::SetLocation(const char *file, unsigned int line, const char *fun */ std::string old_text; std::swap(old_text, what_); - StringStream stream(what_); + StringStream stream; + stream << what_; stream << file << ':' << line; if (func) stream << " in " << func << " threw "; if (child_name) { diff --git a/util/string_stream.hh b/util/string_stream.hh index 730403d70..ee76a7a57 100644 --- a/util/string_stream.hh +++ b/util/string_stream.hh @@ -11,9 +11,13 @@ namespace util { class StringStream : public FakeOStream { public: // Semantics: appends 
to string. Remember to clear first! - explicit StringStream(std::string &out) - : out_(out) {} + explicit StringStream() + {} + /* + explicit StringStream(std::string &out) + : out_(out) {} + */ StringStream &flush() { return *this; } StringStream &write(const void *data, std::size_t length) { @@ -21,6 +25,13 @@ class StringStream : public FakeOStream { return *this; } + const std::string &str() const + { return out_; } + void str(const std::string &val) + { + out_ = val; + } + protected: friend class FakeOStream; char *Ensure(std::size_t amount) { @@ -36,7 +47,7 @@ class StringStream : public FakeOStream { } private: - std::string &out_; + std::string out_; }; } // namespace diff --git a/util/string_stream_test.cc b/util/string_stream_test.cc index 3a7734feb..ad996550c 100644 --- a/util/string_stream_test.cc +++ b/util/string_stream_test.cc @@ -11,9 +11,9 @@ namespace util { namespace { template void TestEqual(const T value) { - std::string str; - StringStream(str) << value; - BOOST_CHECK_EQUAL(boost::lexical_cast(value), str); + StringStream strme; + strme << value; + BOOST_CHECK_EQUAL(boost::lexical_cast(value), strme.str()); } template void TestCorners() { @@ -66,15 +66,15 @@ BOOST_AUTO_TEST_CASE(Strings) { non_const[0] = 'b'; non_const[1] = 'c'; non_const[2] = 0; - std::string out; - StringStream(out) << "a" << non_const << 'c'; - BOOST_CHECK_EQUAL("abcc", out); + + StringStream out; + out << "a" << non_const << 'c'; + BOOST_CHECK_EQUAL("abcc", out.str()); // Now test as a separate object. - out.clear(); - StringStream stream(out); + StringStream stream; stream << "a" << non_const << 'c' << piece; - BOOST_CHECK_EQUAL("abccabcdef", out); + BOOST_CHECK_EQUAL("abccabcdef", stream.str()); } }} // namespaces