From 1a26cb84140bde842b0b60c6888e7f169536e849 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fulajt=C3=A1r?= Date: Thu, 27 Aug 2015 15:15:32 +0200 Subject: [PATCH 001/176] Added a simple support for the factored systems. --- scripts/analysis/oov.pl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/analysis/oov.pl b/scripts/analysis/oov.pl index 9756887c9..5228f0f45 100755 --- a/scripts/analysis/oov.pl +++ b/scripts/analysis/oov.pl @@ -176,6 +176,13 @@ sub ngrams { return { md5(encode_utf8($sent)) => 1 }; } else { my @words = split /\s+/, $sent; + + #factors + if ( $sent =~ m/[|]/) { + my $use_index = 0; # default factor is the first one + @words = map { ( split /[|]/, $_ ) [$use_index] } @words; + } + my $out; if ($n == 1) { foreach my $w (@words) { From dd9eb54ec4f1a59bf73a88755f2c7343d9d8dd04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fulajt=C3=A1r?= Date: Mon, 12 Oct 2015 18:47:45 +0200 Subject: [PATCH 002/176] Named group added for the safer 'protected patterns' recognition regexp. The original code uses numbered group references, which can collide if any capture group is used inside the $protected_pattern string. For example, take the protected pattern (loaded from file): (http[s]?|ftp):\/\/[^:\/\s]+(\/\w+)*\/[\w\-\.]+. If we use numbered references, $2 will refer to (http[s]?|ftp) instead of to the (.*) inside: while ($t =~ /($protected_pattern)(.*)$/) { Naming the groups resolves this issue. 
--- scripts/tokenizer/tokenizer.perl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/tokenizer/tokenizer.perl b/scripts/tokenizer/tokenizer.perl index a1eb01c0f..3aaad28a0 100755 --- a/scripts/tokenizer/tokenizer.perl +++ b/scripts/tokenizer/tokenizer.perl @@ -243,9 +243,9 @@ sub tokenize my @protected = (); foreach my $protected_pattern (@protected_patterns) { my $t = $text; - while ($t =~ /($protected_pattern)(.*)$/) { - push @protected, $1; - $t = $2; + while ($t =~ /(?<PATTERN>$protected_pattern)(?<TAIL>.*)$/) { + push @protected, $+{PATTERN}; + $t = $+{TAIL}; } } From 64aea22425305d1a2c1e4c046e8d89f5f1536fd8 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Thu, 16 Jun 2016 14:20:28 +0100 Subject: [PATCH 003/176] Enabled c++0x flag for gcc. --- Jamroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jamroot b/Jamroot index 4118248c1..c8b28445e 100644 --- a/Jamroot +++ b/Jamroot @@ -111,7 +111,7 @@ external-lib z ; #lib dl : : static:static shared:shared ; #requirements += dl ; -#requirements += -std=c++0x ; +requirements += -std=c++0x ; # Allow moses to report the git commit hash of the version used for compilation moses_githash = [ _shell "git describe --dirty" ] ; From 14d7df229f20f5220b52233aad4ae2cf85679fc6 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Thu, 16 Jun 2016 14:22:10 +0100 Subject: [PATCH 004/176] Changed paths for vowpalwabbit to include directly from build directory. 
--- vw/Jamfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vw/Jamfile b/vw/Jamfile index 0eda14c9a..2e8547ae0 100644 --- a/vw/Jamfile +++ b/vw/Jamfile @@ -6,11 +6,11 @@ boost 103600 ; # VW local with-vw = [ option.get "with-vw" ] ; if $(with-vw) { - lib vw : : $(with-vw)/lib ; - lib allreduce : : $(with-vw)/lib ; + lib vw : : $(with-vw)/vowpalwabbit ; + lib allreduce : : $(with-vw)/vowpalwabbit ; - obj ClassifierFactory.o : ClassifierFactory.cpp headers : $(with-vw)/include/vowpalwabbit ; - obj VWPredictor.o : VWPredictor.cpp headers : $(with-vw)/include/vowpalwabbit ; + obj ClassifierFactory.o : ClassifierFactory.cpp headers : $(with-vw)/vowpalwabbit ; + obj VWPredictor.o : VWPredictor.cpp headers : $(with-vw)/vowpalwabbit ; alias vw_objects : VWPredictor.o ClassifierFactory.o vw allreduce : : : boost_program_options ; lib classifier : [ glob *.cpp : VWPredictor.cpp ClassifierFactory.cpp ] vw_objects headers ; From 5282ad667c4fd6e0fe111e3e982c950c5e4af6b3 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Thu, 16 Jun 2016 14:22:58 +0100 Subject: [PATCH 005/176] Global scope for VW training. 
--- moses-cmd/MainVW.cpp | 13 ++++++------- moses/TrainingTask.h | 12 ++++++++++++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/moses-cmd/MainVW.cpp b/moses-cmd/MainVW.cpp index 2f313df01..0e611b139 100644 --- a/moses-cmd/MainVW.cpp +++ b/moses-cmd/MainVW.cpp @@ -51,12 +51,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "hypergraph.pb.h" #endif -#ifdef PT_UG -#include -#include "moses/TranslationModel/UG/mmsapt.h" -#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h" -#endif - using namespace std; using namespace Moses; @@ -76,6 +70,9 @@ void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream) /** main function of the command line version of the decoder **/ int main(int argc, char const** argv) { + //setting in the Staticdata a link between the thread id of this process and a NULL tasksptr + // StaticData::InstanceNonConst().SetTask(); // => moved into StaticData constructor + try { #ifdef HAVE_PROTOBUF @@ -141,11 +138,13 @@ int main(int argc, char const** argv) } #ifdef WITH_THREADS +#pragma message ("Compiling with Threads.") ThreadPool pool(staticData.ThreadCount()); #endif // main loop over set of input sentences + boost::shared_ptr scope(new ContextScope); boost::shared_ptr source; while ((source = ioWrapper->ReadInput()) != NULL) { IFVERBOSE(1) { @@ -154,7 +153,7 @@ int main(int argc, char const** argv) // set up task of training one sentence boost::shared_ptr task; - task = TrainingTask::create(source, ioWrapper); + task = TrainingTask::create(source, ioWrapper, scope); // execute task #ifdef WITH_THREADS diff --git a/moses/TrainingTask.h b/moses/TrainingTask.h index 4d2152920..eed6d7d4b 100644 --- a/moses/TrainingTask.h +++ b/moses/TrainingTask.h @@ -39,6 +39,18 @@ public: boost::shared_ptr const& ioWrapper) { boost::shared_ptr ret(new TrainingTask(source, ioWrapper)); ret->m_self = ret; + ret->m_scope.reset(new ContextScope); + return ret; + } + + 
// factory function + static boost::shared_ptr + create(boost::shared_ptr const& source, + boost::shared_ptr const& ioWrapper, + boost::shared_ptr const& scope) { + boost::shared_ptr ret(new TrainingTask(source, ioWrapper)); + ret->m_self = ret; + ret->m_scope = scope; return ret; } From 6bc376a843a02a31c2a3279ae85de2847c692c0b Mon Sep 17 00:00:00 2001 From: Lane Schwartz Date: Sat, 12 Nov 2016 11:18:53 -0600 Subject: [PATCH 006/176] Clean up code --- moses/LM/Reloading.h | 99 ++------------------------------------------ 1 file changed, 4 insertions(+), 95 deletions(-) diff --git a/moses/LM/Reloading.h b/moses/LM/Reloading.h index d5ae83d17..7075cb429 100644 --- a/moses/LM/Reloading.h +++ b/moses/LM/Reloading.h @@ -35,113 +35,22 @@ namespace Moses { class FFState; - -//LanguageModel *ConstructReloadingLM(const std::string &line); -//LanguageModel *ConstructReloadingLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy); -/* - namespace { -class MappingBuilder : public lm::EnumerateVocab -{ -public: - MappingBuilder(FactorCollection &factorCollection, std::vector &mapping) - : m_factorCollection(factorCollection), m_mapping(mapping) {} - - void Add(lm::WordIndex index, const StringPiece &str) { - std::size_t factorId = m_factorCollection.AddFactor(str)->GetId(); - if (m_mapping.size() <= factorId) { - // 0 is :-) - m_mapping.resize(factorId + 1); - } - m_mapping[factorId] = index; - } - -private: - FactorCollection &m_factorCollection; - std::vector &m_mapping; -}; - } -*/ template class ReloadingLanguageModel : public LanguageModelKen { public: - // TODO(Lane) copy less code, update to load_method + ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen(line, file, factorType, lazy ? 
util::LAZY : util::POPULATE_OR_READ), m_file(file), m_lazy(lazy) { - std::cerr << "ReloadingLM constructor: " << m_file << std::endl; - // std::cerr << std::string(line).replace(0,11,"KENLM") << std::endl; + VERBOSE(1, "ReloadingLM constructor: " << m_file << std::endl); } virtual void InitializeForInput(ttasksptr const& ttask) { - std::cerr << "ReloadingLM InitializeForInput" << std::endl; - // TODO(lane): load_method + VERBOSE(1, "ReloadingLM InitializeForInput" << std::endl); + LanguageModelKen::LoadModel(m_file, m_lazy ? util::LAZY : util::POPULATE_OR_READ); - /* - lm::ngram::Config config; - if(this->m_verbosity >= 1) { - config.messages = &std::cerr; - } else { - config.messages = NULL; - } - FactorCollection &collection = FactorCollection::Instance(); - MappingBuilder builder(collection, m_lmIdLookup); - config.enumerate_vocab = &builder; - config.load_method = m_lazy ? util::LAZY : util::POPULATE_OR_READ; - - m_ngram.reset(new Model(m_file.c_str(), config)); - - m_beginSentenceFactor = collection.AddFactor(BOS_); - */ }; - /* - ReloadingLanguageModel(const std::string &line) : LanguageModelKen(ConstructKenLM(std::string(line).replace(0,11,"KENLM"))) { - std::cerr << "ReloadingLM constructor" << std::endl; - std::cerr << std::string(line).replace(0,11,"KENLM") << std::endl; - } - */ - /* - ~ReloadingLanguageModel() { - delete m_lm; - } - - virtual const FFState *EmptyHypothesisState(const InputType &input) const { - return m_lm->EmptyHypothesisState(input); - } - - virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const { - m_lm->CalcScore(phrase, fullScore, ngramScore, oovCount); - } - - virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const { - return m_lm->EvaluateWhenApplied(hypo, ps, out); - } - - virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const { - return 
m_lm->EvaluateWhenApplied(cur_hypo, featureID, accumulator); - } - - virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const { - return m_lm->EvaluateWhenApplied(hyperedge, featureID, accumulator); - } - - virtual void IncrementalCallback(Incremental::Manager &manager) const { - m_lm->IncrementalCallback(manager); - } - - virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const { - m_lm->ReportHistoryOrder(out, phrase); - } - - virtual bool IsUseable(const FactorMask &mask) const { - return m_lm->IsUseable(mask); - } - - - private: - - LanguageModel *m_lm; - */ protected: From 988038af685e133e0d3a5408eeaa81324d9fa9de Mon Sep 17 00:00:00 2001 From: Lane Schwartz Date: Sat, 12 Nov 2016 11:28:26 -0600 Subject: [PATCH 007/176] Moses server can now accept and store data associated with a "context-scope" key --- moses/server/TranslationRequest.cpp | 40 +++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index 6f6fed1cf..4e97cff6a 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -24,6 +24,8 @@ using Moses::FValue; using Moses::PhraseDictionaryMultiModel; using Moses::FindPhraseDictionary; using Moses::Sentence; +using Moses::TokenizeMultiCharSeparator; +using Moses::FeatureFunction; boost::shared_ptr TranslationRequest:: @@ -312,6 +314,44 @@ parse_request(std::map const& params) m_context.reset(new std::vector(1,context)); } + si = params.find("context-scope"); + if (si != params.end()) + { + + string context = xmlrpc_c::value_string(si->second); + + string groupSeparator("Moses::ContextScope::GroupSeparator"); + string recordSeparator("Moses::ContextScope::RecordSeparator"); + + // Here, we assume that any XML-RPC value + // associated with the key "context-scope" + // has the following format: + // + // FeatureFunctionName followed 
by recordSeparator + // followed by the value of interest + // followed by groupSeparator + // + // In the following code, the value of interest will be stored + // in contextScope under the key FeatureFunctionName, + // where FeatureFunctionName is the actual name of the feature function + + boost::shared_ptr contextScope = GetScope(); + + BOOST_FOREACH(string group, TokenizeMultiCharSeparator(context, groupSeparator)) { + + vector record = TokenizeMultiCharSeparator(group, recordSeparator); + + // Use the feature function whose name is record[0] as a key + FeatureFunction& ff = Moses::FeatureFunction::FindFeatureFunction(record[0]); + void const* key = static_cast(&ff); + + // Store (in the context scope) record[1] as the value associated with that key + boost::shared_ptr value = contextScope->get(key,true); + value->replace(value->begin(), value->end(), record[1]); + + } + } + // // biased sampling for suffix-array-based sampling phrase table? // if ((si = params.find("bias")) != params.end()) From c854df84cb869d9e74b8ce1d7b47196c7b1a6949 Mon Sep 17 00:00:00 2001 From: Lane Schwartz Date: Sat, 12 Nov 2016 11:31:43 -0600 Subject: [PATCH 008/176] Add per-sentence on-demand translation model. This translation model reads its phrase table from a TranslationTask object's ContextScope. This data can come from, for example, a mosesserver XML-RPC client. 
--- moses/FF/Factory.cpp | 2 + ...aseDictionaryMemoryPerSentenceOnDemand.cpp | 145 ++++++++++++++++++ ...hraseDictionaryMemoryPerSentenceOnDemand.h | 46 ++++++ 3 files changed, 193 insertions(+) create mode 100644 moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp create mode 100644 moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index a41b8cb2c..a048410d0 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -16,6 +16,7 @@ #include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h" #include "moses/TranslationModel/ProbingPT/ProbingPT.h" #include "moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h" +#include "moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h" #include "moses/FF/LexicalReordering/LexicalReordering.h" @@ -244,6 +245,7 @@ FeatureRegistry::FeatureRegistry() MOSES_FNAME(PhraseDictionaryFuzzyMatch); MOSES_FNAME(ProbingPT); MOSES_FNAME(PhraseDictionaryMemoryPerSentence); + MOSES_FNAME(PhraseDictionaryMemoryPerSentenceOnDemand); MOSES_FNAME2("RuleTable", Syntax::RuleTableFF); MOSES_FNAME2("SyntaxInputWeight", Syntax::InputWeightFF); diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp new file mode 100644 index 000000000..db570968c --- /dev/null +++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp @@ -0,0 +1,145 @@ +// vim:tabstop=2 +#include "PhraseDictionaryMemoryPerSentenceOnDemand.h" +#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h" +#include + +using namespace std; + +namespace Moses +{ +PhraseDictionaryMemoryPerSentenceOnDemand::PhraseDictionaryMemoryPerSentenceOnDemand(const std::string &line) + : PhraseDictionary(line, true) +{ + ReadParameters(); +} + +void PhraseDictionaryMemoryPerSentenceOnDemand::Load(AllOptions::ptr const& opts) +{ + m_options = 
opts; + SetFeaturesToApply(); + + // don't load anything. Load when we have the input +} + + +TargetPhraseCollection::shared_ptr PhraseDictionaryMemoryPerSentenceOnDemand::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const { + + Coll &coll = GetColl(); + + return coll[source]; + +} + + +void PhraseDictionaryMemoryPerSentenceOnDemand::InitializeForInput(ttasksptr const& ttask) +{ + Coll &coll = GetColl(); + coll.clear(); + + VERBOSE(2, "Initializing PhraseDictionaryMemoryPerSentenceOnDemand " << m_description << "\n"); + + // The context scope object for this translation task + // contains a map of translation task-specific data + boost::shared_ptr contextScope = ttask->GetScope(); + + // The key to the map is this object + void const* key = static_cast(this); + + // The value stored in the map is a string representing a phrase table + boost::shared_ptr value = contextScope->get(key); + + // Create a stream to read the phrase table data + stringstream strme(*(value.get())); + + // Read the phrase table data, one line at a time + string line; + while (getline(strme, line)) { + + VERBOSE(3, "\t" << line); + + vector toks = TokenizeMultiCharSeparator(line, "|||"); + Phrase source; + source.CreateFromString(Input, m_input, toks[0], NULL); + + TargetPhrase *target = new TargetPhrase(this); + target->CreateFromString(Output, m_output, toks[1], NULL); + + // score for this phrase table + vector scores = Tokenize(toks[2]); + std::transform(scores.begin(), scores.end(), scores.begin(),TransformScore); + std::transform(scores.begin(), scores.end(), scores.begin(),FloorScore); + target->GetScoreBreakdown().PlusEquals(this, scores); + + // score of all other ff when this rule is being loaded + target->EvaluateInIsolation(source, GetFeaturesToApply()); + + // add to coll + TargetPhraseCollection::shared_ptr &tpsPtr = coll[source]; + TargetPhraseCollection *tps = tpsPtr.get(); + if (tps == NULL) { + tps = new TargetPhraseCollection(); + tpsPtr.reset(tps); + } + 
tps->Add(target); + } +} + +void PhraseDictionaryMemoryPerSentenceOnDemand::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const +{ + InputPathList::const_iterator iter; + for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) { + InputPath &inputPath = **iter; + const Phrase &source = inputPath.GetPhrase(); + + Coll &coll = GetColl(); + Coll::const_iterator iter = coll.find(source); + if (iter == coll.end()) { + TargetPhraseCollection::shared_ptr tprPtr; + inputPath.SetTargetPhrases(*this, tprPtr, NULL); + } else { + const TargetPhraseCollection::shared_ptr &tprPtr = iter->second; + inputPath.SetTargetPhrases(*this, tprPtr, NULL); + } + } +} + + +ChartRuleLookupManager* PhraseDictionaryMemoryPerSentenceOnDemand::CreateRuleLookupManager(const ChartParser &parser, + const ChartCellCollectionBase &cellCollection, + std::size_t /*maxChartSpan*/) +{ + abort(); +} + +PhraseDictionaryMemoryPerSentenceOnDemand::Coll &PhraseDictionaryMemoryPerSentenceOnDemand::GetColl() const +{ + Coll *coll; + coll = m_coll.get(); + if (coll == NULL) { + coll = new Coll; + m_coll.reset(coll); + } + assert(coll); + return *coll; +} + +void +PhraseDictionaryMemoryPerSentenceOnDemand::SetParameter(const std::string& key, const std::string& value) +{ + if (key == "path") { + UTIL_THROW(util::Exception, "PhraseDictionaryMemoryPerSentenceOnDemand does not support key \"path\"."); + } else { + PhraseDictionary::SetParameter(key, value); + } +} + + +TO_STRING_BODY(PhraseDictionaryMemoryPerSentenceOnDemand); + +// friend +ostream& operator<<(ostream& out, const PhraseDictionaryMemoryPerSentenceOnDemand& phraseDict) +{ + return out; +} + +} diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h new file mode 100644 index 000000000..bcda0ef77 --- /dev/null +++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h @@ -0,0 +1,46 @@ + +#pragma once + 
+#include "PhraseDictionary.h" +#include "moses/TypeDef.h" +#include "moses/TranslationTask.h" + +namespace Moses +{ +class ChartParser; +class ChartCellCollectionBase; +class ChartRuleLookupManager; + +class PhraseDictionaryMemoryPerSentenceOnDemand : public PhraseDictionary +{ + friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryMemoryPerSentenceOnDemand&); + +public: + PhraseDictionaryMemoryPerSentenceOnDemand(const std::string &line); + + void Load(AllOptions::ptr const& opts); + + void InitializeForInput(ttasksptr const& ttask); + + // for phrase-based model + void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; + + // for syntax/hiero model (CKY+ decoding) + ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t); + + void SetParameter(const std::string& key, const std::string& value); + + TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const; + + TO_STRING(); + + +protected: + typedef boost::unordered_map Coll; + mutable boost::thread_specific_ptr m_coll; + + Coll &GetColl() const; + +}; + +} // namespace Moses From 28c8ff5ad01f9824405955102807616a376866b1 Mon Sep 17 00:00:00 2001 From: MosesAdmin Date: Sun, 13 Nov 2016 00:00:33 +0000 Subject: [PATCH 009/176] daily automatic beautifier --- .../PhraseDictionaryMemoryPerSentenceOnDemand.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp index db570968c..4675d06c8 100644 --- a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp +++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp @@ -22,7 +22,8 @@ void PhraseDictionaryMemoryPerSentenceOnDemand::Load(AllOptions::ptr const& opts } -TargetPhraseCollection::shared_ptr 
PhraseDictionaryMemoryPerSentenceOnDemand::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const { +TargetPhraseCollection::shared_ptr PhraseDictionaryMemoryPerSentenceOnDemand::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const +{ Coll &coll = GetColl(); From 05006bf1e2c68395a63ebd21a4f7ee56e38f260f Mon Sep 17 00:00:00 2001 From: Lane Schwartz Date: Mon, 14 Nov 2016 13:26:34 -0600 Subject: [PATCH 010/176] Allow XML-RPC requests to update weights --- moses/LM/Reloading.h | 26 +++++++++++++ moses/server/TranslationRequest.cpp | 57 +++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/moses/LM/Reloading.h b/moses/LM/Reloading.h index 7075cb429..88f8e8869 100644 --- a/moses/LM/Reloading.h +++ b/moses/LM/Reloading.h @@ -48,6 +48,32 @@ public: virtual void InitializeForInput(ttasksptr const& ttask) { VERBOSE(1, "ReloadingLM InitializeForInput" << std::endl); + // The context scope object for this translation task + // contains a map of translation task-specific data + boost::shared_ptr contextScope = ttask->GetScope(); + + // The key to the map is this object + void const* key = static_cast(this); + + // The value stored in the map is a string representing a phrase table + boost::shared_ptr value = contextScope->get(key); + + // Create a stream to read the phrase table data + stringstream strme(*(value.get())); + + ofstream tmp; + tmp.open(m_file.c_str()); + + // Read the phrase table data, one line at a time + string line; + while (getline(strme, line)) { + + tmp << line << "\n"; + + } + + tmp.close(); + LanguageModelKen::LoadModel(m_file, m_lazy ? 
util::LAZY : util::POPULATE_OR_READ); }; diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index 4e97cff6a..e1821a265 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -26,6 +26,7 @@ using Moses::FindPhraseDictionary; using Moses::Sentence; using Moses::TokenizeMultiCharSeparator; using Moses::FeatureFunction; +using Moses::Scan; boost::shared_ptr TranslationRequest:: @@ -352,6 +353,62 @@ parse_request(std::map const& params) } } + si = params.find("weights"); + if (si != params.end()) + { + + boost::unordered_map map; + { + const vector &ffs = FeatureFunction::GetFeatureFunctions(); + BOOST_FOREACH(FeatureFunction* const& ff, ffs) { + map[ff->GetScoreProducerDescription()] = ff; + } + } + + string allValues = xmlrpc_c::value_string(si->second); + + BOOST_FOREACH(string values, TokenizeMultiCharSeparator(allValues, "\t")) { + + vector record = TokenizeMultiCharSeparator(values, "="); + + if (record.size() == 2) { + string featureName = record[0]; + string featureWeights = record[1]; + + boost::unordered_map::iterator ffi = map.find(featureName); + + if (ffi != map.end()) { + FeatureFunction* ff = ffi->second; + + size_t prevNumWeights = ff->GetNumScoreComponents(); + + vector ffWeights; + BOOST_FOREACH(string weight, TokenizeMultiCharSeparator(featureWeights, " ")) { + ffWeights.push_back(Scan(weight)); + } + + if (ffWeights.size() == ff->GetNumScoreComponents()) { + + // XXX: This is NOT thread-safe + Moses::StaticData::InstanceNonConst().SetWeights(ff, ffWeights); + VERBOSE(1, "WARNING: THIS IS NOT THREAD-SAFE!\tUpdating weights for " << featureName << " to " << featureWeights << "\n"); + + } else { + TRACE_ERR("ERROR: Unable to update weights for " << featureName << " because " << ff->GetNumScoreComponents() << " weights are required but only " << ffWeights.size() << " were provided\n"); + } + + } else { + TRACE_ERR("ERROR: No FeatureFunction with name " << featureName << ", no 
weight update\n"); + } + + } else { + TRACE_ERR("WARNING: XML-RPC weights update was improperly formatted:\t" << values << "\n"); + } + + } + + } + // // biased sampling for suffix-array-based sampling phrase table? // if ((si = params.find("bias")) != params.end()) From a0b8c570704ccaccfb5f4fab0e709b27c6a908ed Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 14 Nov 2016 23:43:19 +0000 Subject: [PATCH 011/176] error message --- moses/LM/Implementation.cpp | 3 ++- moses/LM/SkeletonLM.cpp | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp index eb67100ca..c8fc5df32 100644 --- a/moses/LM/Implementation.cpp +++ b/moses/LM/Implementation.cpp @@ -41,7 +41,8 @@ using namespace std; namespace Moses { LanguageModelImplementation::LanguageModelImplementation(const std::string &line) - :LanguageModel(line) +:LanguageModel(line) +,m_nGramOrder(NOT_FOUND) { } diff --git a/moses/LM/SkeletonLM.cpp b/moses/LM/SkeletonLM.cpp index 23958e688..f944de23a 100644 --- a/moses/LM/SkeletonLM.cpp +++ b/moses/LM/SkeletonLM.cpp @@ -11,6 +11,9 @@ SkeletonLM::SkeletonLM(const std::string &line) { ReadParameters(); + UTIL_THROW_IF2(m_nGramOrder == NOT_FOUND, "Must set order"); + UTIL_THROW_IF2(m_nGramOrder <= 1, "Ngram order must be more than 1"); + FactorCollection &factorCollection = FactorCollection::Instance(); // needed by parent language model classes. Why didn't they set these themselves? 
From 9519dca56f77ee25f8b253324440d6ac5f36b9b8 Mon Sep 17 00:00:00 2001 From: MosesAdmin Date: Tue, 15 Nov 2016 00:00:37 +0000 Subject: [PATCH 012/176] daily automatic beautifier --- moses/LM/Implementation.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp index c8fc5df32..3208c3a7b 100644 --- a/moses/LM/Implementation.cpp +++ b/moses/LM/Implementation.cpp @@ -41,8 +41,8 @@ using namespace std; namespace Moses { LanguageModelImplementation::LanguageModelImplementation(const std::string &line) -:LanguageModel(line) -,m_nGramOrder(NOT_FOUND) + :LanguageModel(line) + ,m_nGramOrder(NOT_FOUND) { } From 7a150bb060e57a1409a5a26643ce87789a08a582 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Sun, 20 Nov 2016 20:51:06 +0000 Subject: [PATCH 013/176] eclipse --- contrib/moses2/.cproject | 8 ++++---- contrib/other-builds/OnDiskPt/.cproject | 8 ++++---- contrib/other-builds/moses/.cproject | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/contrib/moses2/.cproject b/contrib/moses2/.cproject index a8e149f99..82b82d591 100644 --- a/contrib/moses2/.cproject +++ b/contrib/moses2/.cproject @@ -11,12 +11,12 @@ + + - - @@ -106,13 +106,13 @@ + + - - diff --git a/contrib/other-builds/OnDiskPt/.cproject b/contrib/other-builds/OnDiskPt/.cproject index e32a5baea..f551380fd 100644 --- a/contrib/other-builds/OnDiskPt/.cproject +++ b/contrib/other-builds/OnDiskPt/.cproject @@ -11,12 +11,12 @@ + + - - @@ -72,13 +72,13 @@ + + - - diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject index 6945d7ecf..81da1d22b 100644 --- a/contrib/other-builds/moses/.cproject +++ b/contrib/other-builds/moses/.cproject @@ -11,11 +11,11 @@ + - @@ -86,12 +86,12 @@ + - From 50bbf7cb2671a71db2cf7686ebabee8a9a2bb9f8 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 22 Nov 2016 10:08:12 +0000 Subject: [PATCH 014/176] eclipse --- .../other-builds/CreateProbingPT2/.cproject | 162 
------------------ .../other-builds/CreateProbingPT2/.project | 38 ---- 2 files changed, 200 deletions(-) delete mode 100644 contrib/other-builds/CreateProbingPT2/.cproject delete mode 100644 contrib/other-builds/CreateProbingPT2/.project diff --git a/contrib/other-builds/CreateProbingPT2/.cproject b/contrib/other-builds/CreateProbingPT2/.cproject deleted file mode 100644 index 51aaee6e6..000000000 --- a/contrib/other-builds/CreateProbingPT2/.cproject +++ /dev/null @@ -1,162 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/contrib/other-builds/CreateProbingPT2/.project b/contrib/other-builds/CreateProbingPT2/.project deleted file mode 100644 index 1f8c4bd3e..000000000 --- a/contrib/other-builds/CreateProbingPT2/.project +++ /dev/null @@ -1,38 +0,0 @@ - - - CreateProbingPT2 - - - lm - moses - moses2 - util - - - - org.eclipse.cdt.managedbuilder.core.genmakebuilder - clean,full,incremental, - - - - - org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder - full,incremental, - - - - - - org.eclipse.cdt.core.cnature - org.eclipse.cdt.core.ccnature - org.eclipse.cdt.managedbuilder.core.managedBuildNature - org.eclipse.cdt.managedbuilder.core.ScannerConfigNature - - - - CreateProbingPT2.cpp - 1 - PARENT-2-PROJECT_LOC/moses2/CreateProbingPT2.cpp - - - From 1fe261b5d49d69e6467df275ac0d88213745af61 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 22 Nov 2016 10:35:30 +0000 Subject: [PATCH 015/176] eclipse --- contrib/other-builds/CreateOnDiskPt/.cproject | 9 +++++---- .../other-builds/CreateProbingPT/.cproject | 9 +++++---- contrib/other-builds/consolidate/.cproject | 1 + contrib/other-builds/extractor/.cproject | 5 +++-- contrib/other-builds/moses/.project | 20 +++++++++++++++++++ contrib/other-builds/server/.cproject | 5 +++-- 6 files changed, 37 insertions(+), 12 
deletions(-) diff --git a/contrib/other-builds/CreateOnDiskPt/.cproject b/contrib/other-builds/CreateOnDiskPt/.cproject index f52490c1a..95c0a6a01 100644 --- a/contrib/other-builds/CreateOnDiskPt/.cproject +++ b/contrib/other-builds/CreateOnDiskPt/.cproject @@ -5,16 +5,16 @@ + - - + @@ -78,6 +78,7 @@ + @@ -97,16 +98,16 @@ + - - + diff --git a/contrib/other-builds/CreateProbingPT/.cproject b/contrib/other-builds/CreateProbingPT/.cproject index f925736af..ef52fa87a 100644 --- a/contrib/other-builds/CreateProbingPT/.cproject +++ b/contrib/other-builds/CreateProbingPT/.cproject @@ -5,16 +5,16 @@ + - - + @@ -40,6 +40,7 @@ + diff --git a/contrib/other-builds/extractor/.cproject b/contrib/other-builds/extractor/.cproject index 613c41d5c..79805f176 100644 --- a/contrib/other-builds/extractor/.cproject +++ b/contrib/other-builds/extractor/.cproject @@ -14,7 +14,7 @@ - + @@ -40,6 +40,7 @@ + - + diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index c6b7de6f7..0f397df33 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -565,6 +565,16 @@ 1 PARENT-3-PROJECT_LOC/moses/OutputCollector.h + + OutputFileStream.cpp + 1 + PARENT-3-PROJECT_LOC/moses/OutputFileStream.cpp + + + OutputFileStream.h + 1 + PARENT-3-PROJECT_LOC/moses/OutputFileStream.h + PCNTools.cpp 1 @@ -2175,6 +2185,16 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h + + TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp + + + TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h + TranslationModel/PhraseDictionaryMultiModel.cpp 1 diff --git a/contrib/other-builds/server/.cproject b/contrib/other-builds/server/.cproject index 281403516..9789dbfb7 100644 --- a/contrib/other-builds/server/.cproject +++ 
b/contrib/other-builds/server/.cproject @@ -15,7 +15,7 @@ - + @@ -52,6 +52,7 @@ + - + From 99da2f1b7a818f45ac1f64c56a9fffb13c67146d Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 22 Nov 2016 11:40:45 +0000 Subject: [PATCH 016/176] missing header file --- contrib/moses2/legacy/OutputCollector.h | 1 + moses/OutputCollector.h | 1 + 2 files changed, 2 insertions(+) diff --git a/contrib/moses2/legacy/OutputCollector.h b/contrib/moses2/legacy/OutputCollector.h index 7529cd352..5504d9add 100644 --- a/contrib/moses2/legacy/OutputCollector.h +++ b/contrib/moses2/legacy/OutputCollector.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "util/exception.hh" diff --git a/moses/OutputCollector.h b/moses/OutputCollector.h index 0d6f37472..797cc85cf 100644 --- a/moses/OutputCollector.h +++ b/moses/OutputCollector.h @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "Util.h" #include "util/exception.hh" From 87dbd677da21458cd5e616ae857c09385d5d4819 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 23 Nov 2016 13:11:53 +0000 Subject: [PATCH 017/176] missing header file --- contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h b/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h index 5706fca09..b91ef8f6c 100644 --- a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h +++ b/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h @@ -29,6 +29,7 @@ #include #include #include +#include #include "MurmurHash3.h" #include "StringVector.h" From 288af6e42572007283e02f225d919cfc5255d867 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 28 Nov 2016 10:51:56 +0000 Subject: [PATCH 018/176] move function to cpp file --- contrib/moses2/PhraseBased/PhraseImpl.cpp | 12 ++++++++++++ contrib/moses2/PhraseBased/PhraseImpl.h | 12 +----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git 
a/contrib/moses2/PhraseBased/PhraseImpl.cpp b/contrib/moses2/PhraseBased/PhraseImpl.cpp index 004792147..00f55a35b 100644 --- a/contrib/moses2/PhraseBased/PhraseImpl.cpp +++ b/contrib/moses2/PhraseBased/PhraseImpl.cpp @@ -10,6 +10,18 @@ using namespace std; namespace Moses2 { +PhraseImpl *PhraseImpl::CreateFromString(MemPool &pool, FactorCollection &vocab, + const System &system, const std::string &str) +{ + std::vector toks = Moses2::Tokenize(str); + size_t size = toks.size(); + PhraseImpl *ret; + + ret = new (pool.Allocate()) PhraseImpl(pool, size); + + ret->PhraseImplTemplate::CreateFromString(vocab, system, toks); + return ret; +} } diff --git a/contrib/moses2/PhraseBased/PhraseImpl.h b/contrib/moses2/PhraseBased/PhraseImpl.h index a7db9a9e5..787cdf58d 100644 --- a/contrib/moses2/PhraseBased/PhraseImpl.h +++ b/contrib/moses2/PhraseBased/PhraseImpl.h @@ -9,17 +9,7 @@ class PhraseImpl: public PhraseImplTemplate { public: static PhraseImpl *CreateFromString(MemPool &pool, FactorCollection &vocab, - const System &system, const std::string &str) - { - std::vector toks = Moses2::Tokenize(str); - size_t size = toks.size(); - PhraseImpl *ret; - - ret = new (pool.Allocate()) PhraseImpl(pool, size); - - ret->PhraseImplTemplate::CreateFromString(vocab, system, toks); - return ret; - } + const System &system, const std::string &str); PhraseImpl(MemPool &pool, size_t size) : PhraseImplTemplate(pool, size) From 0f12557e1928637934b629ab4dd133bee4ff9f32 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 1 Dec 2016 12:55:20 +0000 Subject: [PATCH 019/176] port beam threshold from Moses --- contrib/moses2/HypothesisColl.cpp | 19 +++++++++++++++++++ contrib/moses2/HypothesisColl.h | 3 +++ contrib/moses2/legacy/Parameter.cpp | 4 ++-- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 2af5465e3..a47c7a5de 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ 
-22,6 +22,8 @@ HypothesisColl::HypothesisColl(const ManagerBase &mgr) : m_coll(MemPoolAllocator(mgr.GetPool())), m_sortedHypos( NULL) { + m_bestScore = -std::numeric_limits::infinity(); + m_worstScore = -std::numeric_limits::infinity(); } const HypothesisBase *HypothesisColl::GetBestHypo() const @@ -50,6 +52,23 @@ void HypothesisColl::Add( Recycler &hypoRecycle, ArcLists &arcLists) { + SCORE futureScore = hypo->GetFutureScore(); + if (futureScore < m_worstScore) { + // beam threshold + hypoRecycle.Recycle(hypo); + return; + } + + if (futureScore > m_bestScore) { + m_bestScore = hypo->GetFutureScore(); + + // this may also affect the worst score + SCORE beamWidth = system.options.search.beam_width; + if ( m_bestScore + beamWidth > m_worstScore ) { + m_worstScore = m_bestScore + beamWidth; + } + } + StackAdd added = Add(hypo); size_t nbestSize = system.options.nbest.nbest_size; diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index cea3bee1b..fa99ccc08 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -60,6 +60,9 @@ protected: _HCType m_coll; mutable Hypotheses *m_sortedHypos; + SCORE m_bestScore; + SCORE m_worstScore; + StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; diff --git a/contrib/moses2/legacy/Parameter.cpp b/contrib/moses2/legacy/Parameter.cpp index ea1b962a8..666eb0e98 100644 --- a/contrib/moses2/legacy/Parameter.cpp +++ b/contrib/moses2/legacy/Parameter.cpp @@ -79,8 +79,8 @@ Parameter::Parameter() desc += "8=tree-to-string (SCFG-based)\n"; desc += "9=forest-to-string"; AddParam(search_opts, "search-algorithm", desc); - //AddParam(search_opts, "beam-threshold", "b", - // "threshold for threshold pruning"); + AddParam(search_opts, "beam-threshold", "b", + "threshold for threshold pruning"); //AddParam(search_opts, "early-discarding-threshold", "edt", // "threshold for constructing hypotheses based on estimate cost"); 
AddParam(search_opts, "stack", "s", From ef105a1a9b6a9ae731e243c80c760215b765636d Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 1 Dec 2016 14:41:30 +0000 Subject: [PATCH 020/176] reset beam variables when clearing ministack. They are reused --- contrib/moses2/HypothesisColl.cpp | 21 +++++++++++++++---- .../CubePruningMiniStack/Search.cpp | 3 +-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index a47c7a5de..a42c755aa 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -18,9 +18,9 @@ using namespace std; namespace Moses2 { -HypothesisColl::HypothesisColl(const ManagerBase &mgr) : - m_coll(MemPoolAllocator(mgr.GetPool())), m_sortedHypos( - NULL) +HypothesisColl::HypothesisColl(const ManagerBase &mgr) +:m_coll(MemPoolAllocator(mgr.GetPool())) +,m_sortedHypos(NULL) { m_bestScore = -std::numeric_limits::infinity(); m_worstScore = -std::numeric_limits::infinity(); @@ -53,17 +53,28 @@ void HypothesisColl::Add( ArcLists &arcLists) { SCORE futureScore = hypo->GetFutureScore(); + /* + cerr << "scores:" + << futureScore << " " + << m_bestScore << " " + << m_worstScore << " " + << GetSize() << " " + << endl; + */ if (futureScore < m_worstScore) { // beam threshold + //cerr << "Discard:" << hypo->Debug(system) << endl; hypoRecycle.Recycle(hypo); return; } + //cerr << "OK:" << hypo->Debug(system) << endl; if (futureScore > m_bestScore) { m_bestScore = hypo->GetFutureScore(); // this may also affect the worst score SCORE beamWidth = system.options.search.beam_width; + //cerr << "beamWidth=" << beamWidth << endl; if ( m_bestScore + beamWidth > m_worstScore ) { m_worstScore = m_bestScore + beamWidth; } @@ -112,7 +123,7 @@ StackAdd HypothesisColl::Add(const HypothesisBase *hypo) } } - assert(false); + //assert(false); } const Hypotheses &HypothesisColl::GetSortedAndPruneHypos( @@ -192,6 +203,8 @@ void HypothesisColl::Clear() { m_sortedHypos = NULL; 
m_coll.clear(); + m_bestScore = -std::numeric_limits::infinity(); + m_worstScore = -std::numeric_limits::infinity(); } std::string HypothesisColl::Debug(const System &system) const diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp index 8598b3494..d74cb7d99 100644 --- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp +++ b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp @@ -73,10 +73,9 @@ void Search::Decode() //cerr << "stackInd=" << stackInd << endl; m_stack.Clear(); Decode(stackInd); - PostDecode(stackInd); + PostDecode(stackInd); //m_stack.DebugCounts(); - //cerr << m_stacks << endl; } } From a269d9ab71c310423b9f1eb02cafcd4d8a554f14 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 1 Dec 2016 15:31:22 +0000 Subject: [PATCH 021/176] rename m_worseScore -> m_minBeamScore --- contrib/moses2/HypothesisColl.cpp | 12 ++++++------ contrib/moses2/HypothesisColl.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index a42c755aa..98673369f 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -23,7 +23,7 @@ HypothesisColl::HypothesisColl(const ManagerBase &mgr) ,m_sortedHypos(NULL) { m_bestScore = -std::numeric_limits::infinity(); - m_worstScore = -std::numeric_limits::infinity(); + m_minBeamScore = -std::numeric_limits::infinity(); } const HypothesisBase *HypothesisColl::GetBestHypo() const @@ -57,11 +57,11 @@ void HypothesisColl::Add( cerr << "scores:" << futureScore << " " << m_bestScore << " " - << m_worstScore << " " + << m_minBeamScore << " " << GetSize() << " " << endl; */ - if (futureScore < m_worstScore) { + if (futureScore < m_minBeamScore) { // beam threshold //cerr << "Discard:" << hypo->Debug(system) << endl; hypoRecycle.Recycle(hypo); @@ -75,8 +75,8 @@ void HypothesisColl::Add( // this may also affect the worst score SCORE 
beamWidth = system.options.search.beam_width; //cerr << "beamWidth=" << beamWidth << endl; - if ( m_bestScore + beamWidth > m_worstScore ) { - m_worstScore = m_bestScore + beamWidth; + if ( m_bestScore + beamWidth > m_minBeamScore ) { + m_minBeamScore = m_bestScore + beamWidth; } } @@ -204,7 +204,7 @@ void HypothesisColl::Clear() m_sortedHypos = NULL; m_coll.clear(); m_bestScore = -std::numeric_limits::infinity(); - m_worstScore = -std::numeric_limits::infinity(); + m_minBeamScore = -std::numeric_limits::infinity(); } std::string HypothesisColl::Debug(const System &system) const diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index fa99ccc08..46d403544 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -61,7 +61,7 @@ protected: mutable Hypotheses *m_sortedHypos; SCORE m_bestScore; - SCORE m_worstScore; + SCORE m_minBeamScore; StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; From 525d7272e1ae1e173c59090c8c912a302b6fd49a Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 1 Dec 2016 16:43:06 +0000 Subject: [PATCH 022/176] separate m_bestScore and m_minBeamScore. 
Conflated with m_worseScore --- contrib/moses2/HypothesisColl.cpp | 13 +++++++++++-- contrib/moses2/HypothesisColl.h | 3 +-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 98673369f..13863ad7d 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -24,6 +24,7 @@ HypothesisColl::HypothesisColl(const ManagerBase &mgr) { m_bestScore = -std::numeric_limits::infinity(); m_minBeamScore = -std::numeric_limits::infinity(); + m_worseScore = std::numeric_limits::infinity(); } const HypothesisBase *HypothesisColl::GetBestHypo() const @@ -52,6 +53,7 @@ void HypothesisColl::Add( Recycler &hypoRecycle, ArcLists &arcLists) { + size_t stackSize = system.options.search.stack_size; SCORE futureScore = hypo->GetFutureScore(); /* cerr << "scores:" @@ -61,8 +63,10 @@ void HypothesisColl::Add( << GetSize() << " " << endl; */ - if (futureScore < m_minBeamScore) { - // beam threshold + if (futureScore < m_minBeamScore + || (GetSize() >= stackSize) && futureScore < m_worseScore ) { + // beam threshold or really bad hypo that won't make the pruning cut + // as more hypos are added, the m_worseScore stat gets out of date and isn't the optimum cut-off point //cerr << "Discard:" << hypo->Debug(system) << endl; hypoRecycle.Recycle(hypo); return; @@ -104,6 +108,10 @@ StackAdd HypothesisColl::Add(const HypothesisBase *hypo) // CHECK RECOMBINATION if (addRet.second) { // equiv hypo doesn't exists + if (hypo->GetFutureScore() < m_worseScore) { + m_worseScore = hypo->GetFutureScore(); + } + return StackAdd(true, NULL); } else { @@ -205,6 +213,7 @@ void HypothesisColl::Clear() m_coll.clear(); m_bestScore = -std::numeric_limits::infinity(); m_minBeamScore = -std::numeric_limits::infinity(); + m_worseScore = std::numeric_limits::infinity(); } std::string HypothesisColl::Debug(const System &system) const diff --git a/contrib/moses2/HypothesisColl.h 
b/contrib/moses2/HypothesisColl.h index 46d403544..352504cf8 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -60,8 +60,7 @@ protected: _HCType m_coll; mutable Hypotheses *m_sortedHypos; - SCORE m_bestScore; - SCORE m_minBeamScore; + SCORE m_bestScore, m_worseScore, m_minBeamScore; StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; From b572841b09904a2a225224eb4597f5e73e4af5e8 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 13:11:50 +0000 Subject: [PATCH 023/176] delete beam threshold variable, keep worse score discarding --- contrib/moses2/HypothesisColl.cpp | 25 +++++++++++++++++-------- contrib/moses2/HypothesisColl.h | 4 +++- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 13863ad7d..701ff318d 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -22,8 +22,8 @@ HypothesisColl::HypothesisColl(const ManagerBase &mgr) :m_coll(MemPoolAllocator(mgr.GetPool())) ,m_sortedHypos(NULL) { - m_bestScore = -std::numeric_limits::infinity(); - m_minBeamScore = -std::numeric_limits::infinity(); + //m_bestScore = -std::numeric_limits::infinity(); + //m_minBeamScore = -std::numeric_limits::infinity(); m_worseScore = std::numeric_limits::infinity(); } @@ -63,15 +63,21 @@ void HypothesisColl::Add( << GetSize() << " " << endl; */ - if (futureScore < m_minBeamScore - || (GetSize() >= stackSize) && futureScore < m_worseScore ) { + if (GetSize() >= stackSize && futureScore < m_worseScore) { // beam threshold or really bad hypo that won't make the pruning cut // as more hypos are added, the m_worseScore stat gets out of date and isn't the optimum cut-off point - //cerr << "Discard:" << hypo->Debug(system) << endl; + //cerr << "Discard, really bad score:" << hypo->Debug(system) << endl; hypoRecycle.Recycle(hypo); return; } - //cerr << "OK:" << 
hypo->Debug(system) << endl; + /* + if (futureScore < m_minBeamScore) { + // beam threshold or really bad hypo that won't make the pruning cut + // as more hypos are added, the m_worseScore stat gets out of date and isn't the optimum cut-off point + //cerr << "Discard, below beam:" << hypo->Debug(system) << endl; + hypoRecycle.Recycle(hypo); + return; + } if (futureScore > m_bestScore) { m_bestScore = hypo->GetFutureScore(); @@ -83,6 +89,8 @@ void HypothesisColl::Add( m_minBeamScore = m_bestScore + beamWidth; } } + //cerr << "OK:" << hypo->Debug(system) << endl; + */ StackAdd added = Add(hypo); @@ -211,8 +219,9 @@ void HypothesisColl::Clear() { m_sortedHypos = NULL; m_coll.clear(); - m_bestScore = -std::numeric_limits::infinity(); - m_minBeamScore = -std::numeric_limits::infinity(); + + //m_bestScore = -std::numeric_limits::infinity(); + //m_minBeamScore = -std::numeric_limits::infinity(); m_worseScore = std::numeric_limits::infinity(); } diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index 352504cf8..d89f28c7c 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -60,7 +60,9 @@ protected: _HCType m_coll; mutable Hypotheses *m_sortedHypos; - SCORE m_bestScore, m_worseScore, m_minBeamScore; + //SCORE m_bestScore; + SCORE m_worseScore; + //SCORE m_minBeamScore; StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; From a95a461ec75c2d342b07e723eccb1e9e32c6dfc6 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 13:17:55 +0000 Subject: [PATCH 024/176] delete unused param to get rid of annoying but irrelevant warning in clang --- contrib/moses2/SCFG/Manager.cpp | 3 +-- contrib/moses2/SCFG/nbest/NBest.cpp | 7 +++---- contrib/moses2/SCFG/nbest/NBest.h | 3 +-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/contrib/moses2/SCFG/Manager.cpp b/contrib/moses2/SCFG/Manager.cpp index 5f1b731c0..5db4e2a89 100644 --- 
a/contrib/moses2/SCFG/Manager.cpp +++ b/contrib/moses2/SCFG/Manager.cpp @@ -90,9 +90,8 @@ void Manager::Decode() } } - const Stack *stack; - /* + const Stack *stack; stack = &m_stacks.GetStack(0, 5); cerr << "stack 0,12:" << stack->Debug(system) << endl; */ diff --git a/contrib/moses2/SCFG/nbest/NBest.cpp b/contrib/moses2/SCFG/nbest/NBest.cpp index e8515d49b..99c005ee3 100644 --- a/contrib/moses2/SCFG/nbest/NBest.cpp +++ b/contrib/moses2/SCFG/nbest/NBest.cpp @@ -50,7 +50,7 @@ NBest::NBest( } stringstream strm; - OutputToStream(mgr, strm, nbestColl); + OutputToStream(mgr, strm); m_str = strm.str(); } @@ -83,7 +83,7 @@ NBest::NBest(const SCFG::Manager &mgr, m_scores->PlusEquals(mgr.system, newScores); stringstream strm; - OutputToStream(mgr, strm, nbestColl); + OutputToStream(mgr, strm); m_str = strm.str(); } @@ -141,8 +141,7 @@ void NBest::CreateDeviants( void NBest::OutputToStream( const SCFG::Manager &mgr, - std::stringstream &strm, - const NBestColl &nbestColl) const + std::stringstream &strm) const { const SCFG::Hypothesis &hypo = GetHypo(); //strm << &hypo << " "; diff --git a/contrib/moses2/SCFG/nbest/NBest.h b/contrib/moses2/SCFG/nbest/NBest.h index 3e4c6112a..fa21866bb 100644 --- a/contrib/moses2/SCFG/nbest/NBest.h +++ b/contrib/moses2/SCFG/nbest/NBest.h @@ -81,8 +81,7 @@ protected: void OutputToStream( const SCFG::Manager &mgr, - std::stringstream &strm, - const NBestColl &nbestColl) const; + std::stringstream &strm) const; }; ///////////////////////////////////////////////////////////// From 00d5dfca4d788b02c98b94f8abcb2ccbe89bd018 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 13:25:00 +0000 Subject: [PATCH 025/176] Word -> WORD for template class --- contrib/moses2/TargetPhrase.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/moses2/TargetPhrase.h b/contrib/moses2/TargetPhrase.h index 41cbb78c5..50f66326a 100644 --- a/contrib/moses2/TargetPhrase.h +++ b/contrib/moses2/TargetPhrase.h @@ -86,7 +86,7 @@ public: } 
- void OutputToStream(const System &system, const Phrase &inputPhrase, std::ostream &out) const + void OutputToStream(const System &system, const Phrase &inputPhrase, std::ostream &out) const { // get placeholders FactorType placeholderFactor = system.options.input.placeholder_factor; From 3275f1be13ec997c10b6dfa562e969aea1c3a3eb Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 13:29:52 +0000 Subject: [PATCH 026/176] warning --- contrib/moses2/SCFG/Stack.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/moses2/SCFG/Stack.cpp b/contrib/moses2/SCFG/Stack.cpp index 03347dea0..2ec6ca543 100644 --- a/contrib/moses2/SCFG/Stack.cpp +++ b/contrib/moses2/SCFG/Stack.cpp @@ -76,7 +76,7 @@ Moses2::HypothesisColl &Stack::GetColl(const SCFG::Word &nt) const Hypothesis *Stack::GetBestHypo() const { SCORE bestScore = -std::numeric_limits::infinity(); - const HypothesisBase *bestHypo; + const HypothesisBase *bestHypo = NULL; BOOST_FOREACH(const Coll::value_type &val, m_coll){ const Moses2::HypothesisColl &hypos = *val.second; const Moses2::HypothesisBase *hypo = hypos.GetBestHypo(); From bb36df3a5ef8ef952106da75be30f179b22f2e1b Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 14:34:24 +0000 Subject: [PATCH 027/176] refine worseScore discarding --- contrib/moses2/HypothesisColl.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 701ff318d..520557f68 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -53,7 +53,9 @@ void HypothesisColl::Add( Recycler &hypoRecycle, ArcLists &arcLists) { - size_t stackSize = system.options.search.stack_size; + size_t maxStackSize = system.options.search.stack_size; + //cerr << "stackSize=" << stackSize << endl; + SCORE futureScore = hypo->GetFutureScore(); /* cerr << "scores:" @@ -63,7 +65,7 @@ void HypothesisColl::Add( << GetSize() << " " << 
endl; */ - if (GetSize() >= stackSize && futureScore < m_worseScore) { + if (GetSize() >= maxStackSize && futureScore < m_worseScore) { // beam threshold or really bad hypo that won't make the pruning cut // as more hypos are added, the m_worseScore stat gets out of date and isn't the optimum cut-off point //cerr << "Discard, really bad score:" << hypo->Debug(system) << endl; @@ -102,8 +104,14 @@ void HypothesisColl::Add( if (!added.added) { hypoRecycle.Recycle(hypo); } - else if (added.other) { - hypoRecycle.Recycle(added.other); + else { + if (added.other) { + hypoRecycle.Recycle(added.other); + } + + if (GetSize() <= maxStackSize && hypo->GetFutureScore() < m_worseScore) { + m_worseScore = futureScore; + } } } @@ -116,10 +124,6 @@ StackAdd HypothesisColl::Add(const HypothesisBase *hypo) // CHECK RECOMBINATION if (addRet.second) { // equiv hypo doesn't exists - if (hypo->GetFutureScore() < m_worseScore) { - m_worseScore = hypo->GetFutureScore(); - } - return StackAdd(true, NULL); } else { From b188c3b649f1fbaffa75f2ee70d2f7a87c0db488 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 15:31:02 +0000 Subject: [PATCH 028/176] check that all files exists --- .../TranslationModel/ProbingPT/ProbingPT.cpp | 16 ++++++++++++++++ .../TranslationModel/ProbingPT/ProbingPT.h | 3 +++ .../TranslationModel/ProbingPT/querying.cpp | 7 ++++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp index 2c9a5f31a..a83fc6e0d 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp @@ -69,6 +69,7 @@ std::pair ProbingPT::ActiveChartEntryProbing::GetKey(const SCFG: //////////////////////////////////////////////////////////////////////////// ProbingPT::ProbingPT(size_t startInd, const std::string &line) :PhraseTable(startInd, line) +,load_method(util::POPULATE_OR_READ) { 
ReadParameters(); } @@ -149,6 +150,21 @@ void ProbingPT::Load(System &system) CreateCache(system); } +void ProbingPT::SetParameter(const std::string& key, const std::string& value) +{ + if (key == "load") { + if (value == "lazy") { + load_method = util::LAZY; + } + else if (value == "populate") { + load_method = util::POPULATE_OR_READ; + } + else { + UTIL_THROW2("load method not supported" << value); + } + } +} + void ProbingPT::CreateAlignmentMap(System &system, const std::string path) { const std::vector< std::vector > &probingAlignColl = m_engine->getAlignments(); diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h index 6e5c7430c..9b8905843 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h +++ b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h @@ -15,6 +15,7 @@ #include "../../Vector.h" #include "../../Phrase.h" #include "../../SCFG/ActiveChart.h" +#include "util/mmap.hh" namespace Moses2 { @@ -69,6 +70,7 @@ public: virtual ~ProbingPT(); void Load(System &system); + virtual void SetParameter(const std::string& key, const std::string& value); void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const; uint64_t GetUnk() const @@ -91,6 +93,7 @@ protected: std::vector m_sourceVocab; // factor id -> pt id std::vector< std::pair > m_targetVocab; // pt id -> factor* std::vector m_aligns; + util::LoadMethod load_method; uint64_t m_unkId; QueryEngine *m_engine; diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp index fb8ccef9a..b1f19eb6e 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp @@ -17,10 +17,15 @@ QueryEngine::QueryEngine(const char * filepath) std::string path_to_source_vocabid = basepath + "/source_vocabids"; std::string alignPath = basepath + "/Alignments.dat"; - if (!FileExists(path_to_config)) { + if 
(!FileExists(path_to_config) || !FileExists(path_to_hashtable) || + !FileExists(path_to_source_vocabid) || !FileExists(alignPath) || + !FileExists(basepath + "TargetColl.dat") || !FileExists(basepath + "TargetVocab.dat") || + !FileExists(basepath + "cache")) { UTIL_THROW2("Binary table doesn't exist is didn't finish binarizing: " << path_to_config); } + + ///Source phrase vocabids read_map(source_vocabids, path_to_source_vocabid.c_str()); From 114702fcd8a19c64d9bbd4f13de35da73a12b5b1 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 15:54:43 +0000 Subject: [PATCH 029/176] hack kenlm's populate load by catting files into memory. Also check that all files exists --- .../TranslationModel/ProbingPT/ProbingPT.cpp | 5 +- .../TranslationModel/ProbingPT/querying.cpp | 54 ++++++++++++++++--- .../TranslationModel/ProbingPT/querying.hh | 4 +- 3 files changed, 53 insertions(+), 10 deletions(-) diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp index a83fc6e0d..26f570641 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp @@ -81,7 +81,7 @@ ProbingPT::~ProbingPT() void ProbingPT::Load(System &system) { - m_engine = new QueryEngine(m_path.c_str()); + m_engine = new QueryEngine(m_path.c_str(), load_method); m_unkId = 456456546456; @@ -163,6 +163,9 @@ void ProbingPT::SetParameter(const std::string& key, const std::string& value) UTIL_THROW2("load method not supported" << value); } } + else { + PhraseTable::SetParameter(key, value); + } } void ProbingPT::CreateAlignmentMap(System &system, const std::string path) diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp index b1f19eb6e..e47a6d015 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp @@ -7,7 +7,7 @@ using 
namespace std; namespace Moses2 { -QueryEngine::QueryEngine(const char * filepath) +QueryEngine::QueryEngine(const char * filepath, util::LoadMethod load_method) { //Create filepaths @@ -17,15 +17,12 @@ QueryEngine::QueryEngine(const char * filepath) std::string path_to_source_vocabid = basepath + "/source_vocabids"; std::string alignPath = basepath + "/Alignments.dat"; - if (!FileExists(path_to_config) || !FileExists(path_to_hashtable) || - !FileExists(path_to_source_vocabid) || !FileExists(alignPath) || - !FileExists(basepath + "TargetColl.dat") || !FileExists(basepath + "TargetVocab.dat") || - !FileExists(basepath + "cache")) { - UTIL_THROW2("Binary table doesn't exist is didn't finish binarizing: " << path_to_config); + file_exits(basepath); + + if (load_method == util::POPULATE_OR_READ) { + cat_files(basepath); } - - ///Source phrase vocabids read_map(source_vocabids, path_to_source_vocabid.c_str()); @@ -144,5 +141,46 @@ void QueryEngine::read_alignments(const std::string &alignPath) } } +void QueryEngine::file_exits(const std::string &basePath) +{ + if (!FileExists(basePath + "/Alignments.dat")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/Alignments.dat"); + } + if (!FileExists(basePath + "/TargetColl.dat")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetColl.dat"); + } + if (!FileExists(basePath + "/TargetVocab.dat")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetVocab.dat"); + } + if (!FileExists(basePath + "/cache")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/cache"); + } + if (!FileExists(basePath + "/config")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/config"); + } + if (!FileExists(basePath + "/probing_hash.dat")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/probing_hash.dat"); + } + if (!FileExists(basePath + "/source_vocabids")) { + UTIL_THROW2("Require file does not exist in: " << basePath << 
"/source_vocabids"); + } + + /* + + if (!FileExists(path_to_config) || !FileExists(path_to_hashtable) || + !FileExists(path_to_source_vocabid) || !FileExists(basepath + alignPath) || + !FileExists(basepath + "/TargetColl.dat") || !FileExists(basepath + "/TargetVocab.dat") || + !FileExists(basepath + "/cache")) { + UTIL_THROW2("A required table doesn't exist in: " << basepath); + } + */ +} + +void QueryEngine::cat_files(const std::string &basePath) +{ + system((string("cat ") + basePath + "/TargetColl.dat > /dev/null").c_str()); + system((string("cat ") + basePath + "/probing_hash.dat > /dev/null").c_str()); +} + } diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.hh b/contrib/moses2/TranslationModel/ProbingPT/querying.hh index aae4b4f09..03d7667a9 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/querying.hh +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.hh @@ -27,13 +27,15 @@ class QueryEngine bool is_reordering; void read_alignments(const std::string &alignPath); + void file_exits(const std::string &basePath); + void cat_files(const std::string &basePath); public: int num_scores; int num_lex_scores; bool logProb; - QueryEngine(const char *); + QueryEngine(const char *, util::LoadMethod load_method); ~QueryEngine(); std::pair query(uint64_t key); From 62e2c852201ebe28559e2a5e707ec90a24c90c51 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 17:05:13 +0000 Subject: [PATCH 030/176] delete batch algorithm --- contrib/moses2/Jamfile | 4 - contrib/moses2/PhraseBased/Batch/Search.cpp | 171 -------------------- contrib/moses2/PhraseBased/Batch/Search.h | 53 ------ contrib/moses2/PhraseBased/Batch/Stack.cpp | 35 ---- contrib/moses2/PhraseBased/Batch/Stack.h | 32 ---- contrib/moses2/PhraseBased/Batch/Stacks.cpp | 67 -------- contrib/moses2/PhraseBased/Batch/Stacks.h | 62 ------- contrib/moses2/PhraseBased/Manager.cpp | 7 +- 8 files changed, 3 insertions(+), 428 deletions(-) delete mode 100644 
contrib/moses2/PhraseBased/Batch/Search.cpp delete mode 100644 contrib/moses2/PhraseBased/Batch/Search.h delete mode 100644 contrib/moses2/PhraseBased/Batch/Stack.cpp delete mode 100644 contrib/moses2/PhraseBased/Batch/Stack.h delete mode 100644 contrib/moses2/PhraseBased/Batch/Stacks.cpp delete mode 100644 contrib/moses2/PhraseBased/Batch/Stacks.h diff --git a/contrib/moses2/Jamfile b/contrib/moses2/Jamfile index 850dbcd1f..98e1c1e30 100644 --- a/contrib/moses2/Jamfile +++ b/contrib/moses2/Jamfile @@ -114,10 +114,6 @@ alias deps : ../..//z ../..//boost_iostreams ../..//boost_filesystem ../../mose PhraseBased/CubePruningMiniStack/Search.cpp PhraseBased/CubePruningMiniStack/Stack.cpp - PhraseBased/Batch/Search.cpp - PhraseBased/Batch/Stack.cpp - PhraseBased/Batch/Stacks.cpp - # PhraseBased/CubePruningCardinalStack/Misc.cpp # PhraseBased/CubePruningCardinalStack/Search.cpp # PhraseBased/CubePruningCardinalStack/Stack.cpp diff --git a/contrib/moses2/PhraseBased/Batch/Search.cpp b/contrib/moses2/PhraseBased/Batch/Search.cpp deleted file mode 100644 index 376232199..000000000 --- a/contrib/moses2/PhraseBased/Batch/Search.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* - * SearchNormal.cpp - * - * Created on: 25 Oct 2015 - * Author: hieu - */ - -#include "Search.h" -#include -#include -#include "Stack.h" -#include "../Manager.h" -#include "../TrellisPath.h" -#include "../Sentence.h" -#include "../../TrellisPaths.h" -#include "../../InputPathsBase.h" -#include "../../Phrase.h" -#include "../../System.h" -#include "../../PhraseBased/TargetPhrases.h" - -using namespace std; - -namespace Moses2 -{ -namespace NSBatch -{ - -Search::Search(Manager &mgr) -:Moses2::Search(mgr) -, m_stacks(mgr) -, m_batch(mgr.system.GetBatch(mgr.GetSystemPool())) -{ - // TODO Auto-generated constructor stub - -} - -Search::~Search() -{ - // TODO Auto-generated destructor stub -} - -void Search::Decode() -{ - // init stacks - const Sentence &sentence = static_cast(mgr.GetInput()); - m_stacks.Init(mgr, 
sentence.GetSize() + 1); - - const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); - Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); - initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), - initBitmap); - initHypo->EmptyHypothesisState(mgr.GetInput()); - - m_stacks.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists); - - for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) { - Decode(stackInd); - //cerr << m_stacks << endl; - - // delete stack to save mem - if (stackInd < m_stacks.GetSize() - 1) { - m_stacks.Delete(stackInd); - } - //cerr << m_stacks << endl; - } -} - -void Search::Decode(size_t stackInd) -{ - Stack &stack = m_stacks[stackInd]; - if (&stack == &m_stacks.Back()) { - // last stack. don't do anythin - return; - } - - const Hypotheses &hypos = stack.GetSortedAndPruneHypos(mgr, mgr.arcLists); - - const InputPaths &paths = mgr.GetInputPaths(); - - BOOST_FOREACH(const InputPathBase *path, paths){ - BOOST_FOREACH(const HypothesisBase *hypo, hypos) { - Extend(*static_cast(hypo), *static_cast(path)); - } - } - - // process batch - mgr.system.featureFunctions.EvaluateWhenAppliedBatch(m_batch); - - for (size_t i = 0; i < m_batch.size(); ++i) { - Hypothesis *hypo = m_batch[i]; - m_stacks.Add(hypo, mgr.GetHypoRecycle(), mgr.arcLists); - } - m_batch.clear(); -} - -void Search::Extend(const Hypothesis &hypo, const InputPath &path) -{ - const Bitmap &hypoBitmap = hypo.GetBitmap(); - const Range &hypoRange = hypo.GetInputPath().range; - const Range &pathRange = path.range; - - if (!CanExtend(hypoBitmap, hypoRange.GetEndPos(), pathRange)) { - return; - } - - const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint(); - if (!reorderingConstraint.Check(hypoBitmap, pathRange.GetStartPos(), pathRange.GetEndPos())) { - return; - } - //cerr << " YES" << endl; - - // extend this hypo - const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); - //SCORE 
estimatedScore = mgr.GetEstimatedScores().CalcFutureScore2(bitmap, pathRange.GetStartPos(), pathRange.GetEndPos()); - SCORE estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap); - - size_t numPt = mgr.system.mappings.size(); - const TargetPhrases **tpsAllPt = path.targetPhrases; - for (size_t i = 0; i < numPt; ++i) { - const TargetPhrases *tps = tpsAllPt[i]; - if (tps) { - Extend(hypo, *tps, path, newBitmap, estimatedScore); - } - } -} - -void Search::Extend(const Hypothesis &hypo, const TargetPhrases &tps, - const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore) -{ - BOOST_FOREACH(const TargetPhraseImpl *tp, tps){ - Extend(hypo, *tp, path, newBitmap, estimatedScore); - } -} - -void Search::Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp, - const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore) -{ - Hypothesis *newHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); - newHypo->Init(mgr, hypo, path, tp, newBitmap, estimatedScore); - - m_batch.push_back(newHypo); - //newHypo->EvaluateWhenApplied(); - - //m_stacks.Add(newHypo, mgr.GetHypoRecycle(), mgr.arcLists); - - //m_arcLists.AddArc(stackAdded.added, newHypo, stackAdded.other); - //stack.Prune(mgr.GetHypoRecycle(), mgr.system.stackSize, mgr.system.stackSize * 2); - -} - -const Hypothesis *Search::GetBestHypo() const -{ - const Stack &lastStack = m_stacks.Back(); - return lastStack.GetBestHypo(); -} - -void Search::AddInitialTrellisPaths(TrellisPaths &paths) const -{ - const Stack &lastStack = m_stacks.Back(); - const Hypotheses &hypos = lastStack.GetSortedAndPruneHypos(mgr, mgr.arcLists); - - BOOST_FOREACH(const HypothesisBase *hypoBase, hypos){ - const Hypothesis *hypo = static_cast(hypoBase); - TrellisPath *path = new TrellisPath(hypo, mgr.arcLists); - paths.Add(path); - } -} - -} // namespace -} - diff --git a/contrib/moses2/PhraseBased/Batch/Search.h b/contrib/moses2/PhraseBased/Batch/Search.h deleted file mode 100644 index 4f4b35d2e..000000000 --- 
a/contrib/moses2/PhraseBased/Batch/Search.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * SearchNormal.h - * - * Created on: 25 Oct 2015 - * Author: hieu - */ -#pragma once - -#include -#include "../../legacy/Range.h" -#include "../../legacy/Bitmap.h" -#include "../../TypeDef.h" -#include "../Search.h" -#include "Stacks.h" - -namespace Moses2 -{ -class Hypothesis; -class InputPath; -class TargetPhrases; -class TargetPhraseImpl; - -namespace NSBatch -{ -class Stacks; - -class Search: public Moses2::Search -{ -public: - Search(Manager &mgr); - virtual ~Search(); - - virtual void Decode(); - const Hypothesis *GetBestHypo() const; - - void AddInitialTrellisPaths(TrellisPaths &paths) const; - -protected: - Stacks m_stacks; - - Batch &m_batch; - - void Decode(size_t stackInd); - void Extend(const Hypothesis &hypo, const InputPath &path); - void Extend(const Hypothesis &hypo, const TargetPhrases &tps, - const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore); - void Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp, - const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore); - -}; - -} -} diff --git a/contrib/moses2/PhraseBased/Batch/Stack.cpp b/contrib/moses2/PhraseBased/Batch/Stack.cpp deleted file mode 100644 index ebe6832da..000000000 --- a/contrib/moses2/PhraseBased/Batch/Stack.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Stack.cpp - * - * Created on: 24 Oct 2015 - * Author: hieu - */ -#include -#include "Stack.h" -#include "../Hypothesis.h" -#include "../Manager.h" -#include "../../Scores.h" -#include "../../HypothesisColl.h" - -using namespace std; - -namespace Moses2 -{ - -namespace NSBatch -{ - -Stack::Stack(const Manager &mgr) : - HypothesisColl(mgr) -{ - // TODO Auto-generated constructor stub - -} - -Stack::~Stack() -{ - // TODO Auto-generated destructor stub -} - -} -} diff --git a/contrib/moses2/PhraseBased/Batch/Stack.h b/contrib/moses2/PhraseBased/Batch/Stack.h deleted file mode 100644 index ad8c3c649..000000000 --- 
a/contrib/moses2/PhraseBased/Batch/Stack.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Stack.h - * - * Created on: 24 Oct 2015 - * Author: hieu - */ -#pragma once - -#include -#include -#include "../Hypothesis.h" -#include "../../TypeDef.h" -#include "../../HypothesisColl.h" -#include "../../legacy/Util2.h" - -namespace Moses2 -{ - -namespace NSBatch -{ -class Stack: public HypothesisColl -{ -public: - Stack(const Manager &mgr); - virtual ~Stack(); - -protected: - -}; - -} -} diff --git a/contrib/moses2/PhraseBased/Batch/Stacks.cpp b/contrib/moses2/PhraseBased/Batch/Stacks.cpp deleted file mode 100644 index 2f46aef6f..000000000 --- a/contrib/moses2/PhraseBased/Batch/Stacks.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Stacks.cpp - * - * Created on: 6 Nov 2015 - * Author: hieu - */ - -#include "Stacks.h" -#include "../Manager.h" -#include "../../System.h" - -using namespace std; - -namespace Moses2 -{ - -namespace NSBatch -{ - -Stacks::Stacks(const Manager &mgr) : - m_mgr(mgr) -{ - // TODO Auto-generated constructor stub - -} - -Stacks::~Stacks() -{ - for (size_t i = 0; i < m_stacks.size(); ++i) { - delete m_stacks[i]; - } -} - -void Stacks::Init(const Manager &mgr, size_t numStacks) -{ - m_stacks.resize(numStacks); - for (size_t i = 0; i < m_stacks.size(); ++i) { - m_stacks[i] = new Stack(mgr); - } -} - -std::string Stacks::Debug(const System &system) const -{ - stringstream out; - for (size_t i = 0; i < GetSize(); ++i) { - const Stack *stack = m_stacks[i]; - if (stack) { - out << stack->GetSize() << " "; - } - else { - out << "N "; - } - } - return out.str(); -} - -void Stacks::Add(Hypothesis *hypo, Recycler &hypoRecycle, - ArcLists &arcLists) -{ - size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered(); - //cerr << "numWordsCovered=" << numWordsCovered << endl; - Stack &stack = *m_stacks[numWordsCovered]; - stack.Add(m_mgr.system, hypo, hypoRecycle, arcLists); -} - -} -} diff --git a/contrib/moses2/PhraseBased/Batch/Stacks.h 
b/contrib/moses2/PhraseBased/Batch/Stacks.h deleted file mode 100644 index 8cd2d857f..000000000 --- a/contrib/moses2/PhraseBased/Batch/Stacks.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Stacks.h - * - * Created on: 6 Nov 2015 - * Author: hieu - */ - -#pragma once - -#include -#include "Stack.h" -#include "../../Recycler.h" - -namespace Moses2 -{ -class Manager; -class ArcLists; - -namespace NSBatch -{ - -class Stacks -{ -public: - Stacks(const Manager &mgr); - virtual ~Stacks(); - - void Init(const Manager &mgr, size_t numStacks); - - size_t GetSize() const - { - return m_stacks.size(); - } - - const Stack &Back() const - { - return *m_stacks.back(); - } - - Stack &operator[](size_t ind) - { - return *m_stacks[ind]; - } - - void Delete(size_t ind) - { - delete m_stacks[ind]; - m_stacks[ind] = NULL; - } - - void Add(Hypothesis *hypo, Recycler &hypoRecycle, - ArcLists &arcLists); - - std::string Debug(const System &system) const; - -protected: - const Manager &m_mgr; - std::vector m_stacks; -}; - -} -} diff --git a/contrib/moses2/PhraseBased/Manager.cpp b/contrib/moses2/PhraseBased/Manager.cpp index 6fd1b0db3..b89897070 100644 --- a/contrib/moses2/PhraseBased/Manager.cpp +++ b/contrib/moses2/PhraseBased/Manager.cpp @@ -16,7 +16,6 @@ #include "Normal/Search.h" #include "CubePruningMiniStack/Search.h" -#include "Batch/Search.h" /* #include "CubePruningPerMiniStack/Search.h" @@ -95,7 +94,8 @@ void Manager::Init() m_search = new NSNormal::Search(*this); break; case NormalBatch: - m_search = new NSBatch::Search(*this); + //m_search = new NSBatch::Search(*this); + UTIL_THROW2("Not implemented"); break; case CubePruning: case CubePruningMiniStack: @@ -116,8 +116,7 @@ void Manager::Init() break; */ default: - cerr << "Unknown search algorithm" << endl; - abort(); + UTIL_THROW2("Unknown search algorithm"); } } From b373a37d00169cc606c03ed0f1f3abb4ca88882e Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 17:26:58 +0000 Subject: [PATCH 031/176] get ready to change 
GetSortedAndPruneHypos() to non-const --- contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp index d74cb7d99..4ea61e0ba 100644 --- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp +++ b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp @@ -172,6 +172,9 @@ void Search::PostDecode(size_t stackInd) const Bitmap &hypoBitmap = *val.first.first; size_t firstGap = hypoBitmap.GetFirstGapPos(); size_t hypoEndPos = val.first.second; + + Moses2::HypothesisColl &hypos = *val.second; + //cerr << "key=" << hypoBitmap << " " << firstGap << " " << inputSize << endl; // create edges to next hypos from existing hypos @@ -203,7 +206,7 @@ void Search::PostDecode(size_t stackInd) CubeEdges &edges = *m_cubeEdges[numWords]; // sort hypo for a particular bitmap and hypoEndPos - const Hypotheses &sortedHypos = val.second->GetSortedAndPruneHypos(mgr, mgr.arcLists); + const Hypotheses &sortedHypos = hypos.GetSortedAndPruneHypos(mgr, mgr.arcLists); size_t numPt = mgr.system.mappings.size(); for (size_t i = 0; i < numPt; ++i) { @@ -228,7 +231,7 @@ void Search::AddInitialTrellisPaths(TrellisPaths &paths) const { const Stack::Coll &coll = m_stack.GetColl(); BOOST_FOREACH(const Stack::Coll::value_type &val, coll){ - const Moses2::HypothesisColl &hypos = *val.second; + Moses2::HypothesisColl &hypos = *val.second; const Hypotheses &sortedHypos = hypos.GetSortedAndPruneHypos(mgr, mgr.arcLists); BOOST_FOREACH(const HypothesisBase *hypoBase, sortedHypos) { From f7cf9a84edc8a3a17e1eabe32f0bc8f3097ba97c Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 17:29:52 +0000 Subject: [PATCH 032/176] use GetBestHypo() --- contrib/moses2/PhraseBased/Normal/Search.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git 
a/contrib/moses2/PhraseBased/Normal/Search.cpp b/contrib/moses2/PhraseBased/Normal/Search.cpp index cb528d17f..5c89eecc6 100644 --- a/contrib/moses2/PhraseBased/Normal/Search.cpp +++ b/contrib/moses2/PhraseBased/Normal/Search.cpp @@ -140,13 +140,7 @@ void Search::Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp, const Hypothesis *Search::GetBestHypo() const { const Stack &lastStack = m_stacks.Back(); - const Hypotheses &sortedHypos = lastStack.GetSortedAndPruneHypos(mgr, - mgr.arcLists); - - const Hypothesis *best = NULL; - if (sortedHypos.size()) { - best = static_cast(sortedHypos[0]); - } + const Hypothesis *best = lastStack.GetBestHypo(); return best; } From fc4fa0f19cda10183a7514e700eff576ed85ec2c Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 18:04:26 +0000 Subject: [PATCH 033/176] add inter-stack pruning --- contrib/moses2/HypothesisColl.cpp | 66 +++++++++++++++++-- contrib/moses2/HypothesisColl.h | 4 +- .../CubePruningMiniStack/Stack.cpp | 2 +- contrib/moses2/PhraseBased/Normal/Stacks.cpp | 2 +- contrib/moses2/SCFG/Stack.cpp | 2 +- 5 files changed, 68 insertions(+), 8 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 520557f68..3770bf982 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -48,13 +48,17 @@ const HypothesisBase *HypothesisColl::GetBestHypo() const } void HypothesisColl::Add( - const System &system, + const ManagerBase &mgr, HypothesisBase *hypo, Recycler &hypoRecycle, ArcLists &arcLists) { - size_t maxStackSize = system.options.search.stack_size; - //cerr << "stackSize=" << stackSize << endl; + size_t maxStackSize = mgr.system.options.search.stack_size; + //cerr << "maxStackSize=" << maxStackSize << endl; + + if (GetSize() * 2 > maxStackSize) { + PruneHypos(mgr, mgr.arcLists); + } SCORE futureScore = hypo->GetFutureScore(); /* @@ -96,7 +100,7 @@ void HypothesisColl::Add( StackAdd added = Add(hypo); - size_t nbestSize = 
system.options.nbest.nbest_size; + size_t nbestSize = mgr.system.options.nbest.nbest_size; if (nbestSize) { arcLists.AddArc(added.added, hypo, added.other); } @@ -219,6 +223,60 @@ void HypothesisColl::SortAndPruneHypos(const ManagerBase &mgr, */ } +void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const +{ + size_t stackSize = mgr.system.options.search.stack_size; + Recycler &recycler = mgr.GetHypoRecycle(); + + /* + cerr << "UNSORTED hypos: "; + BOOST_FOREACH(const HypothesisBase *hypo, m_coll) { + cerr << hypo << "(" << hypo->GetFutureScore() << ")" << " "; + } + cerr << endl; + */ + vector sortedHypos; + size_t ind = 0; + BOOST_FOREACH(const HypothesisBase *hypo, m_coll){ + sortedHypos[ind] = hypo; + ++ind; + } + + vector::iterator iterMiddle; + iterMiddle = + (stackSize == 0 || sortedHypos.size() < stackSize) ? + sortedHypos.end() : sortedHypos.begin() + stackSize; + + std::partial_sort(sortedHypos.begin(), iterMiddle, sortedHypos.end(), + HypothesisFutureScoreOrderer()); + + // prune + if (stackSize && sortedHypos.size() > stackSize) { + for (size_t i = stackSize; i < sortedHypos.size(); ++i) { + HypothesisBase *hypo = const_cast((sortedHypos)[i]); + recycler.Recycle(hypo); + + // delete from arclist + if (mgr.system.options.nbest.nbest_size) { + arcLists.Delete(hypo); + } + + // delete from collection + //Delete(hypo); + } + sortedHypos.resize(stackSize); + } + + /* + cerr << "sorted hypos: "; + for (size_t i = 0; i < sortedHypos.size(); ++i) { + const HypothesisBase *hypo = sortedHypos[i]; + cerr << hypo << " "; + } + cerr << endl; + */ +} + void HypothesisColl::Clear() { m_sortedHypos = NULL; diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index d89f28c7c..0771fa449 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -25,7 +25,7 @@ class HypothesisColl public: HypothesisColl(const ManagerBase &mgr); - void Add(const System &system, + void Add(const ManagerBase &mgr, 
HypothesisBase *hypo, Recycler &hypoRecycle, ArcLists &arcLists); @@ -67,6 +67,8 @@ protected: StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; + void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; + }; } /* namespace Moses2 */ diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp b/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp index 1f456dd93..e2b81f0ba 100644 --- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp +++ b/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp @@ -47,7 +47,7 @@ void Stack::Add(Hypothesis *hypo, Recycler &hypoRecycle, { HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos()); Moses2::HypothesisColl &coll = GetMiniStack(key); - coll.Add(m_mgr.system, hypo, hypoRecycle, arcLists); + coll.Add(m_mgr, hypo, hypoRecycle, arcLists); } const Hypothesis *Stack::GetBestHypo() const diff --git a/contrib/moses2/PhraseBased/Normal/Stacks.cpp b/contrib/moses2/PhraseBased/Normal/Stacks.cpp index 4aab42347..bb7239cf8 100644 --- a/contrib/moses2/PhraseBased/Normal/Stacks.cpp +++ b/contrib/moses2/PhraseBased/Normal/Stacks.cpp @@ -60,7 +60,7 @@ void Stacks::Add(Hypothesis *hypo, Recycler &hypoRecycle, size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered(); //cerr << "numWordsCovered=" << numWordsCovered << endl; Stack &stack = *m_stacks[numWordsCovered]; - stack.Add(m_mgr.system, hypo, hypoRecycle, arcLists); + stack.Add(m_mgr, hypo, hypoRecycle, arcLists); } } diff --git a/contrib/moses2/SCFG/Stack.cpp b/contrib/moses2/SCFG/Stack.cpp index 2ec6ca543..163761a49 100644 --- a/contrib/moses2/SCFG/Stack.cpp +++ b/contrib/moses2/SCFG/Stack.cpp @@ -33,7 +33,7 @@ void Stack::Add(SCFG::Hypothesis *hypo, Recycler &hypoRecycle, //cerr << "lhs=" << lhs << endl; HypothesisColl &coll = GetColl(lhs); - coll.Add(m_mgr.system, hypo, hypoRecycle, arcLists); + coll.Add(m_mgr, hypo, hypoRecycle, arcLists); } size_t 
Stack::GetSize() const From 9a272ba519757d9d6fcaa4a4ca7fff90b2db9b68 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 22:47:37 +0000 Subject: [PATCH 034/176] add inter-stack pruning --- contrib/moses2/HypothesisColl.cpp | 35 ++++++++++++++++++++----------- contrib/moses2/HypothesisColl.h | 4 +++- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 3770bf982..06be07991 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -54,9 +54,9 @@ void HypothesisColl::Add( ArcLists &arcLists) { size_t maxStackSize = mgr.system.options.search.stack_size; - //cerr << "maxStackSize=" << maxStackSize << endl; - if (GetSize() * 2 > maxStackSize) { + if (GetSize() > maxStackSize * 2) { + //cerr << "maxStackSize=" << maxStackSize << " " << GetSize() << endl; PruneHypos(mgr, mgr.arcLists); } @@ -223,9 +223,9 @@ void HypothesisColl::SortAndPruneHypos(const ManagerBase &mgr, */ } -void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const +void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) { - size_t stackSize = mgr.system.options.search.stack_size; + size_t maxStackSize = mgr.system.options.search.stack_size; Recycler &recycler = mgr.GetHypoRecycle(); /* @@ -235,7 +235,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) cons } cerr << endl; */ - vector sortedHypos; + vector sortedHypos(GetSize()); size_t ind = 0; BOOST_FOREACH(const HypothesisBase *hypo, m_coll){ sortedHypos[ind] = hypo; @@ -244,17 +244,16 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) cons vector::iterator iterMiddle; iterMiddle = - (stackSize == 0 || sortedHypos.size() < stackSize) ? - sortedHypos.end() : sortedHypos.begin() + stackSize; + (maxStackSize == 0 || sortedHypos.size() < maxStackSize) ? 
+ sortedHypos.end() : sortedHypos.begin() + maxStackSize; std::partial_sort(sortedHypos.begin(), iterMiddle, sortedHypos.end(), HypothesisFutureScoreOrderer()); // prune - if (stackSize && sortedHypos.size() > stackSize) { - for (size_t i = stackSize; i < sortedHypos.size(); ++i) { + if (maxStackSize && sortedHypos.size() > maxStackSize) { + for (size_t i = maxStackSize; i < sortedHypos.size(); ++i) { HypothesisBase *hypo = const_cast((sortedHypos)[i]); - recycler.Recycle(hypo); // delete from arclist if (mgr.system.options.nbest.nbest_size) { @@ -262,9 +261,11 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) cons } // delete from collection - //Delete(hypo); + Delete(hypo); + + //recycler.Recycle(hypo); } - sortedHypos.resize(stackSize); + } /* @@ -277,6 +278,16 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) cons */ } +void HypothesisColl::Delete(const HypothesisBase *hypo) +{ + cerr << "hypo=" << hypo << " " << m_coll.size() << endl; + + _HCType::const_iterator iter = m_coll.find(hypo); + UTIL_THROW_IF2(iter == m_coll.end(), "Can't find hypo"); + + m_coll.erase(iter); +} + void HypothesisColl::Clear() { m_sortedHypos = NULL; diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index 0771fa449..4e41509de 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -50,6 +50,8 @@ public: return hypo ? 
&hypo->Cast() : NULL; } + void Delete(const HypothesisBase *hypo); + std::string Debug(const System &system) const; protected: @@ -67,7 +69,7 @@ protected: StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; - void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; + void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists); }; From 6356ccae0315e50898926f01cfdb7d34537ce9e6 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 00:20:41 +0000 Subject: [PATCH 035/176] debug --- contrib/moses2/HypothesisColl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 06be07991..b7037992f 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -263,7 +263,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) // delete from collection Delete(hypo); - //recycler.Recycle(hypo); + recycler.Recycle(hypo); } } @@ -280,7 +280,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) void HypothesisColl::Delete(const HypothesisBase *hypo) { - cerr << "hypo=" << hypo << " " << m_coll.size() << endl; + //cerr << "hypo=" << hypo << " " << m_coll.size() << endl; _HCType::const_iterator iter = m_coll.find(hypo); UTIL_THROW_IF2(iter == m_coll.end(), "Can't find hypo"); From d68211cba912e96a0491ded7ec71b910b8d7a9f9 Mon Sep 17 00:00:00 2001 From: lonevvolf Date: Tue, 6 Dec 2016 09:41:32 +0100 Subject: [PATCH 036/176] Fix for number at the end of a string --- scripts/generic/ph_numbers.perl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generic/ph_numbers.perl b/scripts/generic/ph_numbers.perl index d49bc98b8..618e6fe15 100755 --- a/scripts/generic/ph_numbers.perl +++ b/scripts/generic/ph_numbers.perl @@ -88,7 +88,7 @@ sub recognize { $isRecognized = 0; } - if ($end == length($input) -1 || substr($input, 
$end, 1) eq " ") { + if ($end == length($input) || substr($input, $end, 1) eq " ") { # last word, or next char is a space } else { From e8a6677bbbbf0ccdc3f8ae07211fe988020ef7dc Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 13:23:26 +0000 Subject: [PATCH 037/176] bug in state comparison. If 2 states are actually the same object, return true, not false --- .../FF/LexicalReordering/BidirectionalReorderingState.cpp | 2 +- moses/FF/LexicalReordering/BidirectionalReorderingState.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp b/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp index bc27f2a68..8c1b409c3 100644 --- a/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp +++ b/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp @@ -54,7 +54,7 @@ size_t BidirectionalReorderingState::hash() const bool BidirectionalReorderingState::operator==(const FFState& o) const { - if (&o == this) return 0; + if (&o == this) return true; BidirectionalReorderingState const &other = static_cast(o); diff --git a/moses/FF/LexicalReordering/BidirectionalReorderingState.cpp b/moses/FF/LexicalReordering/BidirectionalReorderingState.cpp index 22f550ba8..5d264e4c8 100644 --- a/moses/FF/LexicalReordering/BidirectionalReorderingState.cpp +++ b/moses/FF/LexicalReordering/BidirectionalReorderingState.cpp @@ -15,7 +15,7 @@ size_t BidirectionalReorderingState::hash() const bool BidirectionalReorderingState::operator==(const FFState& o) const { - if (&o == this) return 0; + if (&o == this) return true; BidirectionalReorderingState const &other = static_cast(o); From 40a2588fd00a68cd7cfead5159bf4a46009ee85b Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 13:29:43 +0000 Subject: [PATCH 038/176] erase object from set --- contrib/moses2/HypothesisColl.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git 
a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index b7037992f..fb35c00e9 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -282,10 +282,8 @@ void HypothesisColl::Delete(const HypothesisBase *hypo) { //cerr << "hypo=" << hypo << " " << m_coll.size() << endl; - _HCType::const_iterator iter = m_coll.find(hypo); - UTIL_THROW_IF2(iter == m_coll.end(), "Can't find hypo"); - - m_coll.erase(iter); + size_t erased = m_coll.erase(hypo); + UTIL_THROW_IF2(erased != 1, "couldn't erase hypo " << hypo); } void HypothesisColl::Clear() From a6d226c6b64805893b8c6303cbd2cf7c098e1f28 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 14:00:04 +0000 Subject: [PATCH 039/176] update worse score during pruning --- contrib/moses2/HypothesisColl.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index fb35c00e9..3427ebc47 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -250,10 +250,19 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) std::partial_sort(sortedHypos.begin(), iterMiddle, sortedHypos.end(), HypothesisFutureScoreOrderer()); + // update worse score + m_worseScore = std::numeric_limits::infinity(); + for (size_t i = 0; i < maxStackSize; ++i) { + HypothesisBase *hypo = const_cast(sortedHypos[i]); + if (hypo->GetFutureScore() < m_worseScore) { + m_worseScore = hypo->GetFutureScore(); + } + } + // prune if (maxStackSize && sortedHypos.size() > maxStackSize) { for (size_t i = maxStackSize; i < sortedHypos.size(); ++i) { - HypothesisBase *hypo = const_cast((sortedHypos)[i]); + HypothesisBase *hypo = const_cast(sortedHypos[i]); // delete from arclist if (mgr.system.options.nbest.nbest_size) { From ac6f234592675aaffe61205a0d966011c9e469f7 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 09:31:29 -0500 Subject: [PATCH 040/176] 
update worse score. Best hypos are already sorted using partial sort, don't need to go thru each of them --- contrib/moses2/HypothesisColl.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 3427ebc47..bb575382b 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -251,13 +251,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) HypothesisFutureScoreOrderer()); // update worse score - m_worseScore = std::numeric_limits::infinity(); - for (size_t i = 0; i < maxStackSize; ++i) { - HypothesisBase *hypo = const_cast(sortedHypos[i]); - if (hypo->GetFutureScore() < m_worseScore) { - m_worseScore = hypo->GetFutureScore(); - } - } + m_worseScore = sortedHypos[maxStackSize]->GetFutureScore(); // prune if (maxStackSize && sortedHypos.size() > maxStackSize) { From 8f012ba9c98c6787c67891334aa7812eb52c36d0 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 09:39:59 -0500 Subject: [PATCH 041/176] tweak --- contrib/moses2/HypothesisColl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index bb575382b..841f45abf 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -251,7 +251,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) HypothesisFutureScoreOrderer()); // update worse score - m_worseScore = sortedHypos[maxStackSize]->GetFutureScore(); + m_worseScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); // prune if (maxStackSize && sortedHypos.size() > maxStackSize) { From 30aa185cf24eabe5bbc52fc01f00bcc9af26decb Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 16:02:10 +0000 Subject: [PATCH 042/176] beam pruning --- contrib/moses2/HypothesisColl.cpp | 30 ++++++++++++++++++------------ contrib/moses2/HypothesisColl.h | 5 ++--- 2 
files changed, 20 insertions(+), 15 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 841f45abf..487b9c36b 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -22,9 +22,8 @@ HypothesisColl::HypothesisColl(const ManagerBase &mgr) :m_coll(MemPoolAllocator(mgr.GetPool())) ,m_sortedHypos(NULL) { - //m_bestScore = -std::numeric_limits::infinity(); - //m_minBeamScore = -std::numeric_limits::infinity(); - m_worseScore = std::numeric_limits::infinity(); + m_bestScore = -std::numeric_limits::infinity(); + m_worstScore = std::numeric_limits::infinity(); } const HypothesisBase *HypothesisColl::GetBestHypo() const @@ -69,9 +68,9 @@ void HypothesisColl::Add( << GetSize() << " " << endl; */ - if (GetSize() >= maxStackSize && futureScore < m_worseScore) { + if (GetSize() >= maxStackSize && futureScore < m_worstScore) { // beam threshold or really bad hypo that won't make the pruning cut - // as more hypos are added, the m_worseScore stat gets out of date and isn't the optimum cut-off point + // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point //cerr << "Discard, really bad score:" << hypo->Debug(system) << endl; hypoRecycle.Recycle(hypo); return; @@ -79,7 +78,7 @@ void HypothesisColl::Add( /* if (futureScore < m_minBeamScore) { // beam threshold or really bad hypo that won't make the pruning cut - // as more hypos are added, the m_worseScore stat gets out of date and isn't the optimum cut-off point + // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point //cerr << "Discard, below beam:" << hypo->Debug(system) << endl; hypoRecycle.Recycle(hypo); return; @@ -113,8 +112,16 @@ void HypothesisColl::Add( hypoRecycle.Recycle(added.other); } - if (GetSize() <= maxStackSize && hypo->GetFutureScore() < m_worseScore) { - m_worseScore = futureScore; + // update beam variables + if (futureScore > 
m_bestScore) { + m_bestScore = futureScore; + float beamWidth = mgr.system.options.search.beam_width; + if ( m_bestScore + beamWidth > m_worstScore ) { + m_worstScore = m_bestScore + beamWidth; + } + } + else if (GetSize() <= maxStackSize && hypo->GetFutureScore() < m_worstScore) { + m_worstScore = futureScore; } } } @@ -251,7 +258,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) HypothesisFutureScoreOrderer()); // update worse score - m_worseScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); + m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); // prune if (maxStackSize && sortedHypos.size() > maxStackSize) { @@ -294,9 +301,8 @@ void HypothesisColl::Clear() m_sortedHypos = NULL; m_coll.clear(); - //m_bestScore = -std::numeric_limits::infinity(); - //m_minBeamScore = -std::numeric_limits::infinity(); - m_worseScore = std::numeric_limits::infinity(); + m_bestScore = -std::numeric_limits::infinity(); + m_worstScore = std::numeric_limits::infinity(); } std::string HypothesisColl::Debug(const System &system) const diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index 4e41509de..b65fd8855 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -62,9 +62,8 @@ protected: _HCType m_coll; mutable Hypotheses *m_sortedHypos; - //SCORE m_bestScore; - SCORE m_worseScore; - //SCORE m_minBeamScore; + SCORE m_bestScore; + SCORE m_worstScore; StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; From 7e922f7a5da186a22c7754b1d201e18ab94baf61 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 16:08:16 +0000 Subject: [PATCH 043/176] beam pruning even with nbest --- contrib/moses2/HypothesisColl.cpp | 56 ++++++++++--------------------- 1 file changed, 18 insertions(+), 38 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 487b9c36b..d6bfd4a73 100644 --- 
a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -75,27 +75,6 @@ void HypothesisColl::Add( hypoRecycle.Recycle(hypo); return; } - /* - if (futureScore < m_minBeamScore) { - // beam threshold or really bad hypo that won't make the pruning cut - // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point - //cerr << "Discard, below beam:" << hypo->Debug(system) << endl; - hypoRecycle.Recycle(hypo); - return; - } - - if (futureScore > m_bestScore) { - m_bestScore = hypo->GetFutureScore(); - - // this may also affect the worst score - SCORE beamWidth = system.options.search.beam_width; - //cerr << "beamWidth=" << beamWidth << endl; - if ( m_bestScore + beamWidth > m_minBeamScore ) { - m_minBeamScore = m_bestScore + beamWidth; - } - } - //cerr << "OK:" << hypo->Debug(system) << endl; - */ StackAdd added = Add(hypo); @@ -104,28 +83,29 @@ void HypothesisColl::Add( arcLists.AddArc(added.added, hypo, added.other); } else { - if (!added.added) { - hypoRecycle.Recycle(hypo); + if (added.added) { + if (added.other) { + hypoRecycle.Recycle(added.other); + } } else { - if (added.other) { - hypoRecycle.Recycle(added.other); - } - - // update beam variables - if (futureScore > m_bestScore) { - m_bestScore = futureScore; - float beamWidth = mgr.system.options.search.beam_width; - if ( m_bestScore + beamWidth > m_worstScore ) { - m_worstScore = m_bestScore + beamWidth; - } - } - else if (GetSize() <= maxStackSize && hypo->GetFutureScore() < m_worstScore) { - m_worstScore = futureScore; - } + hypoRecycle.Recycle(hypo); } } + // update beam variables + if (added.added) { + if (futureScore > m_bestScore) { + m_bestScore = futureScore; + float beamWidth = mgr.system.options.search.beam_width; + if ( m_bestScore + beamWidth > m_worstScore ) { + m_worstScore = m_bestScore + beamWidth; + } + } + else if (GetSize() <= maxStackSize && futureScore < m_worstScore) { + m_worstScore = futureScore; + } + } } StackAdd 
HypothesisColl::Add(const HypothesisBase *hypo) From 7cdff3a148c4429f11507707880c4dbd7ae050bb Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 18:10:56 +0000 Subject: [PATCH 044/176] use arrays, not vector --- contrib/moses2/HypothesisColl.cpp | 41 ++++++++++++++++--------------- contrib/moses2/HypothesisColl.h | 2 +- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index d6bfd4a73..48f5ef8d2 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -56,7 +56,8 @@ void HypothesisColl::Add( if (GetSize() > maxStackSize * 2) { //cerr << "maxStackSize=" << maxStackSize << " " << GetSize() << endl; - PruneHypos(mgr, mgr.arcLists); + const HypothesisBase *sortedHypos[GetSize()]; + PruneHypos(mgr, mgr.arcLists, sortedHypos); } SCORE futureScore = hypo->GetFutureScore(); @@ -210,9 +211,13 @@ void HypothesisColl::SortAndPruneHypos(const ManagerBase &mgr, */ } -void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) +void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos) { size_t maxStackSize = mgr.system.options.search.stack_size; + assert(maxStackSize); // can't do stack=0 - unlimited stack size. No-one ever uses that + assert(GetSize() > maxStackSize); + //assert(sortedHypos.size() == GetSize()); + Recycler &recycler = mgr.GetHypoRecycle(); /* @@ -222,40 +227,36 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) } cerr << endl; */ - vector sortedHypos(GetSize()); size_t ind = 0; BOOST_FOREACH(const HypothesisBase *hypo, m_coll){ sortedHypos[ind] = hypo; ++ind; } - vector::iterator iterMiddle; - iterMiddle = - (maxStackSize == 0 || sortedHypos.size() < maxStackSize) ? 
- sortedHypos.end() : sortedHypos.begin() + maxStackSize; + const HypothesisBase **iterMiddle = sortedHypos + maxStackSize; - std::partial_sort(sortedHypos.begin(), iterMiddle, sortedHypos.end(), + std::partial_sort( + sortedHypos, + iterMiddle, + sortedHypos + GetSize(), HypothesisFutureScoreOrderer()); // update worse score m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); // prune - if (maxStackSize && sortedHypos.size() > maxStackSize) { - for (size_t i = maxStackSize; i < sortedHypos.size(); ++i) { - HypothesisBase *hypo = const_cast(sortedHypos[i]); + for (size_t i = maxStackSize; i < GetSize(); ++i) { + HypothesisBase *hypo = const_cast(sortedHypos[i]); - // delete from arclist - if (mgr.system.options.nbest.nbest_size) { - arcLists.Delete(hypo); - } - - // delete from collection - Delete(hypo); - - recycler.Recycle(hypo); + // delete from arclist + if (mgr.system.options.nbest.nbest_size) { + arcLists.Delete(hypo); } + // delete from collection + Delete(hypo); + + recycler.Recycle(hypo); } /* diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index b65fd8855..ac9a32a2c 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -68,7 +68,7 @@ protected: StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; - void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists); + void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos); }; From 883f2e4f143447c86714adf8233e846b0e60a57e Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 18:36:44 +0000 Subject: [PATCH 045/176] get ready to merge similar sorting functions --- contrib/moses2/Array.h | 3 ++ contrib/moses2/HypothesisColl.cpp | 54 +++++++++++++++++------------ contrib/moses2/HypothesisColl.h | 3 +- contrib/moses2/legacy/Parameter.cpp | 2 +- 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/contrib/moses2/Array.h 
b/contrib/moses2/Array.h index c8552f5e7..59b003135 100644 --- a/contrib/moses2/Array.h +++ b/contrib/moses2/Array.h @@ -56,6 +56,9 @@ public: return m_arr[ind]; } + T *GetArray() + { return m_arr; } + size_t hash() const { size_t seed = 0; diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 48f5ef8d2..185b8bdc8 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -56,8 +56,7 @@ void HypothesisColl::Add( if (GetSize() > maxStackSize * 2) { //cerr << "maxStackSize=" << maxStackSize << " " << GetSize() << endl; - const HypothesisBase *sortedHypos[GetSize()]; - PruneHypos(mgr, mgr.arcLists, sortedHypos); + PruneHypos(mgr, mgr.arcLists); } SCORE futureScore = hypo->GetFutureScore(); @@ -211,15 +210,42 @@ void HypothesisColl::SortAndPruneHypos(const ManagerBase &mgr, */ } -void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos) +void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) +{ + size_t maxStackSize = mgr.system.options.search.stack_size; + + Recycler &recycler = mgr.GetHypoRecycle(); + + const HypothesisBase *sortedHypos[GetSize()]; + PruneHypos(mgr, mgr.arcLists, sortedHypos); + + // update worse score + m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); + + // prune + for (size_t i = maxStackSize; i < GetSize(); ++i) { + HypothesisBase *hypo = const_cast(sortedHypos[i]); + + // delete from arclist + if (mgr.system.options.nbest.nbest_size) { + arcLists.Delete(hypo); + } + + // delete from collection + Delete(hypo); + + recycler.Recycle(hypo); + } + +} + +void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos) const { size_t maxStackSize = mgr.system.options.search.stack_size; assert(maxStackSize); // can't do stack=0 - unlimited stack size. 
No-one ever uses that assert(GetSize() > maxStackSize); //assert(sortedHypos.size() == GetSize()); - Recycler &recycler = mgr.GetHypoRecycle(); - /* cerr << "UNSORTED hypos: "; BOOST_FOREACH(const HypothesisBase *hypo, m_coll) { @@ -241,24 +267,6 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, cons sortedHypos + GetSize(), HypothesisFutureScoreOrderer()); - // update worse score - m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); - - // prune - for (size_t i = maxStackSize; i < GetSize(); ++i) { - HypothesisBase *hypo = const_cast(sortedHypos[i]); - - // delete from arclist - if (mgr.system.options.nbest.nbest_size) { - arcLists.Delete(hypo); - } - - // delete from collection - Delete(hypo); - - recycler.Recycle(hypo); - } - /* cerr << "sorted hypos: "; for (size_t i = 0; i < sortedHypos.size(); ++i) { diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index ac9a32a2c..4b0008858 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -68,7 +68,8 @@ protected: StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; - void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos); + void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists); + void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos) const; }; diff --git a/contrib/moses2/legacy/Parameter.cpp b/contrib/moses2/legacy/Parameter.cpp index 666eb0e98..553f82f3b 100644 --- a/contrib/moses2/legacy/Parameter.cpp +++ b/contrib/moses2/legacy/Parameter.cpp @@ -84,7 +84,7 @@ Parameter::Parameter() //AddParam(search_opts, "early-discarding-threshold", "edt", // "threshold for constructing hypotheses based on estimate cost"); AddParam(search_opts, "stack", "s", - "maximum stack size for histogram pruning. 0 = unlimited stack size"); + "maximum stack size for histogram pruning. 
CANNOT USE 0 = unlimited stack size"); //AddParam(search_opts, "stack-diversity", "sd", // "minimum number of hypothesis of each coverage in stack (default 0)"); From ab28a3fc8c092cfbdb98a2ae92ce708e938697c4 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 7 Dec 2016 00:16:38 +0000 Subject: [PATCH 046/176] can prune stack < max stack size --- contrib/moses2/HypothesisColl.cpp | 18 +++++++++++++++--- contrib/moses2/legacy/Parameter.cpp | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 185b8bdc8..327e3dcd6 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -242,8 +242,8 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos) const { size_t maxStackSize = mgr.system.options.search.stack_size; - assert(maxStackSize); // can't do stack=0 - unlimited stack size. No-one ever uses that - assert(GetSize() > maxStackSize); + //assert(maxStackSize); // can't do stack=0 - unlimited stack size. 
No-one ever uses that + //assert(GetSize() > maxStackSize); //assert(sortedHypos.size() == GetSize()); /* @@ -259,7 +259,19 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, cons ++ind; } - const HypothesisBase **iterMiddle = sortedHypos + maxStackSize; + size_t indMiddle; + if (maxStackSize == 0) { + indMiddle = GetSize(); + } + else if (GetSize() > maxStackSize) { + indMiddle = maxStackSize; + } + else { + // GetSize() <= maxStackSize + indMiddle = GetSize(); + } + + const HypothesisBase **iterMiddle = sortedHypos + indMiddle; std::partial_sort( sortedHypos, diff --git a/contrib/moses2/legacy/Parameter.cpp b/contrib/moses2/legacy/Parameter.cpp index 553f82f3b..666eb0e98 100644 --- a/contrib/moses2/legacy/Parameter.cpp +++ b/contrib/moses2/legacy/Parameter.cpp @@ -84,7 +84,7 @@ Parameter::Parameter() //AddParam(search_opts, "early-discarding-threshold", "edt", // "threshold for constructing hypotheses based on estimate cost"); AddParam(search_opts, "stack", "s", - "maximum stack size for histogram pruning. CANNOT USE 0 = unlimited stack size"); + "maximum stack size for histogram pruning. 
0 = unlimited stack size"); //AddParam(search_opts, "stack-diversity", "sd", // "minimum number of hypothesis of each coverage in stack (default 0)"); From 8ef7db569a82604fc523aa19416e2dcabd0faa43 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 7 Dec 2016 00:38:11 +0000 Subject: [PATCH 047/176] merge similar sorting functions --- contrib/moses2/HypothesisColl.cpp | 73 +++++++++---------------------- contrib/moses2/HypothesisColl.h | 3 +- 2 files changed, 21 insertions(+), 55 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 327e3dcd6..8bad53888 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -147,13 +147,25 @@ const Hypotheses &HypothesisColl::GetSortedAndPruneHypos( m_sortedHypos = new (pool.Allocate()) Hypotheses(pool, m_coll.size()); - size_t ind = 0; - BOOST_FOREACH(const HypothesisBase *hypo, m_coll){ - (*m_sortedHypos)[ind] = hypo; - ++ind; - } + SortHypos(mgr, m_sortedHypos->GetArray()); + + // prune + Recycler &recycler = mgr.GetHypoRecycle(); + + size_t maxStackSize = mgr.system.options.search.stack_size; + if (maxStackSize && m_sortedHypos->size() > maxStackSize) { + for (size_t i = maxStackSize; i < m_sortedHypos->size(); ++i) { + HypothesisBase *hypo = const_cast((*m_sortedHypos)[i]); + recycler.Recycle(hypo); + + // delete from arclist + if (mgr.system.options.nbest.nbest_size) { + arcLists.Delete(hypo); + } + } + m_sortedHypos->resize(maxStackSize); + } - SortAndPruneHypos(mgr, arcLists); } return *m_sortedHypos; @@ -165,51 +177,6 @@ const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos() const return *m_sortedHypos; } -void HypothesisColl::SortAndPruneHypos(const ManagerBase &mgr, - ArcLists &arcLists) const -{ - size_t stackSize = mgr.system.options.search.stack_size; - Recycler &recycler = mgr.GetHypoRecycle(); - - /* - cerr << "UNSORTED hypos: "; - BOOST_FOREACH(const HypothesisBase *hypo, m_coll) { - cerr << hypo << "(" << hypo->GetFutureScore() 
<< ")" << " "; - } - cerr << endl; - */ - Hypotheses::iterator iterMiddle; - iterMiddle = - (stackSize == 0 || m_sortedHypos->size() < stackSize) ? - m_sortedHypos->end() : m_sortedHypos->begin() + stackSize; - - std::partial_sort(m_sortedHypos->begin(), iterMiddle, m_sortedHypos->end(), - HypothesisFutureScoreOrderer()); - - // prune - if (stackSize && m_sortedHypos->size() > stackSize) { - for (size_t i = stackSize; i < m_sortedHypos->size(); ++i) { - HypothesisBase *hypo = const_cast((*m_sortedHypos)[i]); - recycler.Recycle(hypo); - - // delete from arclist - if (mgr.system.options.nbest.nbest_size) { - arcLists.Delete(hypo); - } - } - m_sortedHypos->resize(stackSize); - } - - /* - cerr << "sorted hypos: "; - for (size_t i = 0; i < m_sortedHypos->size(); ++i) { - const HypothesisBase *hypo = (*m_sortedHypos)[i]; - cerr << hypo << " "; - } - cerr << endl; - */ -} - void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) { size_t maxStackSize = mgr.system.options.search.stack_size; @@ -217,7 +184,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) Recycler &recycler = mgr.GetHypoRecycle(); const HypothesisBase *sortedHypos[GetSize()]; - PruneHypos(mgr, mgr.arcLists, sortedHypos); + SortHypos(mgr, sortedHypos); // update worse score m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); @@ -239,7 +206,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) } -void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos) const +void HypothesisColl::SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const { size_t maxStackSize = mgr.system.options.search.stack_size; //assert(maxStackSize); // can't do stack=0 - unlimited stack size. 
No-one ever uses that diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index 4b0008858..8b8cb8f85 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -66,10 +66,9 @@ protected: SCORE m_worstScore; StackAdd Add(const HypothesisBase *hypo); - void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists); - void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos) const; + void SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const; }; From bc6c1f41601159f8f7de7930b748aae22d085ffb Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 7 Dec 2016 10:09:38 +0000 Subject: [PATCH 048/176] cleanup --- contrib/moses2/HypothesisColl.cpp | 8 +------- contrib/moses2/HypothesisColl.h | 4 +--- .../moses2/PhraseBased/CubePruningMiniStack/Search.cpp | 4 ++-- contrib/moses2/PhraseBased/Normal/Search.cpp | 4 ++-- contrib/moses2/SCFG/nbest/KBestExtractor.cpp | 2 +- contrib/moses2/TranslationModel/PhraseTable.cpp | 2 +- 6 files changed, 8 insertions(+), 16 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 8bad53888..4566cc925 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -137,7 +137,7 @@ StackAdd HypothesisColl::Add(const HypothesisBase *hypo) //assert(false); } -const Hypotheses &HypothesisColl::GetSortedAndPruneHypos( +const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos( const ManagerBase &mgr, ArcLists &arcLists) const { @@ -171,12 +171,6 @@ const Hypotheses &HypothesisColl::GetSortedAndPruneHypos( return *m_sortedHypos; } -const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos() const -{ - UTIL_THROW_IF2(m_sortedHypos == NULL, "m_sortedHypos must be sorted beforehand"); - return *m_sortedHypos; -} - void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) { size_t 
maxStackSize = mgr.system.options.search.stack_size; diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index 8b8cb8f85..81a3b25c3 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -35,12 +35,10 @@ public: void Clear(); - const Hypotheses &GetSortedAndPruneHypos( + const Hypotheses &GetSortedAndPrunedHypos( const ManagerBase &mgr, ArcLists &arcLists) const; - const Hypotheses &GetSortedAndPrunedHypos() const; - const HypothesisBase *GetBestHypo() const; template diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp index 4ea61e0ba..94baafeb9 100644 --- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp +++ b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp @@ -206,7 +206,7 @@ void Search::PostDecode(size_t stackInd) CubeEdges &edges = *m_cubeEdges[numWords]; // sort hypo for a particular bitmap and hypoEndPos - const Hypotheses &sortedHypos = hypos.GetSortedAndPruneHypos(mgr, mgr.arcLists); + const Hypotheses &sortedHypos = hypos.GetSortedAndPrunedHypos(mgr, mgr.arcLists); size_t numPt = mgr.system.mappings.size(); for (size_t i = 0; i < numPt; ++i) { @@ -232,7 +232,7 @@ void Search::AddInitialTrellisPaths(TrellisPaths &paths) const const Stack::Coll &coll = m_stack.GetColl(); BOOST_FOREACH(const Stack::Coll::value_type &val, coll){ Moses2::HypothesisColl &hypos = *val.second; - const Hypotheses &sortedHypos = hypos.GetSortedAndPruneHypos(mgr, mgr.arcLists); + const Hypotheses &sortedHypos = hypos.GetSortedAndPrunedHypos(mgr, mgr.arcLists); BOOST_FOREACH(const HypothesisBase *hypoBase, sortedHypos) { const Hypothesis *hypo = static_cast(hypoBase); diff --git a/contrib/moses2/PhraseBased/Normal/Search.cpp b/contrib/moses2/PhraseBased/Normal/Search.cpp index 5c89eecc6..f7e26f907 100644 --- a/contrib/moses2/PhraseBased/Normal/Search.cpp +++ b/contrib/moses2/PhraseBased/Normal/Search.cpp @@ -73,7 +73,7 @@ 
void Search::Decode(size_t stackInd) return; } - const Hypotheses &hypos = stack.GetSortedAndPruneHypos(mgr, mgr.arcLists); + const Hypotheses &hypos = stack.GetSortedAndPrunedHypos(mgr, mgr.arcLists); //cerr << "hypos=" << hypos.size() << endl; const InputPaths &paths = mgr.GetInputPaths(); @@ -147,7 +147,7 @@ const Hypothesis *Search::GetBestHypo() const void Search::AddInitialTrellisPaths(TrellisPaths &paths) const { const Stack &lastStack = m_stacks.Back(); - const Hypotheses &hypos = lastStack.GetSortedAndPruneHypos(mgr, mgr.arcLists); + const Hypotheses &hypos = lastStack.GetSortedAndPrunedHypos(mgr, mgr.arcLists); BOOST_FOREACH(const HypothesisBase *hypoBase, hypos){ const Hypothesis *hypo = static_cast(hypoBase); diff --git a/contrib/moses2/SCFG/nbest/KBestExtractor.cpp b/contrib/moses2/SCFG/nbest/KBestExtractor.cpp index 87b9c86d5..ae7ec8634 100644 --- a/contrib/moses2/SCFG/nbest/KBestExtractor.cpp +++ b/contrib/moses2/SCFG/nbest/KBestExtractor.cpp @@ -42,7 +42,7 @@ void KBestExtractor::OutputToStream(std::stringstream &strm) UTIL_THROW_IF2(lastStack.GetColl().size() != 1, "Only suppose to be 1 hypo coll in last stack"); UTIL_THROW_IF2(lastStack.GetColl().begin()->second == NULL, "NULL hypo collection"); - const Hypotheses &hypos = lastStack.GetColl().begin()->second->GetSortedAndPrunedHypos(); + const Hypotheses &hypos = lastStack.GetColl().begin()->second->GetSortedAndPrunedHypos(m_mgr, m_mgr.arcLists); UTIL_THROW_IF2(hypos.size() != 1, "Only suppose to be 1 hypo in collection"); const HypothesisBase *hypo = hypos[0]; diff --git a/contrib/moses2/TranslationModel/PhraseTable.cpp b/contrib/moses2/TranslationModel/PhraseTable.cpp index 6c2d6eaf2..ccc871b08 100644 --- a/contrib/moses2/TranslationModel/PhraseTable.cpp +++ b/contrib/moses2/TranslationModel/PhraseTable.cpp @@ -154,7 +154,7 @@ void PhraseTable::LookupNT( BOOST_FOREACH (const SCFG::Stack::Coll::value_type &valPair, stackColl) { const SCFG::Word &ntSought = valPair.first; const 
Moses2::HypothesisColl *hypos = valPair.second; - const Moses2::Hypotheses &sortedHypos = hypos->GetSortedAndPruneHypos(mgr, mgr.arcLists); + const Moses2::Hypotheses &sortedHypos = hypos->GetSortedAndPrunedHypos(mgr, mgr.arcLists); //cerr << "ntSought=" << ntSought << ntSought.isNonTerminal << endl; LookupGivenWord(pool, mgr, prevPath, ntSought, &sortedHypos, subPhraseRange, outPath); } From dd1532637b1b28dfdcfd115979ae9b513bc8f729 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 14 Dec 2016 18:09:52 +0000 Subject: [PATCH 049/176] delete debugging message --- contrib/moses2/TranslationModel/PhraseTable.cpp | 2 +- contrib/other-builds/moses/.cproject | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/moses2/TranslationModel/PhraseTable.cpp b/contrib/moses2/TranslationModel/PhraseTable.cpp index ccc871b08..c790147bb 100644 --- a/contrib/moses2/TranslationModel/PhraseTable.cpp +++ b/contrib/moses2/TranslationModel/PhraseTable.cpp @@ -87,8 +87,8 @@ void PhraseTable::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const if (SatisfyBackoff(mgr, *path)) { TargetPhrases *tpsPtr = Lookup(mgr, mgr.GetPool(), *path); - cerr << "tpsPtr=" << tpsPtr << " "; /* + cerr << "tpsPtr=" << tpsPtr << " "; if (tps.get()) { cerr << tps.get()->GetSize(); } diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject index 81da1d22b..e1a30c7af 100644 --- a/contrib/other-builds/moses/.cproject +++ b/contrib/other-builds/moses/.cproject @@ -11,7 +11,7 @@ - + @@ -86,7 +86,7 @@ - + From 304a6652f77a43f127206e58a95f7e517d32a0d4 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 14 Dec 2016 18:16:30 +0000 Subject: [PATCH 050/176] debugging message --- contrib/moses2/PhraseBased/Normal/Search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/moses2/PhraseBased/Normal/Search.cpp b/contrib/moses2/PhraseBased/Normal/Search.cpp index f7e26f907..7c5026a7c 100644 --- 
a/contrib/moses2/PhraseBased/Normal/Search.cpp +++ b/contrib/moses2/PhraseBased/Normal/Search.cpp @@ -60,7 +60,7 @@ void Search::Decode() if (stackInd < m_stacks.GetSize() - 1) { m_stacks.Delete(stackInd); } - //cerr << m_stacks << endl; + //cerr << m_stacks.Debug(mgr.system) << endl; } } From 25b87e14debb2bd711b3ec45a85c815518bb570b Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 16 Dec 2016 15:58:50 +0000 Subject: [PATCH 051/176] unused method --- contrib/moses2/Weights.cpp | 10 ---------- contrib/moses2/Weights.h | 2 -- 2 files changed, 12 deletions(-) diff --git a/contrib/moses2/Weights.cpp b/contrib/moses2/Weights.cpp index d0d923e93..643847eee 100644 --- a/contrib/moses2/Weights.cpp +++ b/contrib/moses2/Weights.cpp @@ -36,16 +36,6 @@ void Weights::Init(const FeatureFunctions &ffs) m_weights.resize(totalNumScores, 1); } -std::ostream &Weights::Debug(std::ostream &out, const System &system) const -{ - const FeatureFunctions &ffs = system.featureFunctions; - size_t numScores = ffs.GetNumScores(); - for (size_t i = 0; i < numScores; ++i) { - out << m_weights[i] << " "; - } - -} - std::vector Weights::GetWeights(const FeatureFunction &ff) const { std::vector ret(m_weights.begin() + ff.GetStartInd(), m_weights.begin() + ff.GetStartInd() + ff.GetNumScores()); diff --git a/contrib/moses2/Weights.h b/contrib/moses2/Weights.h index d822ff923..c3c2cee62 100644 --- a/contrib/moses2/Weights.h +++ b/contrib/moses2/Weights.h @@ -27,8 +27,6 @@ public: return m_weights[ind]; } - std::ostream &Debug(std::ostream &out, const System &system) const; - std::vector GetWeights(const FeatureFunction &ff) const; void SetWeights(const FeatureFunctions &ffs, const std::string &ffName, const std::vector &weights); From e72bc47910ae58ab98a8104ca1161a020001dd19 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 21 Dec 2016 18:04:22 +0000 Subject: [PATCH 052/176] eclipse --- contrib/other-builds/moses/.cproject | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) 
diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject index 81da1d22b..491caa587 100644 --- a/contrib/other-builds/moses/.cproject +++ b/contrib/other-builds/moses/.cproject @@ -11,7 +11,7 @@ - + @@ -26,15 +26,15 @@ - - - - @@ -86,7 +86,7 @@ - + From c6c3bc84b7673618f379482cbc6b708f55a9ecd3 Mon Sep 17 00:00:00 2001 From: alvations Date: Fri, 23 Dec 2016 14:21:20 +0800 Subject: [PATCH 053/176] Changed \p{Hyphen} to \p{LineBreak} Using Perl v5.18.2, it's reporting this warning: **Use of 'Hyphen' in \p{} or \P{} is deprecated because: Supplanted by Line_Break property values; see www.unicode.org/reports/tr14** --- scripts/generic/mteval-v13a.pl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/generic/mteval-v13a.pl b/scripts/generic/mteval-v13a.pl index bdc2d9479..2e5d29ad5 100755 --- a/scripts/generic/mteval-v13a.pl +++ b/scripts/generic/mteval-v13a.pl @@ -19,6 +19,8 @@ binmode STDERR, ":utf8"; # version 13a # * modified the scoring functions to prevent division-by-zero errors when a system segment is empty # * affected methods: 'bleu_score' and 'bleu_score_smoothing' +# * use \p{Line_Breaks} instead of \p{Hyphen} when stripping end-of-line hyphenation and join lines +# * because \p{Hyphen} is deprecated since 2016-06-01, see http://www.unicode.org/reports/tr14/#Hyphen # # version 13 # * Uses a XML parser to read data (only when extension is .xml) @@ -948,7 +950,7 @@ sub tokenization_international my ($norm_text) = @_; $norm_text =~ s///g; # strip "skipped" tags - $norm_text =~ s/\p{Hyphen}\p{Zl}//g; # strip end-of-line hyphenation and join lines + $norm_text =~ s/\p{Line_Break}\p{Zl}//g; # strip end-of-line hyphenation and join lines $norm_text =~ s/\p{Zl}/ /g; # join lines # replace entities From c6eec7335faf2cf6b1231a9622fa99ec747c3244 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 27 Dec 2016 13:55:03 +0000 Subject: [PATCH 054/176] distortion-limit=-1 is unlimited distortion --- 
contrib/moses2/FF/Distortion.cpp | 2 ++ contrib/moses2/HypothesisColl.cpp | 10 ++++++++-- contrib/moses2/LM/LanguageModel.cpp | 2 ++ contrib/moses2/PhraseBased/Search.cpp | 4 ++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/contrib/moses2/FF/Distortion.cpp b/contrib/moses2/FF/Distortion.cpp index 4220b731f..343e1d21f 100644 --- a/contrib/moses2/FF/Distortion.cpp +++ b/contrib/moses2/FF/Distortion.cpp @@ -118,6 +118,8 @@ void Distortion::EvaluateWhenApplied(const ManagerBase &mgr, DistortionState_traditional &stateCast = static_cast(state); stateCast.Set(hypo.GetInputPath().range, hypo.GetBitmap().GetFirstGapPos()); + + //cerr << "hypo=" << hypo.Debug(mgr.system) << endl; } SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr, diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 4566cc925..def18a16a 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -60,18 +60,18 @@ void HypothesisColl::Add( } SCORE futureScore = hypo->GetFutureScore(); + /* cerr << "scores:" << futureScore << " " << m_bestScore << " " - << m_minBeamScore << " " << GetSize() << " " << endl; */ if (GetSize() >= maxStackSize && futureScore < m_worstScore) { // beam threshold or really bad hypo that won't make the pruning cut // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point - //cerr << "Discard, really bad score:" << hypo->Debug(system) << endl; + cerr << "Discard, really bad score:" << hypo->Debug(mgr.system) << endl; hypoRecycle.Recycle(hypo); return; } @@ -111,14 +111,18 @@ void HypothesisColl::Add( StackAdd HypothesisColl::Add(const HypothesisBase *hypo) { std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo); + //cerr << endl << "new=" << hypo->Debug(hypo->GetManager().system) << endl; // CHECK RECOMBINATION if (addRet.second) { // equiv hypo doesn't exists + //cerr << "Added " << hypo << endl; return StackAdd(true, 
NULL); } else { HypothesisBase *hypoExisting = const_cast(*addRet.first); + //cerr << "hypoExisting=" << hypoExisting->Debug(hypo->GetManager().system) << endl; + if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) { // incoming hypo is better than the one we have const HypothesisBase * const &hypoExisting1 = *addRet.first; @@ -126,10 +130,12 @@ StackAdd HypothesisColl::Add(const HypothesisBase *hypo) const_cast(hypoExisting1); hypoExisting2 = hypo; + //cerr << "Added " << hypo << " dicard existing " << hypoExisting2 << endl; return StackAdd(true, hypoExisting); } else { // already storing the best hypo. discard incoming hypo + //cerr << "Keep existing " << hypoExisting << " dicard new " << hypo << endl; return StackAdd(false, hypoExisting); } } diff --git a/contrib/moses2/LM/LanguageModel.cpp b/contrib/moses2/LM/LanguageModel.cpp index 8a6fe3b39..d4eeb7b1e 100644 --- a/contrib/moses2/LM/LanguageModel.cpp +++ b/contrib/moses2/LM/LanguageModel.cpp @@ -146,6 +146,7 @@ void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, SCORE &estimatedScore) const { + cerr << "start LanguageModel::EvaluateInIsolation" << endl; if (targetPhrase.GetSize() == 0) { return; } @@ -174,6 +175,7 @@ void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, SCORE weightedScore = Scores::CalcWeightedScore(system, *this, nonFullScore); estimatedScore += weightedScore; + cerr << "end LanguageModel::EvaluateInIsolation" << endl; } void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, diff --git a/contrib/moses2/PhraseBased/Search.cpp b/contrib/moses2/PhraseBased/Search.cpp index 6c35aa151..1a85e15f5 100644 --- a/contrib/moses2/PhraseBased/Search.cpp +++ b/contrib/moses2/PhraseBased/Search.cpp @@ -39,6 +39,10 @@ bool Search::CanExtend(const Bitmap &hypoBitmap, size_t hypoRangeEndPos, return false; } + if 
(mgr.system.options.reordering.max_distortion == -1) { + return true; + } + if (mgr.system.options.reordering.max_distortion >= 0) { // distortion limit int distortion = ComputeDistortionDistance(hypoRangeEndPos, From e14a71c3ea7483a2f7d96007b9bb88ea576226af Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 28 Dec 2016 10:57:19 +0000 Subject: [PATCH 055/176] debugging messages --- contrib/moses2/HypothesisColl.cpp | 2 +- contrib/moses2/LM/LanguageModel.cpp | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index def18a16a..a75113d58 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -71,7 +71,7 @@ void HypothesisColl::Add( if (GetSize() >= maxStackSize && futureScore < m_worstScore) { // beam threshold or really bad hypo that won't make the pruning cut // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point - cerr << "Discard, really bad score:" << hypo->Debug(mgr.system) << endl; + //cerr << "Discard, really bad score:" << hypo->Debug(mgr.system) << endl; hypoRecycle.Recycle(hypo); return; } diff --git a/contrib/moses2/LM/LanguageModel.cpp b/contrib/moses2/LM/LanguageModel.cpp index d4eeb7b1e..3e0c39d20 100644 --- a/contrib/moses2/LM/LanguageModel.cpp +++ b/contrib/moses2/LM/LanguageModel.cpp @@ -146,7 +146,6 @@ void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, SCORE &estimatedScore) const { - cerr << "start LanguageModel::EvaluateInIsolation" << endl; if (targetPhrase.GetSize() == 0) { return; } @@ -174,8 +173,6 @@ void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, scores.PlusEquals(system, *this, score); SCORE weightedScore = Scores::CalcWeightedScore(system, *this, nonFullScore); estimatedScore += weightedScore; - - cerr << "end 
LanguageModel::EvaluateInIsolation" << endl; } void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, From c30b28f43b902e48e399ab5cf6c60f6f62c1fb50 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 29 Dec 2016 12:03:59 +0000 Subject: [PATCH 056/176] Edge case of wall at the end of sentence /Mike Ladwig --- contrib/moses2/PhraseBased/ReorderingConstraint.cpp | 1 + contrib/moses2/PhraseBased/Sentence.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/contrib/moses2/PhraseBased/ReorderingConstraint.cpp b/contrib/moses2/PhraseBased/ReorderingConstraint.cpp index d2211d817..cff09cc24 100644 --- a/contrib/moses2/PhraseBased/ReorderingConstraint.cpp +++ b/contrib/moses2/PhraseBased/ReorderingConstraint.cpp @@ -60,6 +60,7 @@ void ReorderingConstraint::FinalizeWalls() void ReorderingConstraint::SetWall( size_t pos, bool value ) { //cerr << "SETTING reordering wall at position " << pos << std::endl; + UTIL_THROW_IF2(pos >= m_size, "Wall over length of sentence: " << pos << " >= " << m_size); m_wall[pos] = value; m_active = true; } diff --git a/contrib/moses2/PhraseBased/Sentence.cpp b/contrib/moses2/PhraseBased/Sentence.cpp index d0c728530..dbedf878e 100644 --- a/contrib/moses2/PhraseBased/Sentence.cpp +++ b/contrib/moses2/PhraseBased/Sentence.cpp @@ -84,7 +84,7 @@ Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab, for(size_t i=0; iGetNodeName(), "wall") == 0) { - UTIL_THROW_IF2(xmlOption->startPos >= ret->GetSize(), "wall is beyond the sentence"); // no buggy walls, please + UTIL_THROW_IF2(xmlOption->startPos > ret->GetSize(), "wall is beyond the sentence"); // no buggy walls, please reorderingConstraint.SetWall(xmlOption->startPos - 1, true); } else if (strcmp(xmlOption->GetNodeName(), "zone") == 0) { From 347980fd426dcbc2cc2126c187f22473f819a3d4 Mon Sep 17 00:00:00 2001 From: Doried Abd-Allah Date: Thu, 29 Dec 2016 15:16:07 +0200 Subject: [PATCH 057/176] fixing errors while 
building with oxlm: renamed iptr to boost_iptr because of a conflict with eigen library required by oxlm, added boost_serialization to oxlm requirements in jamroot --- Jamroot | 1 - moses/TranslationModel/UG/mm/ug_typedefs.h | 2 +- moses/TranslationModel/UG/test-ranked-phrase-lookup.cc | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Jamroot b/Jamroot index 7a7be5c93..1c4d68abd 100644 --- a/Jamroot +++ b/Jamroot @@ -183,7 +183,6 @@ requirements += [ option.get "with-mm" : : MAX_NUM_FACTORS=4 ] ; requirements += [ option.get "unlabelled-source" : : UNLABELLED_SOURCE ] ; if [ option.get "with-oxlm" ] { - external-lib boost_serialization ; external-lib gomp ; requirements += boost_serialization ; requirements += gomp ; diff --git a/moses/TranslationModel/UG/mm/ug_typedefs.h b/moses/TranslationModel/UG/mm/ug_typedefs.h index fc9d7faef..4815846a2 100644 --- a/moses/TranslationModel/UG/mm/ug_typedefs.h +++ b/moses/TranslationModel/UG/mm/ug_typedefs.h @@ -36,7 +36,7 @@ namespace sapt #ifndef SPTR #define SPTR boost::shared_ptr #endif -#define iptr boost::intrusive_ptr +#define boost_iptr boost::intrusive_ptr #define scoptr boost::scoped_ptr #define rcast reinterpret_cast #endif diff --git a/moses/TranslationModel/UG/test-ranked-phrase-lookup.cc b/moses/TranslationModel/UG/test-ranked-phrase-lookup.cc index 613e46360..ff9f1c722 100644 --- a/moses/TranslationModel/UG/test-ranked-phrase-lookup.cc +++ b/moses/TranslationModel/UG/test-ranked-phrase-lookup.cc @@ -76,7 +76,7 @@ int main(int argc, char* argv[]) { typedef vector > pplist_t; interpret_args(argc, argv); - iptr Bptr(new mmbitext); + boost_iptr Bptr(new mmbitext); mmbitext& B = *Bptr;// static_cast(Bptr.get()); B.open(bname, L1, L2); B.V1->setDynamic(true); From d4642a34c1550564b59f852af76426574bfd774f Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Sun, 1 Jan 2017 22:54:48 +0000 Subject: [PATCH 058/176] add completed-hypo to Distortion FF --- contrib/moses2/FF/Distortion.cpp | 31 
++++++++++++++++++++++++++----- contrib/moses2/FF/Distortion.h | 11 ++++++++--- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/contrib/moses2/FF/Distortion.cpp b/contrib/moses2/FF/Distortion.cpp index 343e1d21f..9e55ff798 100644 --- a/contrib/moses2/FF/Distortion.cpp +++ b/contrib/moses2/FF/Distortion.cpp @@ -57,6 +57,7 @@ struct DistortionState_traditional: public FFState Distortion::Distortion(size_t startInd, const std::string &line) : StatefulFeatureFunction(startInd, line) { + m_completedHypo = false; ReadParameters(); } @@ -65,6 +66,16 @@ Distortion::~Distortion() // TODO Auto-generated destructor stub } +void Distortion::SetParameter(const std::string& key, const std::string& value) +{ + if (key == "completed-hypo") { + m_completedHypo = Scan(value); + } + else { + StatefulFeatureFunction::SetParameter(key, value); + } +} + FFState* Distortion::BlankState(MemPool &pool, const System &sys) const { return new (pool.Allocate()) DistortionState_traditional(); @@ -110,7 +121,7 @@ void Distortion::EvaluateWhenApplied(const ManagerBase &mgr, const DistortionState_traditional &prev = static_cast(prevState); SCORE distortionScore = CalculateDistortionScore(prev.range, - hypo.GetInputPath().range, prev.first_gap); + hypo.GetInputPath().range, prev.first_gap, hypo.GetBitmap()); //cerr << "distortionScore=" << distortionScore << endl; scores.PlusEquals(mgr.system, *this, distortionScore); @@ -123,11 +134,11 @@ void Distortion::EvaluateWhenApplied(const ManagerBase &mgr, } SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr, - const int FirstGap) const + const int FirstGap, const Bitmap &coverage) const { bool useEarlyDistortionCost = false; if (!useEarlyDistortionCost) { - return -(SCORE) ComputeDistortionDistance(prev, curr); + return -(SCORE) ComputeDistortionDistance(prev, curr, coverage); } else { /* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007 @@ -168,7 +179,7 @@ SCORE 
Distortion::CalculateDistortionScore(const Range &prev, const Range &curr, } int Distortion::ComputeDistortionDistance(const Range& prev, - const Range& current) const + const Range& current, const Bitmap &coverage) const { int dist = 0; if (prev.GetNumWordsCovered() == 0) { @@ -176,8 +187,18 @@ int Distortion::ComputeDistortionDistance(const Range& prev, } else { dist = (int) prev.GetEndPos() - (int) current.GetStartPos() + 1; + dist = abs(dist); + + if (m_completedHypo && coverage.IsComplete()) { + dist += coverage.GetSize() - current.GetEndPos() - 1; + /* + cerr << "completed=" << coverage << " " << coverage.GetSize() << " " + << prev << " " + << current << " " << dist << endl; + */ + } } - return abs(dist); + return dist; } void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr, diff --git a/contrib/moses2/FF/Distortion.h b/contrib/moses2/FF/Distortion.h index 45577d1c3..bc843fe54 100644 --- a/contrib/moses2/FF/Distortion.h +++ b/contrib/moses2/FF/Distortion.h @@ -14,6 +14,7 @@ namespace Moses2 { +class Bitmap; class Distortion: public StatefulFeatureFunction { @@ -21,6 +22,8 @@ public: Distortion(size_t startInd, const std::string &line); virtual ~Distortion(); + virtual void SetParameter(const std::string& key, const std::string& value); + virtual FFState* BlankState(MemPool &pool, const System &sys) const; virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr, const InputType &input, const Hypothesis &hypo) const; @@ -48,10 +51,12 @@ public: FFState &state) const; protected: - SCORE CalculateDistortionScore(const Range &prev, const Range &curr, - const int FirstGap) const; + bool m_completedHypo; - int ComputeDistortionDistance(const Range& prev, const Range& current) const; + SCORE CalculateDistortionScore(const Range &prev, const Range &curr, + const int FirstGap, const Bitmap &coverage) const; + + int ComputeDistortionDistance(const Range& prev, const Range& current, const Bitmap &coverage) const; }; From 
29b0072edac3312a82564ea614a5c64030997061 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 2 Jan 2017 06:02:54 -0500 Subject: [PATCH 059/176] CreateProbingPT2 -> CreateProbingPT --- scripts/training/filter-model-given-input.pl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/training/filter-model-given-input.pl b/scripts/training/filter-model-given-input.pl index a16aeac4a..65b2e3502 100755 --- a/scripts/training/filter-model-given-input.pl +++ b/scripts/training/filter-model-given-input.pl @@ -228,7 +228,7 @@ while ( my $line = ) { $phrase_table_impl = "PhraseDictionaryOnDisk"; @toks = set_value( \@toks, "path", "$new_name.bin$table_flag" ); } - elsif ( $binarizer =~ /CreateProbingPT2/ ) { + elsif ( $binarizer =~ /CreateProbingPT/ ) { $phrase_table_impl = "ProbingPT"; @toks = set_value( \@toks, "path", "$new_name.probing$table_flag" ); } @@ -488,7 +488,7 @@ for ( my $i = 0 ; $i <= $#TABLE ; $i++ ) { my $cmd = "$binarizer $mid_file $new_file.bin"; safesystem($cmd) or die "Can't binarize"; } - elsif ( $binarizer =~ /CreateProbingPT2/ ) { + elsif ( $binarizer =~ /CreateProbingPT/ ) { my $cmd = "$binarizer --input-pt $mid_file --output-dir $new_file.probing"; if ($opt_hierarchical) { $cmd .= " --scfg"; @@ -509,8 +509,8 @@ for ( my $i = 0 ; $i <= $#TABLE ; $i++ ) { if ( $binarizer =~ /CreateOnDiskPt/ ) { $lexbin =~ s/CreateOnDiskPt/processLexicalTable/; } - elsif ( $binarizer =~ /CreateProbingPT2/ ) { - $lexbin =~ s/CreateProbingPT2/processLexicalTableMin/; + elsif ( $binarizer =~ /CreateProbingPT/ ) { + $lexbin =~ s/CreateProbingPT/processLexicalTableMin/; } $lexbin =~ s/PhraseTable/LexicalTable/; From cf93594af98e35329be7120d01255a98d0ad1fa4 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 2 Jan 2017 12:44:08 +0000 Subject: [PATCH 060/176] re-implement -feature-overwrite --- contrib/moses2/FF/FeatureFunction.h | 2 +- contrib/moses2/FF/FeatureFunctions.cpp | 42 +++++++++++++++++++++++++- contrib/moses2/FF/FeatureFunctions.h | 5 
++- contrib/moses2/legacy/Parameter.cpp | 4 +-- 4 files changed, 48 insertions(+), 5 deletions(-) diff --git a/contrib/moses2/FF/FeatureFunction.h b/contrib/moses2/FF/FeatureFunction.h index d38c72b89..1e25fce39 100644 --- a/contrib/moses2/FF/FeatureFunction.h +++ b/contrib/moses2/FF/FeatureFunction.h @@ -80,6 +80,7 @@ public: return m_tuneable; } + virtual void SetParameter(const std::string& key, const std::string& value); // may have more factors than actually need, but not guaranteed. virtual void @@ -118,7 +119,6 @@ protected: std::vector > m_args; bool m_tuneable; - virtual void SetParameter(const std::string& key, const std::string& value); virtual void ReadParameters(); void ParseLine(const std::string &line); }; diff --git a/contrib/moses2/FF/FeatureFunctions.cpp b/contrib/moses2/FF/FeatureFunctions.cpp index 8ca145060..49a0ace67 100644 --- a/contrib/moses2/FF/FeatureFunctions.cpp +++ b/contrib/moses2/FF/FeatureFunctions.cpp @@ -103,8 +103,9 @@ void FeatureFunctions::Create() unkWP->SetParameter("suffix", m_system.options.unk.suffix); } } - } + + OverrideFeatures(); } FeatureFunction *FeatureFunctions::Create(const std::string &line) @@ -150,6 +151,17 @@ const FeatureFunction *FeatureFunctions::FindFeatureFunction( return NULL; } +FeatureFunction *FeatureFunctions::FindFeatureFunction( + const std::string &name) +{ + BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){ + if (ff->GetName() == name) { + return const_cast(ff); + } + } + return NULL; +} + const PhraseTable *FeatureFunctions::GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd) { // assume only 1 unk wp @@ -243,5 +255,33 @@ void FeatureFunctions::ShowWeights(const Weights &allWeights) } } +void FeatureFunctions::OverrideFeatures() +{ + const Parameter ¶meter = m_system.params; + + const PARAM_VEC *params = parameter.GetParam("feature-overwrite"); + for (size_t i = 0; params && i < params->size(); ++i) { + const string &str = params->at(i); + vector toks = Tokenize(str); + 
UTIL_THROW_IF2(toks.size() <= 1, "Incorrect format for feature override: " << str); + + FeatureFunction *ff = FindFeatureFunction(toks[0]); + UTIL_THROW_IF2(ff == NULL, "Feature function not found: " << toks[0]); + + for (size_t j = 1; j < toks.size(); ++j) { + const string &keyValStr = toks[j]; + vector keyVal = Tokenize(keyValStr, "="); + UTIL_THROW_IF2(keyVal.size() != 2, "Incorrect format for parameter override: " << keyValStr); + + cerr << "Override " << ff->GetName() << " " + << keyVal[0] << "=" << keyVal[1] << endl; + + ff->SetParameter(keyVal[0], keyVal[1]); + + } + } + +} + } diff --git a/contrib/moses2/FF/FeatureFunctions.h b/contrib/moses2/FF/FeatureFunctions.h index 74c77c7e6..2232e2a97 100644 --- a/contrib/moses2/FF/FeatureFunctions.h +++ b/contrib/moses2/FF/FeatureFunctions.h @@ -95,10 +95,13 @@ protected: System &m_system; size_t m_ffStartInd; + FeatureRegistry m_registry; + FeatureFunction *Create(const std::string &line); std::string GetDefaultName(const std::string &stub); + void OverrideFeatures(); + FeatureFunction *FindFeatureFunction(const std::string &name); - FeatureRegistry m_registry; }; } diff --git a/contrib/moses2/legacy/Parameter.cpp b/contrib/moses2/legacy/Parameter.cpp index 666eb0e98..bd2cd4676 100644 --- a/contrib/moses2/legacy/Parameter.cpp +++ b/contrib/moses2/legacy/Parameter.cpp @@ -94,8 +94,8 @@ Parameter::Parameter() AddParam(search_opts, "weight", "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated"); - //AddParam(search_opts, "feature-overwrite", - // "Override arguments in a particular feature function with a particular key. Format: -feature-overwrite \"FeatureName key=value\""); + AddParam(search_opts, "feature-overwrite", + "Override arguments in a particular feature function with a particular key. 
Format: -feature-overwrite \"FeatureName key=value\""); po::options_description tune_opts("Options used in tuning."); AddParam(tune_opts, "weight-overwrite", From 999d6b6371437862e9309c5bcfe5ccf78a9782ab Mon Sep 17 00:00:00 2001 From: Lane Schwartz Date: Mon, 2 Jan 2017 12:52:01 -0600 Subject: [PATCH 061/176] Added labelled score breakdown to translation options in TranslationRequest. This will enable a client to get the feature value(s) associated with a particular feature for a particular translation option --- moses/server/TranslationRequest.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index e1821a265..e2580fe2f 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -214,6 +214,9 @@ insertTranslationOptions(Moses::Manager& manager, for (size_t j = 0; j < scores.size(); ++j) scoresXml.push_back(xmlrpc_c::value_double(scores[j])); toptXml["scores"] = xmlrpc_c::value_array(scoresXml); + ostringstream buf; + topt->GetScoreBreakdown().OutputAllFeatureScores(buf, true); + toptXml["labelledScores"] = PackScores(topt->GetScoreBreakdown()); toptsXml.push_back(xmlrpc_c::value_struct(toptXml)); } } From 578e65298f365b7844665d5f2a0f9e298c832ae7 Mon Sep 17 00:00:00 2001 From: Lane Schwartz Date: Mon, 2 Jan 2017 12:57:52 -0600 Subject: [PATCH 062/176] Add InMemoryPerSentenceOnDemandLM --- moses/FF/Factory.cpp | 2 + moses/LM/Implementation.cpp | 2 +- moses/LM/InMemoryPerSentenceOnDemandLM.cpp | 91 ++++++++++++++ moses/LM/InMemoryPerSentenceOnDemandLM.h | 135 +++++++++++++++++++++ moses/LM/Jamfile | 2 +- moses/LM/Ken.cpp | 10 ++ moses/LM/Ken.h | 6 + 7 files changed, 246 insertions(+), 2 deletions(-) create mode 100644 moses/LM/InMemoryPerSentenceOnDemandLM.cpp create mode 100644 moses/LM/InMemoryPerSentenceOnDemandLM.h diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index a048410d0..9ae145504 100644 --- a/moses/FF/Factory.cpp +++ 
b/moses/FF/Factory.cpp @@ -68,6 +68,7 @@ #include "moses/FF/SkeletonStatelessFF.h" #include "moses/FF/SkeletonStatefulFF.h" #include "moses/LM/SkeletonLM.h" +#include "moses/LM/InMemoryPerSentenceOnDemandLM.h" #include "moses/FF/SkeletonTranslationOptionListFeature.h" #include "moses/LM/BilingualLM.h" #include "moses/TranslationModel/SkeletonPT.h" @@ -299,6 +300,7 @@ FeatureRegistry::FeatureRegistry() MOSES_FNAME(SkeletonStatelessFF); MOSES_FNAME(SkeletonStatefulFF); MOSES_FNAME(SkeletonLM); + MOSES_FNAME(InMemoryPerSentenceOnDemandLM); MOSES_FNAME(SkeletonTranslationOptionListFeature); MOSES_FNAME(SkeletonPT); diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp index eb67100ca..c0a69994d 100644 --- a/moses/LM/Implementation.cpp +++ b/moses/LM/Implementation.cpp @@ -61,7 +61,7 @@ void LanguageModelImplementation::ShiftOrPush(std::vector &contextF { if (contextFactor.size() < GetNGramOrder()) { contextFactor.push_back(&word); - } else { + } else if (GetNGramOrder() > 0) { // shift for (size_t currNGramOrder = 0 ; currNGramOrder < GetNGramOrder() - 1 ; currNGramOrder++) { contextFactor[currNGramOrder] = contextFactor[currNGramOrder + 1]; diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp new file mode 100644 index 000000000..12ef78f4e --- /dev/null +++ b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp @@ -0,0 +1,91 @@ +#include +#include "InMemoryPerSentenceOnDemandLM.h" +#include "moses/FactorCollection.h" +#include "moses/Util.h" +#include "moses/StaticData.h" +#include "moses/TranslationTask.h" +#include "moses/ContextScope.h" +#include "moses/LM/Ken.h" +#include "lm/model.hh" +#include "util/mmap.hh" + +#include +#include +#include + +using namespace std; + +namespace Moses +{ + InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), initialized(false) +{ + ReadParameters(); +} + +InMemoryPerSentenceOnDemandLM::~InMemoryPerSentenceOnDemandLM() +{ 
+} + +void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) { + + // The context scope object for this translation task + // contains a map of translation task-specific data + boost::shared_ptr contextScope = ttask->GetScope(); + + // The key to the map is this object + void const* key = static_cast(this); + + // The value stored in the map is a string representing a phrase table + boost::shared_ptr value = contextScope->get(key); + + // Create a stream to read the phrase table data + stringstream strme(*(value.get())); + + char * nullpointer = (char *) 0; + const char * filename = std::tmpnam(nullpointer); + ofstream tmp; + tmp.open(filename); + + // Read the phrase table data, one line at a time + string line; + while (getline(strme, line)) { + + tmp << line << "\n"; + + } + + tmp.close(); + + LanguageModelKen & lm = GetPerThreadLM(); + lm.LoadModel("/home/lanes/mosesdecoder/tiny.with_per_sentence/europarl.en.srilm", util::POPULATE_OR_READ); + + initialized = true; + + VERBOSE(1, filename); + if (initialized) { + VERBOSE(1, "\tLM initialized\n"); + } + + // std::remove(filename); + +} + +LanguageModelKen& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const { + + LanguageModelKen *lm; + lm = m_perThreadLM.get(); + if (lm == NULL) { + lm = new LanguageModelKen(); + m_perThreadLM.reset(lm); + } + assert(lm); + return *lm; + +} + + + +} + + + diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.h b/moses/LM/InMemoryPerSentenceOnDemandLM.h new file mode 100644 index 000000000..f0c1effa7 --- /dev/null +++ b/moses/LM/InMemoryPerSentenceOnDemandLM.h @@ -0,0 +1,135 @@ +// $Id$ +#pragma once + +#include +#include "SingleFactor.h" +#include +#include "lm/model.hh" +#include "moses/LM/Ken.h" +#include "moses/FF/FFState.h" + +namespace Moses +{ + +struct InMemoryPerSentenceOnDemandLMState : public FFState { + lm::ngram::State state; + virtual size_t hash() const { + size_t ret = hash_value(state); + return ret; + } + virtual bool operator==(const 
FFState& o) const { + const InMemoryPerSentenceOnDemandLMState &other = static_cast(o); + bool ret = state == other.state; + return ret; + } + +}; + +class InMemoryPerSentenceOnDemandLM : public LanguageModel +{ +public: + InMemoryPerSentenceOnDemandLM(const std::string &line); + ~InMemoryPerSentenceOnDemandLM(); + + void InitializeForInput(ttasksptr const& ttask); + + virtual void SetParameter(const std::string& key, const std::string& value) { + GetPerThreadLM().SetParameter(key, value); + } + + virtual const FFState* EmptyHypothesisState(const InputType &input) const { + if (initialized) { + return GetPerThreadLM().EmptyHypothesisState(input); + } else { + return new InMemoryPerSentenceOnDemandLMState(); + } + } + + virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const { + if (initialized) { + return GetPerThreadLM().EvaluateWhenApplied(hypo, ps, out); + } else { + UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n"); + } + } + + virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const { + if (initialized) { + return GetPerThreadLM().EvaluateWhenApplied(cur_hypo, featureID, accumulator); + } else { + UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n"); + } + } + + virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const { + if (initialized) { + return GetPerThreadLM().EvaluateWhenApplied(hyperedge, featureID, accumulator); + } else { + UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n"); + } + } + + + virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const { + if (initialized) { + GetPerThreadLM().CalcScore(phrase, fullScore, ngramScore, oovCount); + } + } + + virtual void CalcScoreFromCache(const Phrase &phrase, float &fullScore, float &ngramScore, 
std::size_t &oovCount) const { + if (initialized) { + GetPerThreadLM().CalcScoreFromCache(phrase, fullScore, ngramScore, oovCount); + } + } + + virtual void IssueRequestsFor(Hypothesis& hypo, const FFState* input_state) { + GetPerThreadLM().IssueRequestsFor(hypo, input_state); + } + + virtual void sync() { + GetPerThreadLM().sync(); + } + + virtual void SetFFStateIdx(int state_idx) { + if (initialized) { + GetPerThreadLM().SetFFStateIdx(state_idx); + } + } + + virtual void IncrementalCallback(Incremental::Manager &manager) const { + if (initialized) { + GetPerThreadLM().IncrementalCallback(manager); + } + } + + virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const { + if (initialized) { + GetPerThreadLM().ReportHistoryOrder(out, phrase); + } + } + + virtual void EvaluateInIsolation(const Phrase &source + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedScores) const { + if (initialized) { + GetPerThreadLM().EvaluateInIsolation(source, targetPhrase, scoreBreakdown, estimatedScores); + } + } + + bool IsUseable(const FactorMask &mask) const { + return GetPerThreadLM().IsUseable(mask); + } + + +protected: + LanguageModelKen & GetPerThreadLM() const; + + mutable boost::thread_specific_ptr > m_perThreadLM; + + bool initialized; + +}; + + +} diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile index 75b66603c..4eafbd632 100644 --- a/moses/LM/Jamfile +++ b/moses/LM/Jamfile @@ -138,7 +138,7 @@ if $(with-dalm) { #Top-level LM library. If you've added a file that doesn't depend on external #libraries, put it here. 
-alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp +alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp InMemoryPerSentenceOnDemandLM.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp ../../lm//kenlm ..//headers $(dependencies) ; alias macros : : : : $(lmmacros) ; diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp index c7ac663cc..e42e60274 100644 --- a/moses/LM/Ken.cpp +++ b/moses/LM/Ken.cpp @@ -105,6 +105,7 @@ template void LanguageModelKen::LoadModel(const std::string config.load_method = load_method; m_ngram.reset(new Model(file.c_str(), config)); + VERBOSE(2, "LanguageModelKen " << m_description << " reset to " << file << "\n"); } template LanguageModelKen::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method) @@ -116,6 +117,15 @@ template LanguageModelKen::LanguageModelKen(const std::stri LoadModel(file, load_method); } +template LanguageModelKen::LanguageModelKen() + :LanguageModel("KENLM") + ,m_beginSentenceFactor(FactorCollection::Instance().AddFactor(BOS_)) + ,m_factorType(0) +{ + ReadParameters(); +} + + template LanguageModelKen::LanguageModelKen(const LanguageModelKen ©_from) :LanguageModel(copy_from.GetArgLine()), m_ngram(copy_from.m_ngram), diff --git a/moses/LM/Ken.h b/moses/LM/Ken.h index 4934228c2..33590d659 100644 --- a/moses/LM/Ken.h +++ b/moses/LM/Ken.h @@ -33,11 +33,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "moses/TypeDef.h" #include "moses/Word.h" + + namespace Moses { //class LanguageModel; class FFState; +class InMemoryPerSentenceOnDemandLM; LanguageModel *ConstructKenLM(const std::string &line); @@ -67,6 +70,8 @@ public: virtual bool IsUseable(const FactorMask &mask) const; + friend class InMemoryPerSentenceOnDemandLM; + protected: boost::shared_ptr m_ngram; @@ -84,6 
+89,7 @@ protected: std::vector m_lmIdLookup; private: + LanguageModelKen(); LanguageModelKen(const LanguageModelKen ©_from); // Convert last words of hypothesis into vocab ids, returning an end pointer. From a18b6676b1aecd60973d1a4a81abc995ba1b9fe0 Mon Sep 17 00:00:00 2001 From: Lane Schwartz Date: Mon, 2 Jan 2017 14:22:26 -0600 Subject: [PATCH 063/176] Allow user to specify whether PhraseDictionaryMemoryPerSentenceOnDemand are probability values or not --- .../PhraseDictionaryMemoryPerSentenceOnDemand.cpp | 10 +++++++--- .../PhraseDictionaryMemoryPerSentenceOnDemand.h | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp index 4675d06c8..072e482de 100644 --- a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp +++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp @@ -8,7 +8,7 @@ using namespace std; namespace Moses { PhraseDictionaryMemoryPerSentenceOnDemand::PhraseDictionaryMemoryPerSentenceOnDemand(const std::string &line) - : PhraseDictionary(line, true) + : PhraseDictionary(line, true), m_valuesAreProbabilities(true) { ReadParameters(); } @@ -67,8 +67,10 @@ void PhraseDictionaryMemoryPerSentenceOnDemand::InitializeForInput(ttasksptr con // score for this phrase table vector scores = Tokenize(toks[2]); - std::transform(scores.begin(), scores.end(), scores.begin(),TransformScore); - std::transform(scores.begin(), scores.end(), scores.begin(),FloorScore); + if (m_valuesAreProbabilities) { + std::transform(scores.begin(), scores.end(), scores.begin(),TransformScore); + std::transform(scores.begin(), scores.end(), scores.begin(),FloorScore); + } target->GetScoreBreakdown().PlusEquals(this, scores); // score of all other ff when this rule is being loaded @@ -129,6 +131,8 @@ PhraseDictionaryMemoryPerSentenceOnDemand::SetParameter(const std::string& key, { if (key == "path") { 
UTIL_THROW(util::Exception, "PhraseDictionaryMemoryPerSentenceOnDemand does not support key \"path\"."); + } else if (key == "valuesAreProbabilities") { + m_valuesAreProbabilities = Scan(value); } else { PhraseDictionary::SetParameter(key, value); } diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h index bcda0ef77..e3fe6514a 100644 --- a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h +++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h @@ -39,6 +39,8 @@ protected: typedef boost::unordered_map Coll; mutable boost::thread_specific_ptr m_coll; + bool m_valuesAreProbabilities; + Coll &GetColl() const; }; From ab2e48415fa50faa41106f9f69339ff4ab01de73 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 2 Jan 2017 15:55:33 -0500 Subject: [PATCH 064/176] add back -text-type for EMS --- contrib/moses2/legacy/Parameter.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/contrib/moses2/legacy/Parameter.cpp b/contrib/moses2/legacy/Parameter.cpp index bd2cd4676..870a49f2a 100644 --- a/contrib/moses2/legacy/Parameter.cpp +++ b/contrib/moses2/legacy/Parameter.cpp @@ -373,6 +373,9 @@ Parameter::Parameter() /////////////////////////////////////////////////////////////////////////////////////// // DEPRECATED options po::options_description deprec_opts("Deprecated Options"); + AddParam(deprec_opts, "text-type", + "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features"); + /* AddParam(deprec_opts, "link-param-count", "DEPRECATED. DO NOT USE. Number of parameters on word links when using confusion networks or lattices (default = 1)"); @@ -412,8 +415,6 @@ Parameter::Parameter() "DEPRECATED. DO NOT USE. weight for unknown word penalty"); AddParam(deprec_opts, "weight-e", "e", "DEPRECATED. DO NOT USE. weight for word deletion"); - AddParam(deprec_opts, "text-type", - "DEPRECATED. 
DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features"); AddParam(deprec_opts, "input-scores", "DEPRECATED. DO NOT USE. 2 numbers on 2 lines - [1] of scores on each edge of a confusion network or lattice input (default=1). [2] Number of 'real' word scores (0 or 1. default=0)"); AddParam(deprec_opts, "dlm-model", From ff12a13eaaef2e6272123d5865f516ed4513bc07 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 2 Jan 2017 16:37:56 -0500 Subject: [PATCH 065/176] re-tune if decoder changed. eg moses -> moses2 --- scripts/ems/experiment.meta | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta index 16fc20336..d6e6dc133 100644 --- a/scripts/ems/experiment.meta +++ b/scripts/ems/experiment.meta @@ -1196,7 +1196,7 @@ tune default-name: tuning/moses.ini tmp-name: tuning/tmp final-model: yes - rerun-on-change: decoder-settings tuning-settings nbest lambda async + rerun-on-change: decoder decoder-settings tuning-settings nbest lambda async not-error: trans: No such file or directory thot-tune in: TRAINING:config input reference From 80bd5597578cddc72244c8f53d18a2aabdca27b9 Mon Sep 17 00:00:00 2001 From: MosesAdmin Date: Tue, 3 Jan 2017 00:00:36 +0000 Subject: [PATCH 066/176] daily automatic beautifier --- moses/LM/InMemoryPerSentenceOnDemandLM.cpp | 10 ++++++---- moses/LM/InMemoryPerSentenceOnDemandLM.h | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp index 12ef78f4e..364aebe42 100644 --- a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp +++ b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp @@ -17,7 +17,7 @@ using namespace std; namespace Moses { - InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), initialized(false) +InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : 
LanguageModel(line), initialized(false) { ReadParameters(); } @@ -26,7 +26,8 @@ InMemoryPerSentenceOnDemandLM::~InMemoryPerSentenceOnDemandLM() { } -void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) { +void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) +{ // The context scope object for this translation task // contains a map of translation task-specific data @@ -63,14 +64,15 @@ void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) { VERBOSE(1, filename); if (initialized) { - VERBOSE(1, "\tLM initialized\n"); + VERBOSE(1, "\tLM initialized\n"); } // std::remove(filename); } -LanguageModelKen& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const { +LanguageModelKen& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const +{ LanguageModelKen *lm; lm = m_perThreadLM.get(); diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.h b/moses/LM/InMemoryPerSentenceOnDemandLM.h index f0c1effa7..022ba9289 100644 --- a/moses/LM/InMemoryPerSentenceOnDemandLM.h +++ b/moses/LM/InMemoryPerSentenceOnDemandLM.h @@ -89,7 +89,7 @@ public: virtual void sync() { GetPerThreadLM().sync(); } - + virtual void SetFFStateIdx(int state_idx) { if (initialized) { GetPerThreadLM().SetFFStateIdx(state_idx); @@ -107,7 +107,7 @@ public: GetPerThreadLM().ReportHistoryOrder(out, phrase); } } - + virtual void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown From 02772c07dec22acb1d50397651ad189b0f97e1e6 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 3 Jan 2017 11:06:26 +0000 Subject: [PATCH 067/176] revert changes to Distortion FF --- contrib/moses2/FF/Distortion.cpp | 32 +++++--------------------------- contrib/moses2/FF/Distortion.h | 9 ++------- 2 files changed, 7 insertions(+), 34 deletions(-) diff --git a/contrib/moses2/FF/Distortion.cpp b/contrib/moses2/FF/Distortion.cpp index 9e55ff798..1d7b7246d 100644 --- a/contrib/moses2/FF/Distortion.cpp +++ 
b/contrib/moses2/FF/Distortion.cpp @@ -57,7 +57,6 @@ struct DistortionState_traditional: public FFState Distortion::Distortion(size_t startInd, const std::string &line) : StatefulFeatureFunction(startInd, line) { - m_completedHypo = false; ReadParameters(); } @@ -66,16 +65,6 @@ Distortion::~Distortion() // TODO Auto-generated destructor stub } -void Distortion::SetParameter(const std::string& key, const std::string& value) -{ - if (key == "completed-hypo") { - m_completedHypo = Scan(value); - } - else { - StatefulFeatureFunction::SetParameter(key, value); - } -} - FFState* Distortion::BlankState(MemPool &pool, const System &sys) const { return new (pool.Allocate()) DistortionState_traditional(); @@ -121,7 +110,7 @@ void Distortion::EvaluateWhenApplied(const ManagerBase &mgr, const DistortionState_traditional &prev = static_cast(prevState); SCORE distortionScore = CalculateDistortionScore(prev.range, - hypo.GetInputPath().range, prev.first_gap, hypo.GetBitmap()); + hypo.GetInputPath().range, prev.first_gap); //cerr << "distortionScore=" << distortionScore << endl; scores.PlusEquals(mgr.system, *this, distortionScore); @@ -134,11 +123,11 @@ void Distortion::EvaluateWhenApplied(const ManagerBase &mgr, } SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr, - const int FirstGap, const Bitmap &coverage) const + const int FirstGap) const { bool useEarlyDistortionCost = false; if (!useEarlyDistortionCost) { - return -(SCORE) ComputeDistortionDistance(prev, curr, coverage); + return -(SCORE) ComputeDistortionDistance(prev, curr); } else { /* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007 @@ -179,7 +168,7 @@ SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr, } int Distortion::ComputeDistortionDistance(const Range& prev, - const Range& current, const Bitmap &coverage) const + const Range& current) const { int dist = 0; if (prev.GetNumWordsCovered() == 0) { @@ -187,18 +176,8 @@ int 
Distortion::ComputeDistortionDistance(const Range& prev, } else { dist = (int) prev.GetEndPos() - (int) current.GetStartPos() + 1; - dist = abs(dist); - - if (m_completedHypo && coverage.IsComplete()) { - dist += coverage.GetSize() - current.GetEndPos() - 1; - /* - cerr << "completed=" << coverage << " " << coverage.GetSize() << " " - << prev << " " - << current << " " << dist << endl; - */ - } } - return dist; + return abs(dist); } void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr, @@ -209,4 +188,3 @@ void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr, } } - diff --git a/contrib/moses2/FF/Distortion.h b/contrib/moses2/FF/Distortion.h index bc843fe54..45577d1c3 100644 --- a/contrib/moses2/FF/Distortion.h +++ b/contrib/moses2/FF/Distortion.h @@ -14,7 +14,6 @@ namespace Moses2 { -class Bitmap; class Distortion: public StatefulFeatureFunction { @@ -22,8 +21,6 @@ public: Distortion(size_t startInd, const std::string &line); virtual ~Distortion(); - virtual void SetParameter(const std::string& key, const std::string& value); - virtual FFState* BlankState(MemPool &pool, const System &sys) const; virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr, const InputType &input, const Hypothesis &hypo) const; @@ -51,12 +48,10 @@ public: FFState &state) const; protected: - bool m_completedHypo; - SCORE CalculateDistortionScore(const Range &prev, const Range &curr, - const int FirstGap, const Bitmap &coverage) const; + const int FirstGap) const; - int ComputeDistortionDistance(const Range& prev, const Range& current, const Bitmap &coverage) const; + int ComputeDistortionDistance(const Range& prev, const Range& current) const; }; From 2a5e40ed60d351f05ca58ad3be6ec0865d08373f Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Wed, 4 Jan 2017 22:01:45 -0600 Subject: [PATCH 068/176] New file: Lithuanian --- .../nonbreaking_prefix.lt | 110 ++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 
scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt diff --git a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt new file mode 100644 index 000000000..d7829e3c0 --- /dev/null +++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt @@ -0,0 +1,110 @@ +# Anything in this file, followed by a period (and an upper-case word), +# does NOT indicate an end-of-sentence marker. +# Special cases are included for prefixes that ONLY appear before 0-9 numbers. + +# Any single upper case letter followed by a period is not a sentence ender +# (excluding I occasionally, but we leave it in) +# usually upper case letters are initials in a name +A +Ā +B +C +Č +D +E +Ē +F +G +Ģ +H +I +Ī +J +K +Ķ +L +Ļ +M +N +Ņ +O +P +Q +R +S +Š +T +U +Ū +V +W +X +Y +Z +Ž + +# Abbreviations m. menesis d. diena g. gimes +m +d +g + +# Day and month abbreviations +# Pirmadienis Penktadienis +Pr +Pn +Pirm +Antr +Treč +Ketv +Penkt +Šešt +Sekm +Saus +Vas +Kov +Bal +Geg +Birž +Liep +Rugpj +Rugs +Spal +Lapkr +Gruod + +# List of titles. These are often followed by upper-case names, but do +# not indicate sentence breaks +# +# Gerbiamasis +Gerb + +# XXX TODO .. Below are not quite correct, copied from latvian +dr +Dr +med +prof +Prof +inž +Inž +ist.loc +Ist.loc +kor.loc +Kor.loc +v.i +vietn +Vietn + +# misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT +# fall into this category - it sometimes ends a sentence) +# angl angliskai +# dab dabartine +angl +dab + + +#Numbers only. These should only induce breaks when followed by a numeric sequence +# add NUMERIC_ONLY after the word for this function +#This case is mostly for the english "No." 
which can either be a sentence of its own, or +#if followed by a number, a non-breaking prefix +No #NUMERIC_ONLY# +Nr #NUMERIC_ONLY# From 3ef84b133cc8bf64862b3c2dad254e7043439fb7 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Wed, 4 Jan 2017 22:30:53 -0600 Subject: [PATCH 069/176] More abbreviations --- .../nonbreaking_prefix.lt | 277 +++++++++++++++++- 1 file changed, 274 insertions(+), 3 deletions(-) diff --git a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt index d7829e3c0..4e2f6677e 100644 --- a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt +++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt @@ -43,12 +43,11 @@ Y Z Ž -# Abbreviations m. menesis d. diena g. gimes +# Day and month abbreviations +# m. menesis d. diena g. gimes m d g - -# Day and month abbreviations # Pirmadienis Penktadienis Pr Pn @@ -72,6 +71,278 @@ Spal Lapkr Gruod +# Technical terms, abbreviations used in guidebooks, advertisments, etc. +# Generally lower-case. 
+air +# airiškai +amer +# amerikanizmas +anat +# anatomija +arab +# arabų +archeol +archit +asm +# asmuo +astr +# astronomija +austral +# australiškai +aut +# automobilis +av +# aviacija +bažn +bdv +# būdvardis +bibl +# Biblija +biol +# biologija +bot +# botanika +buh +# buhalterija +chem +# chemija +d +# didysis +dgs +# daugiskaita +dial +# dialektizmas +dipl +dktv +# daiktavardis +džn +# dažnai +ekon +el +# elektra +esam +# esamasis laikas +euf +# eufemizmas +fam +# familiariai +farm +# farmacija +filos +# filosofija +fin +# finansai +fiz +# fizika +fiziol +flk +# folkloras +fon +# fonetika +fot +# fotografija +geod +# geodezija +geogr +geol +# geologija +geom +# geometrija +glžk +gr +# graikų +gram +her +# heraldika +hidr +# hidrotechnika +ind +# Indų +iron +# ironiškai +isp +# ispanų +ist +# istorija +it +# italų +įv +reikšm +įv.reikšm +# įvairiomis reikšmėmis +jap +# japonų +juok +# juokaujamai +jūr +# jūrininkystė +kalb +# kalbotyra +kar +# karyba +kas +# kasyba +kin +# kinematografija +klaus +# klausiamasis +knyg +# knyginis +kom +# komercija +komp +# kompiuteris +kosm +# kosmonautika +kt +# kitas +kul +# kulinarija +kuop +# kuopine +l +# laikas +lit +# literatūrinis +log +# logika +lot +# lotynų +mat +# matematika +maž +# mažybinis +med +# medicina +medž +# medžioklė +men +# menas +menk +# menkinamai +metal +# metalurgija +meteor +min +# mineralogija +mit +# mitologija +mok +# mokyklinis +muz +# muzikinis +n +# naujasis +neig +# neigiamasis +niek +# niekinamai +ofic +# oficialus +opt +# optika +p +# pietūs +pan +# panašiai +parl +# parlamentas +pat +# patarlė +paž +# pažodžiui +plg +# palygink +poet +# poetizmas +poligr +# poligrafija +polit +# politika +ppr +# paprastai +pr +# prancūzų +prk +# perkeltine +psn +# pasenęs žodis +psich +# psichologija +pvz +# pavyzdžiui +r +# rytai +rad +# radiotechnika +rel +# religija +ret +# retai +rus +# rusų +sen +# senasis +sl +# slengas +spec +# specialus +sport +stat +# statyba +sudurt +# sudurtinis +sutr +# 
sutrumpintas +š +# šiaurė +šach +# šachmatai +šiaur +škot +# škotiškai +šnek +# šnekamoji +teatr +tech +# technika +teig +# teigiamas +teis +# teisė +tekst +# tekstilė +tel +v +# tik vyriškosios, vakarai +t.p +t +p +# taip pat +vaik +# vaikų +vart +# vartojama +vet +# veterinarija +vid +# vidurinis +vksm +# veiksmažodis +vns +# vienaskaita +vok +# vokiečių +vulg +# vulgariai +zool +žr +# žiūrėk +ž.ū +ž +ū +# žemės ūkis + # List of titles. These are often followed by upper-case names, but do # not indicate sentence breaks # From d10ba6f049d8dc08d95a6a6e6934adf808160320 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Wed, 4 Jan 2017 23:52:28 -0600 Subject: [PATCH 070/176] More abbreviations for Lithuanian. --- .../nonbreaking_prefix.lt | 369 ++++++++++++++++-- 1 file changed, 343 insertions(+), 26 deletions(-) diff --git a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt index 4e2f6677e..fa72196d9 100644 --- a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt +++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt @@ -43,11 +43,18 @@ Y Z Ž +# Initialis -- Džonas +Dz +Dž +Just + # Day and month abbreviations # m. menesis d. diena g. 
gimes m +mėn d g +gim # Pirmadienis Penktadienis Pr Pn @@ -71,6 +78,279 @@ Spal Lapkr Gruod +# Business, governmental, geographical terms +a +# aikštė +adv +# advokatas +akad +# akademikas +aklg +# akligatvis +akt +# aktorius +al +# alėja +A.V +# antspaudo vieta +aps +apskr +# apskritis +apyg +# apygarda +aps +apskr +# apskritis +asist +# asistentas +asmv +avd +# asmenvardis +a.k +asm +asm.k +# asmens kodas +atsak +# atsakingasis +atsisk +sąsk +# atsiskaitomoji sąskaita +aut +# autorius +b +k +b.k +# banko kodas +bkl +# bakalauras +bt +# butas +buv +# buvęs, -usi +dail +# dailininkas +dek +# dekanas +dėst +# dėstytojas +dir +# direktorius +dirig +# dirigentas +doc +# docentas +drp +# durpynas +dš +# dešinysis +egz +# egzempliorius +eil +# eilutė +ekon +# ekonomika +el +# elektroninis +etc +ež +# ežeras +faks +# faksas +fak +# fakultetas +gen +# generolas +gyd +# gydytojas +gv +# gyvenvietė +įl +# įlanka +Įn +# įnagininkas +insp +# inspektorius +pan +# ir panašiai +t.t +# ir taip toliau +k.a +# kaip antai +kand +# kandidatas +kat +# katedra +kyš +# kyšulys +kl +# klasė +kln +# kalnas +kn +# knyga +koresp +# korespondentas +kpt +# kapitonas +kr +# kairysis +kt +# kitas +kun +# kunigas +l +e +p +l.e.p +# laikinai einantis pareigas +ltn +# leitenantas +m +mst +# miestas +m.e +# mūsų eros +m.m +# mokslo metai +mot +# moteris +mstl +# miestelis +mgr +# magistras +mgnt +# magistrantas +mjr +# majoras +mln +# milijonas +mlrd +# milijardas +mok +# mokinys +mokyt +# mokytojas +moksl +# mokslinis +nkt +# nekaitomas +ntk +# neteiktinas +Nr +nr +# numeris +p +# ponas +p.d +a.d +# pašto dėžutė, abonentinė dėžutė +p.m.e +# prieš mūsų erą +pan +# ir panašiai +pav +# paveikslas +pavad +# pavaduotojas +pirm +# pirmininkas +pl +# plentas +plg +# palygink +plk +# pulkininkas; pelkė +pr +# prospektas +Kr +pr.Kr +# prieš Kristų +prok +# prokuroras +prot +# protokolas +pss +# pusiasalis +pšt +# paštas +pvz +# pavyzdžiui +r +# rajonas +red +# redaktorius +rš +# raštų kalbos +sąs +# 
sąsiuvinis +saviv +sav +# savivaldybė +sekr +# sekretorius +sen +# seniūnija, seniūnas +sk +# skaityk; skyrius +skg +# skersgatvis +skyr +sk +# skyrius +skv +# skveras +sp +# spauda; spaustuvė +spec +# specialistas +sr +# sritis +st +# stotis +str +# straipsnis +stud +# studentas +š +š.m +# šių metų +šnek +# šnekamosios +tir +# tiražas +tūkst +# tūkstantis +up +# upė +upl +# upelis +vad +# vadinamasis, -oji +vlsč +# valsčius +ved +# vedėjas +vet +# veterinarija +virš +# viršininkas, viršaitis +vyr +# vyriausiasis, -ioji; vyras +vyresn +# vyresnysis +vlsč +# valsčius +vs +# viensėdis +Vt +vt +# vietininkas +vtv +vv +# vietovardis +žml +# žemėlapis + # Technical terms, abbreviations used in guidebooks, advertisments, etc. # Generally lower-case. air @@ -79,6 +359,8 @@ amer # amerikanizmas anat # anatomija +angl +# angl. angliskai arab # arabų archeol @@ -102,12 +384,21 @@ biol # biologija bot # botanika +brt +# burtai, burtažodis. +brus +# baltarusių buh # buhalterija chem # chemija -d -# didysis +col +# collectivum +con +conj +# conjunctivus, jungtukas +dab +# dab. 
dabartine dgs # daugiskaita dial @@ -128,6 +419,8 @@ fam # familiariai farm # farmacija +filol +# filologija filos # filosofija fin @@ -135,6 +428,7 @@ fin fiz # fizika fiziol +# fiziologija flk # folkloras fon @@ -163,6 +457,7 @@ iron isp # ispanų ist +istor # istorija it # italų @@ -204,6 +499,8 @@ l # laikas lit # literatūrinis +lingv +# lingvistika log # logika lot @@ -229,18 +526,24 @@ mit # mitologija mok # mokyklinis +ms +# mįslė muz # muzikinis n # naujasis neig # neigiamasis +neol +# neologizmas niek # niekinamai ofic # oficialus opt # optika +orig +# original p # pietūs pan @@ -255,16 +558,25 @@ plg # palygink poet # poetizmas +poez +# poezija poligr # poligrafija polit # politika ppr # paprastai +pranc pr -# prancūzų +# prancūzų, prūsų +priet +# prietaras +prek +# prekyba prk # perkeltine +prs +# persona, asmuo psn # pasenęs žodis psich @@ -284,7 +596,9 @@ rus sen # senasis sl -# slengas +# slengas, slavų +sov +# sovietinis spec # specialus sport @@ -294,6 +608,8 @@ sudurt # sudurtinis sutr # sutrumpintas +suv +# suvalkiečių š # šiaurė šach @@ -305,6 +621,7 @@ sutr # šnekamoji teatr tech +techn # technika teig # teigiamas @@ -313,12 +630,19 @@ teis tekst # tekstilė tel +# telefonas +teol +# teologija v # tik vyriškosios, vakarai t.p t p -# taip pat +# ir taip pat +t.t +# ir taip toliau +t.y +# tai yra vaik # vaikų vart @@ -336,6 +660,7 @@ vok vulg # vulgariai zool +# zoologija žr # žiūrėk ž.ū @@ -346,31 +671,24 @@ zool # List of titles. These are often followed by upper-case names, but do # not indicate sentence breaks # +# Jo Eminencija +Em. # Gerbiamasis Gerb - -# XXX TODO .. Below are not quite correct, copied from latvian -dr -Dr -med -prof +gerb +# malonus +malon +# profesorius Prof +prof +# daktaras (mokslų) +Dr +dr +habil +med +# inž inžinierius inž Inž -ist.loc -Ist.loc -kor.loc -Kor.loc -v.i -vietn -Vietn - -# misc - odd period-ending items that NEVER indicate breaks (p.m. 
does NOT -# fall into this category - it sometimes ends a sentence) -# angl angliskai -# dab dabartine -angl -dab #Numbers only. These should only induce breaks when followed by a numeric sequence @@ -378,4 +696,3 @@ dab #This case is mostly for the english "No." which can either be a sentence of its own, or #if followed by a number, a non-breaking prefix No #NUMERIC_ONLY# -Nr #NUMERIC_ONLY# From ab6816f9a755f37de00090829f62848372e8222e Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 5 Jan 2017 10:08:06 -0600 Subject: [PATCH 071/176] Purely cosmetic cleanup. Use same indentation style throughout; wrap long lines; capitalize sentences; add punctuation; remove trailing whitespace. --- scripts/ems/support/split-sentences.perl | 45 ++++++++++++------------ 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl index f72767054..7bad038a1 100755 --- a/scripts/ems/support/split-sentences.perl +++ b/scripts/ems/support/split-sentences.perl @@ -29,10 +29,10 @@ while (@ARGV) { } if ($HELP) { - print "Usage ./split-sentences.perl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n"; - print "-q: quiet mode\n"; - print "-b: no output buffering (for use in bidirectional pipes)\n"; - exit; + print "Usage ./split-sentences.pl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n"; + print "-q: quiet mode\n"; + print "-b: no output buffering (for use in bidirectional pipes)\n"; + exit; } if (!$QUIET) { print STDERR "Sentence Splitter v3\n"; @@ -64,9 +64,9 @@ if (-e "$prefixfile") { close(PREFIX); } -##loop text, add lines together until we get a blank line or a

+## Loop over text, add lines together until we get a blank line or a

my $text = ""; -while() { +while () { chop; if (/^<.+>$/ || /^\s*$/) { #time to process this block, we've hit a blank or

@@ -79,7 +79,7 @@ while() { $text .= $_. " "; } } -#do the leftover text +# Do the leftover text. &do_it_for($text,"") if $text; @@ -91,28 +91,32 @@ sub do_it_for { } sub preprocess { - #this is one paragraph + # This is one paragraph. my($text) = @_; - # clean up spaces at head and tail of each line as well as any double-spacing + # Clean up spaces at head and tail of each line, as well as + # any double-spacing. $text =~ s/ +/ /g; $text =~ s/\n /\n/g; $text =~ s/ \n/\n/g; $text =~ s/^ //g; $text =~ s/ $//g; - #####add sentence breaks as needed##### + ##### Add sentence breaks as needed ##### - #non-period end of sentence markers (?!) followed by sentence starters. + # Non-period end of sentence markers (?!) followed by sentence starters. $text =~ s/([?!]) +([\'\"\(\[\¿\¡\p{IsPi}]*[\p{IsUpper}])/$1\n$2/g; - #multi-dots followed by sentence starters + # Multi-dots followed by sentence starters. $text =~ s/(\.[\.]+) +([\'\"\(\[\¿\¡\p{IsPi}]*[\p{IsUpper}])/$1\n$2/g; - # add breaks for sentences that end with some sort of punctuation inside a quote or parenthetical and are followed by a possible sentence starter punctuation and upper case + # Add breaks for sentences that end with some sort of punctuation + # inside a quote or parenthetical and are followed by a possible + # sentence starter punctuation and upper case. $text =~ s/([?!\.][\ ]*[\'\"\)\]\p{IsPf}]+) +([\'\"\(\[\¿\¡\p{IsPi}]*[\ ]*[\p{IsUpper}])/$1\n$2/g; - # add breaks for sentences that end with some sort of punctuation are followed by a sentence starter punctuation and upper case + # Add breaks for sentences that end with some sort of punctuation, + # and are followed by a sentence starter punctuation and upper case. $text =~ s/([?!\.]) +([\'\"\(\[\¿\¡\p{IsPi}]+[\ ]*[\p{IsUpper}])/$1\n$2/g; # special punctuation cases are covered. Check all remaining periods. 
@@ -130,30 +134,27 @@ sub preprocess { } elsif ($words[$i] =~ /(\.)[\p{IsUpper}\-]+(\.+)$/) { #not breaking - upper case acronym } elsif($words[$i+1] =~ /^([ ]*[\'\"\(\[\¿\¡\p{IsPi}]*[ ]*[\p{IsUpper}0-9])/) { - #the next word has a bunch of initial quotes, maybe a space, then either upper case or a number + # The next word has a bunch of initial quotes, maybe a + # space, then either upper case or a number $words[$i] = $words[$i]."\n" unless ($prefix && $NONBREAKING_PREFIX{$prefix} && $NONBREAKING_PREFIX{$prefix} == 2 && !$starting_punct && ($words[$i+1] =~ /^[0-9]+/)); #we always add a return for these unless we have a numeric non-breaker and a number start } - } $text = $text.$words[$i]." "; } - #we stopped one token from the end to allow for easy look-ahead. Append it now. + # We stopped one token from the end to allow for easy look-ahead. Append it now. $text = $text.$words[$i]; - # clean up spaces at head and tail of each line as well as any double-spacing + # Clean up spaces at head and tail of each line as well as any double-spacing $text =~ s/ +/ /g; $text =~ s/\n /\n/g; $text =~ s/ \n/\n/g; $text =~ s/^ //g; $text =~ s/ $//g; - #add trailing break + # Add trailing break. $text .= "\n" unless $text =~ /\n$/; return $text; - } - - From 9f5500a3a8df10ff0a99238c8c81679c9b9420a2 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 5 Jan 2017 10:09:34 -0600 Subject: [PATCH 072/176] oops. 
--- scripts/ems/support/split-sentences.perl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl index 7bad038a1..7f2fb3ced 100755 --- a/scripts/ems/support/split-sentences.perl +++ b/scripts/ems/support/split-sentences.perl @@ -29,7 +29,7 @@ while (@ARGV) { } if ($HELP) { - print "Usage ./split-sentences.pl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n"; + print "Usage ./split-sentences.perl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n"; print "-q: quiet mode\n"; print "-b: no output buffering (for use in bidirectional pipes)\n"; exit; From 144f43495e73b4e7d6224d798dad20065cd6b21f Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 5 Jan 2017 11:33:10 -0600 Subject: [PATCH 073/176] Preliminary support for Chinese. Also, cleanup some of the comments. --- scripts/ems/support/split-sentences.perl | 50 ++++++++++++++++-------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl index 7f2fb3ced..7bda05c30 100755 --- a/scripts/ems/support/split-sentences.perl +++ b/scripts/ems/support/split-sentences.perl @@ -2,8 +2,8 @@ # # This file is part of moses. Its use is licensed under the GNU Lesser General # Public License version 2.1 or, at your option, any later version. - -# Based on Preprocessor written by Philipp Koehn +# +# Based on a preprocessor written by Philipp Koehn. 
binmode(STDIN, ":utf8"); binmode(STDOUT, ":utf8"); @@ -12,8 +12,9 @@ binmode(STDERR, ":utf8"); use warnings; use FindBin qw($RealBin); use strict; +use utf8; -my $mydir = "$RealBin/../../share/nonbreaking_prefixes"; +my $mydir = "$RealBin/nonbreaking_prefixes"; my %NONBREAKING_PREFIX = (); my $language = "en"; @@ -29,7 +30,7 @@ while (@ARGV) { } if ($HELP) { - print "Usage ./split-sentences.perl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n"; + print "Usage ./split-sentences.pl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n"; print "-q: quiet mode\n"; print "-b: no output buffering (for use in bidirectional pipes)\n"; exit; @@ -41,7 +42,7 @@ if (!$QUIET) { my $prefixfile = "$mydir/nonbreaking_prefix.$language"; -#default back to English if we don't have a language-specific prefix file +# Default to English, if we don't have a language-specific prefix file. if (!(-e $prefixfile)) { $prefixfile = "$mydir/nonbreaking_prefix.en"; print STDERR "WARNING: No known abbreviations for language '$language', attempting fall-back to English version...\n"; @@ -69,13 +70,13 @@ my $text = ""; while () { chop; if (/^<.+>$/ || /^\s*$/) { - #time to process this block, we've hit a blank or

- &do_it_for($text,$_); - print "

\n" if (/^\s*$/ && $text); ##if we have text followed by

+ # Time to process this block; we've hit a blank or

+ &do_it_for($text, $_); + print "

\n" if (/^\s*$/ && $text); ## If we have text followed by

$text = ""; } else { - #append the text, with a space + # Append the text, with a space. $text .= $_. " "; } } @@ -91,7 +92,7 @@ sub do_it_for { } sub preprocess { - # This is one paragraph. + # Argument is one paragraph. my($text) = @_; # Clean up spaces at head and tail of each line, as well as @@ -119,31 +120,46 @@ sub preprocess { # and are followed by a sentence starter punctuation and upper case. $text =~ s/([?!\.]) +([\'\"\(\[\¿\¡\p{IsPi}]+[\ ]*[\p{IsUpper}])/$1\n$2/g; - # special punctuation cases are covered. Check all remaining periods. + # Chinese uses unusual end-of-sentence markers. These are NOT + # followed by whitespace. Nor is there any idea of capitalization. + # There does not appear to be any unicode category for full-stops + # in general, so list them here. U+3002 U+FF0E U+FF1F U+FF01 + $text =~ s/([。.?!♪])/$1\n/g; + + # Chinese does not use any sort of white-space between ideographs. + # Nominally, each single ideograph corresponds to one word. Add + # spaces here, so that later processing stages can tokenize readily. + # Note that this handles mixed latinate+CJK. + $text =~ s/(\p{InCJK})/ $1 /g; + $text =~ s/ +/ /g; + + # Special punctuation cases are covered. Check all remaining periods. my $word; my $i; my @words = split(/ /,$text); $text = ""; for ($i=0;$i<(scalar(@words)-1);$i++) { if ($words[$i] =~ /([\p{IsAlnum}\.\-]*)([\'\"\)\]\%\p{IsPf}]*)(\.+)$/) { - #check if $1 is a known honorific and $2 is empty, never break + # Check if $1 is a known honorific and $2 is empty, never break. 
my $prefix = $1; my $starting_punct = $2; - if($prefix && $NONBREAKING_PREFIX{$prefix} && $NONBREAKING_PREFIX{$prefix} == 1 && !$starting_punct) { - #not breaking; + if ($prefix && $NONBREAKING_PREFIX{$prefix} && $NONBREAKING_PREFIX{$prefix} == 1 && !$starting_punct) { + # Not breaking; } elsif ($words[$i] =~ /(\.)[\p{IsUpper}\-]+(\.+)$/) { - #not breaking - upper case acronym + # Not breaking - upper case acronym } elsif($words[$i+1] =~ /^([ ]*[\'\"\(\[\¿\¡\p{IsPi}]*[ ]*[\p{IsUpper}0-9])/) { # The next word has a bunch of initial quotes, maybe a # space, then either upper case or a number $words[$i] = $words[$i]."\n" unless ($prefix && $NONBREAKING_PREFIX{$prefix} && $NONBREAKING_PREFIX{$prefix} == 2 && !$starting_punct && ($words[$i+1] =~ /^[0-9]+/)); - #we always add a return for these unless we have a numeric non-breaker and a number start + # We always add a return for these, unless we have a + # numeric non-breaker and a number start. } } $text = $text.$words[$i]." "; } - # We stopped one token from the end to allow for easy look-ahead. Append it now. + # We stopped one token from the end to allow for easy look-ahead. + # Append it now. $text = $text.$words[$i]; # Clean up spaces at head and tail of each line as well as any double-spacing From 203c7c63875ff8748abc68886f62d0b1a2b20b26 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 5 Jan 2017 11:34:38 -0600 Subject: [PATCH 074/176] Preliminary support for Chinese. 
--- .../nonbreaking_prefix.zh | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 scripts/share/nonbreaking_prefixes/nonbreaking_prefix.zh diff --git a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.zh b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.zh new file mode 100644 index 000000000..077710c87 --- /dev/null +++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.zh @@ -0,0 +1,53 @@ +# +# Chinese (Mandarin, Cantonese) +# +# Anything in this file, followed by a period, +# does NOT indicate an end-of-sentence marker. +# +# English/Euro-language given-name initials (appearing in +# news, periodicals, etc.) +A +Ā +B +C +Č +D +E +Ē +F +G +Ģ +H +I +Ī +J +K +Ķ +L +Ļ +M +N +Ņ +O +P +Q +R +S +Š +T +U +Ū +V +W +X +Y +Z +Ž + +# Numbers only. These should only induce breaks when followed by +# a numeric sequence. +# Add NUMERIC_ONLY after the word for this function. This case is +# mostly for the english "No." which can either be a sentence of its +# own, or if followed by a number, a non-breaking prefix. +No #NUMERIC_ONLY# +Nr #NUMERIC_ONLY# From 1933bcbf33bf1726c66f4013def40ad4bba13c7d Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 5 Jan 2017 11:39:01 -0600 Subject: [PATCH 075/176] Whoops, revert cut-n-paste damage in previous commit. 
--- scripts/ems/support/split-sentences.perl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl index 7bda05c30..0c3e4a2d5 100755 --- a/scripts/ems/support/split-sentences.perl +++ b/scripts/ems/support/split-sentences.perl @@ -14,7 +14,7 @@ use FindBin qw($RealBin); use strict; use utf8; -my $mydir = "$RealBin/nonbreaking_prefixes"; +my $mydir = "$RealBin/../../share/nonbreaking_prefixes"; my %NONBREAKING_PREFIX = (); my $language = "en"; @@ -30,7 +30,7 @@ while (@ARGV) { } if ($HELP) { - print "Usage ./split-sentences.pl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n"; + print "Usage ./split-sentences.perl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n"; print "-q: quiet mode\n"; print "-b: no output buffering (for use in bidirectional pipes)\n"; exit; From bd9d12351b6a5648d45b54c9293e2c1d5fc20722 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 5 Jan 2017 12:53:21 -0600 Subject: [PATCH 076/176] Create a Cantonese version, distinct from Mandarin. The content is identical, at this moment, but having distinct language suffixes solves processing-pipeline problems later on. --- .../nonbreaking_prefix.yue | 53 +++++++++++++++++++ .../nonbreaking_prefix.zh | 2 +- 2 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 scripts/share/nonbreaking_prefixes/nonbreaking_prefix.yue diff --git a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.yue b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.yue new file mode 100644 index 000000000..37942ade9 --- /dev/null +++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.yue @@ -0,0 +1,53 @@ +# +# Cantonese (Chinese) +# +# Anything in this file, followed by a period, +# does NOT indicate an end-of-sentence marker. +# +# English/Euro-language given-name initials (appearing in +# news, periodicals, etc.) 
+A +Ā +B +C +Č +D +E +Ē +F +G +Ģ +H +I +Ī +J +K +Ķ +L +Ļ +M +N +Ņ +O +P +Q +R +S +Š +T +U +Ū +V +W +X +Y +Z +Ž + +# Numbers only. These should only induce breaks when followed by +# a numeric sequence. +# Add NUMERIC_ONLY after the word for this function. This case is +# mostly for the english "No." which can either be a sentence of its +# own, or if followed by a number, a non-breaking prefix. +No #NUMERIC_ONLY# +Nr #NUMERIC_ONLY# diff --git a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.zh b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.zh index 077710c87..df4c2ff88 100644 --- a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.zh +++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.zh @@ -1,5 +1,5 @@ # -# Chinese (Mandarin, Cantonese) +# Mandarin (Chinese) # # Anything in this file, followed by a period, # does NOT indicate an end-of-sentence marker. From 1f744ecd9bdb2bb21034fc497773228135fc2801 Mon Sep 17 00:00:00 2001 From: Lane Schwartz Date: Thu, 5 Jan 2017 15:48:30 -0600 Subject: [PATCH 077/176] Fixed a subtle possibly thread-related thread --- moses/LM/InMemoryPerSentenceOnDemandLM.cpp | 28 ++++++---- moses/LM/InMemoryPerSentenceOnDemandLM.h | 59 +++++++++++++++++----- 2 files changed, 63 insertions(+), 24 deletions(-) diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp index 12ef78f4e..87676f12d 100644 --- a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp +++ b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp @@ -17,7 +17,7 @@ using namespace std; namespace Moses { - InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), initialized(false) + InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), m_factorType(0) { ReadParameters(); } @@ -25,7 +25,7 @@ namespace Moses InMemoryPerSentenceOnDemandLM::~InMemoryPerSentenceOnDemandLM() { } - + void 
InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) { // The context scope object for this translation task @@ -56,15 +56,11 @@ void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) { tmp.close(); - LanguageModelKen & lm = GetPerThreadLM(); - lm.LoadModel("/home/lanes/mosesdecoder/tiny.with_per_sentence/europarl.en.srilm", util::POPULATE_OR_READ); + // m_tmpFilename.reset(new std::string("/home/lanes/mosesdecoder/tiny.with_per_sentence/europarl.en.srilm")); + m_tmpFilename.reset(new std::string(filename)); - initialized = true; - - VERBOSE(1, filename); - if (initialized) { - VERBOSE(1, "\tLM initialized\n"); - } + //LanguageModelKen & lm = + GetPerThreadLM(); // std::remove(filename); @@ -76,9 +72,21 @@ LanguageModelKen& InMemoryPerSentenceOnDemandLM::GetPer lm = m_perThreadLM.get(); if (lm == NULL) { lm = new LanguageModelKen(); + + string* filename = m_tmpFilename.get(); + if (filename == NULL) { + UTIL_THROW(util::Exception, "Can't get a thread-specific LM because no temporary filename has been set for this thread\n"); + } else { + lm->LoadModel(*filename, util::POPULATE_OR_READ); + } + + VERBOSE(1, filename); + VERBOSE(1, "\tLM initialized\n"); + m_perThreadLM.reset(lm); } assert(lm); + return *lm; } diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.h b/moses/LM/InMemoryPerSentenceOnDemandLM.h index f0c1effa7..9a2e4123e 100644 --- a/moses/LM/InMemoryPerSentenceOnDemandLM.h +++ b/moses/LM/InMemoryPerSentenceOnDemandLM.h @@ -1,6 +1,7 @@ // $Id$ #pragma once +#include #include #include "SingleFactor.h" #include @@ -38,7 +39,7 @@ public: } virtual const FFState* EmptyHypothesisState(const InputType &input) const { - if (initialized) { + if (isInitialized()) { return GetPerThreadLM().EmptyHypothesisState(input); } else { return new InMemoryPerSentenceOnDemandLMState(); @@ -46,7 +47,7 @@ public: } virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const { - 
if (initialized) { + if (isInitialized()) { return GetPerThreadLM().EvaluateWhenApplied(hypo, ps, out); } else { UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n"); @@ -54,7 +55,7 @@ public: } virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const { - if (initialized) { + if (isInitialized()) { return GetPerThreadLM().EvaluateWhenApplied(cur_hypo, featureID, accumulator); } else { UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n"); @@ -62,7 +63,7 @@ public: } virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const { - if (initialized) { + if (isInitialized()) { return GetPerThreadLM().EvaluateWhenApplied(hyperedge, featureID, accumulator); } else { UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n"); @@ -71,40 +72,58 @@ public: virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const { - if (initialized) { + if (isInitialized()) { GetPerThreadLM().CalcScore(phrase, fullScore, ngramScore, oovCount); + } else { + UTIL_THROW(util::Exception, "WARNING: InMemoryPerSentenceOnDemand::CalcScore called prior to being initialized"); } } virtual void CalcScoreFromCache(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const { - if (initialized) { + if (isInitialized()) { GetPerThreadLM().CalcScoreFromCache(phrase, fullScore, ngramScore, oovCount); + } else { + UTIL_THROW(util::Exception, "WARNING: InMemoryPerSentenceOnDemand::CalcScoreFromCache called prior to being initialized"); } } virtual void IssueRequestsFor(Hypothesis& hypo, const FFState* input_state) { - GetPerThreadLM().IssueRequestsFor(hypo, input_state); + if (isInitialized()) { + GetPerThreadLM().IssueRequestsFor(hypo, input_state); + } else { + UTIL_THROW(util::Exception, "WARNING: InMemoryPerSentenceOnDemand::IssueRequestsFor 
called prior to being initialized"); + } } virtual void sync() { - GetPerThreadLM().sync(); + if (isInitialized()) { + GetPerThreadLM().sync(); + } else { + UTIL_THROW(util::Exception, "WARNING: InMemoryPerSentenceOnDemand::sync called prior to being initialized"); + } } virtual void SetFFStateIdx(int state_idx) { - if (initialized) { + if (isInitialized()) { GetPerThreadLM().SetFFStateIdx(state_idx); + } else { + UTIL_THROW(util::Exception, "WARNING: InMemoryPerSentenceOnDemand::SetFFStateIdx called prior to being initialized"); } } virtual void IncrementalCallback(Incremental::Manager &manager) const { - if (initialized) { + if (isInitialized()) { GetPerThreadLM().IncrementalCallback(manager); + } else { + UTIL_THROW(util::Exception, "WARNING: InMemoryPerSentenceOnDemand::IncrementalCallback called prior to being initialized"); } } virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const { - if (initialized) { + if (isInitialized()) { GetPerThreadLM().ReportHistoryOrder(out, phrase); + } else { + UTIL_THROW(util::Exception, "WARNING: InMemoryPerSentenceOnDemand::ReportHistoryOrder called prior to being initialized"); } } @@ -112,13 +131,16 @@ public: , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedScores) const { - if (initialized) { + if (isInitialized()) { GetPerThreadLM().EvaluateInIsolation(source, targetPhrase, scoreBreakdown, estimatedScores); + } else { + // UTIL_THROW(util::Exception, "WARNING: InMemoryPerSentenceOnDemand::EvaluateInIsolation called prior to being initialized"); } } bool IsUseable(const FactorMask &mask) const { - return GetPerThreadLM().IsUseable(mask); + bool ret = mask[m_factorType]; + return ret; } @@ -126,8 +148,17 @@ protected: LanguageModelKen & GetPerThreadLM() const; mutable boost::thread_specific_ptr > m_perThreadLM; + mutable boost::thread_specific_ptr m_tmpFilename; - bool initialized; + FactorType m_factorType; + + bool isInitialized() 
const { + if (m_tmpFilename.get() == NULL) { + return false; + } else { + return true; + } + } }; From 9fa538ce5c77a4478f36b27b8ccd8502c18df62f Mon Sep 17 00:00:00 2001 From: MosesAdmin Date: Fri, 6 Jan 2017 00:00:50 +0000 Subject: [PATCH 078/176] daily automatic beautifier --- moses/LM/InMemoryPerSentenceOnDemandLM.cpp | 6 +++--- moses/LM/InMemoryPerSentenceOnDemandLM.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp index 82c198ca1..f45c7917c 100644 --- a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp +++ b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp @@ -25,7 +25,7 @@ InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string & InMemoryPerSentenceOnDemandLM::~InMemoryPerSentenceOnDemandLM() { } - + void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) { @@ -60,7 +60,7 @@ void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) // m_tmpFilename.reset(new std::string("/home/lanes/mosesdecoder/tiny.with_per_sentence/europarl.en.srilm")); m_tmpFilename.reset(new std::string(filename)); - //LanguageModelKen & lm = + //LanguageModelKen & lm = GetPerThreadLM(); // std::remove(filename); @@ -83,7 +83,7 @@ LanguageModelKen& InMemoryPerSentenceOnDemandLM::GetPer } VERBOSE(1, filename); - VERBOSE(1, "\tLM initialized\n"); + VERBOSE(1, "\tLM initialized\n"); m_perThreadLM.reset(lm); } diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.h b/moses/LM/InMemoryPerSentenceOnDemandLM.h index eeaebbe17..7f4b60a5f 100644 --- a/moses/LM/InMemoryPerSentenceOnDemandLM.h +++ b/moses/LM/InMemoryPerSentenceOnDemandLM.h @@ -153,7 +153,7 @@ protected: FactorType m_factorType; bool isInitialized() const { - if (m_tmpFilename.get() == NULL) { + if (m_tmpFilename.get() == NULL) { return false; } else { return true; From 6fb2c9702963422a4d4f3dec0eeb390fd77eeab3 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 5 Jan 2017 
23:29:00 -0600 Subject: [PATCH 079/176] Bug-fix: regular Western sentence enders not recognized. --- scripts/ems/support/split-sentences.perl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl index 0c3e4a2d5..160c5d548 100755 --- a/scripts/ems/support/split-sentences.perl +++ b/scripts/ems/support/split-sentences.perl @@ -126,6 +126,10 @@ sub preprocess { # in general, so list them here. U+3002 U+FF0E U+FF1F U+FF01 $text =~ s/([。.?!♪])/$1\n/g; + # A normal full-stop or other Western sentence enders followed + # by an ideograph is an and-of-sentence, always. + $text =~ s/([\.?!]) *(\p{InCJK})/$1\n$2/g; + # Chinese does not use any sort of white-space between ideographs. # Nominally, each single ideograph corresponds to one word. Add # spaces here, so that later processing stages can tokenize readily. From 4d13377ce8a42eff71fd1ad57915198d1f681fbc Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Sat, 7 Jan 2017 20:36:41 +0000 Subject: [PATCH 080/176] compile error --- contrib/other-builds/moses/.project | 10 ++++++++++ moses/LM/InMemoryPerSentenceOnDemandLM.cpp | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index 0f397df33..adb9ad47e 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -1715,6 +1715,16 @@ 1 PARENT-3-PROJECT_LOC/moses/LM/Implementation.h + + LM/InMemoryPerSentenceOnDemandLM.cpp + 1 + PARENT-3-PROJECT_LOC/moses/LM/InMemoryPerSentenceOnDemandLM.cpp + + + LM/InMemoryPerSentenceOnDemandLM.h + 1 + PARENT-3-PROJECT_LOC/moses/LM/InMemoryPerSentenceOnDemandLM.h + LM/Jamfile 1 diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp index f45c7917c..0c9a43539 100644 --- a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp +++ b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp @@ -17,7 +17,7 @@ using namespace 
std; namespace Moses { -InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), initialized(false) +InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line) { ReadParameters(); } From 2e48f83ab4cbf93b4f39eb8a8f91d1662cc9f5e0 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Sun, 8 Jan 2017 10:08:53 -0600 Subject: [PATCH 081/176] Handle punctuation+CJK combinations. --- scripts/ems/support/split-sentences.perl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl index 160c5d548..c8ff87dde 100755 --- a/scripts/ems/support/split-sentences.perl +++ b/scripts/ems/support/split-sentences.perl @@ -128,13 +128,19 @@ sub preprocess { # A normal full-stop or other Western sentence enders followed # by an ideograph is an and-of-sentence, always. - $text =~ s/([\.?!]) *(\p{InCJK})/$1\n$2/g; + $text =~ s/([\.?!]) *(\p{CJK})/$1\n$2/g; + + # Split close-paren-then-comma into two. + $text =~ s/(\p{Punctuation}) *(\p{Punctuation})/ $1 $2 /g; # Chinese does not use any sort of white-space between ideographs. # Nominally, each single ideograph corresponds to one word. Add # spaces here, so that later processing stages can tokenize readily. # Note that this handles mixed latinate+CJK. - $text =~ s/(\p{InCJK})/ $1 /g; + # TODO: perhaps also CJKExtA CJKExtB etc ??? CJK_Radicals_Sup ? + $text =~ s/(\p{Punctuation}) *(\p{CJK})/ $1 $2/g + $text =~ s/(\p{CJK}) *(\p{Punctuation})/$1 $2 /g; + $text =~ s/([\p{CJK}\p{CJKSymbols}])/ $1 /g; $text =~ s/ +/ /g; # Special punctuation cases are covered. Check all remaining periods. 
From 1603c25babaf7e2464ce747c018aaf688c5baebe Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 9 Jan 2017 18:56:25 +0000 Subject: [PATCH 082/176] minor rename --- contrib/moses2/FF/FeatureFunctions.cpp | 8 ++++---- contrib/moses2/FF/FeatureFunctions.h | 2 +- contrib/moses2/PhraseBased/Manager.cpp | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/contrib/moses2/FF/FeatureFunctions.cpp b/contrib/moses2/FF/FeatureFunctions.cpp index 49a0ace67..5cb0bb1c2 100644 --- a/contrib/moses2/FF/FeatureFunctions.cpp +++ b/contrib/moses2/FF/FeatureFunctions.cpp @@ -52,7 +52,7 @@ void FeatureFunctions::Load() } // load pt -BOOST_FOREACH(const PhraseTable *pt, m_phraseTables) { +BOOST_FOREACH(const PhraseTable *pt, phraseTables) { PhraseTable *nonConstPT = const_cast(pt); cerr << "Loading " << nonConstPT->GetName() << endl; nonConstPT->Load(m_system); @@ -86,8 +86,8 @@ void FeatureFunctions::Create() PhraseTable *pt = dynamic_cast(ff); if (pt) { - pt->SetPtInd(m_phraseTables.size()); - m_phraseTables.push_back(pt); + pt->SetPtInd(phraseTables.size()); + phraseTables.push_back(pt); } UnknownWordPenalty *unkWP = dynamic_cast(pt); @@ -165,7 +165,7 @@ FeatureFunction *FeatureFunctions::FindFeatureFunction( const PhraseTable *FeatureFunctions::GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd) { // assume only 1 unk wp - std::vector tmpVec(m_phraseTables); + std::vector tmpVec(phraseTables); std::vector::iterator iter; for (iter = tmpVec.begin(); iter != tmpVec.end(); ++iter) { const PhraseTable *pt = *iter; diff --git a/contrib/moses2/FF/FeatureFunctions.h b/contrib/moses2/FF/FeatureFunctions.h index 2232e2a97..271f68c0f 100644 --- a/contrib/moses2/FF/FeatureFunctions.h +++ b/contrib/moses2/FF/FeatureFunctions.h @@ -42,7 +42,7 @@ class Word; class FeatureFunctions { public: - std::vector m_phraseTables; + std::vector phraseTables; FeatureFunctions(System &system); virtual ~FeatureFunctions(); diff --git a/contrib/moses2/PhraseBased/Manager.cpp 
b/contrib/moses2/PhraseBased/Manager.cpp index b89897070..158b72592 100644 --- a/contrib/moses2/PhraseBased/Manager.cpp +++ b/contrib/moses2/PhraseBased/Manager.cpp @@ -63,7 +63,7 @@ void Manager::Init() m_bitmaps = new Bitmaps(GetPool()); - const PhraseTable &firstPt = *system.featureFunctions.m_phraseTables[0]; + const PhraseTable &firstPt = *system.featureFunctions.phraseTables[0]; m_initPhrase = new (GetPool().Allocate()) TargetPhraseImpl( GetPool(), firstPt, system, 0); From 4910a385906d68caa8f9b54dc6aaeb44b205a9c5 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 10 Jan 2017 16:31:33 +0000 Subject: [PATCH 083/176] enable C++11 --- Jamroot | 2 +- phrase-extract/ScoreFeatureTest.cpp | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/Jamroot b/Jamroot index 1c4d68abd..d9fc811dd 100644 --- a/Jamroot +++ b/Jamroot @@ -111,7 +111,7 @@ external-lib z ; #lib dl : : static:static shared:shared ; #requirements += dl ; -#requirements += -std=c++0x ; +requirements += -std=c++0x ; # Allow moses to report the git commit hash of the version used for compilation moses_githash = [ _shell "git describe --dirty" ] ; diff --git a/phrase-extract/ScoreFeatureTest.cpp b/phrase-extract/ScoreFeatureTest.cpp index 60d931438..f82448ab0 100644 --- a/phrase-extract/ScoreFeatureTest.cpp +++ b/phrase-extract/ScoreFeatureTest.cpp @@ -26,8 +26,8 @@ #include #include -//#include -//#include +#include +#include using namespace MosesTraining; using namespace std; @@ -83,7 +83,6 @@ static void checkDomainConfigured( BOOST_CHECK(manager.includeSentenceId()); } -/* template T adder(T v) { @@ -95,7 +94,7 @@ T adder(T first, Args... 
args) { return first + adder(args...); } -*/ + BOOST_AUTO_TEST_CASE(manager_config_domain) { @@ -112,7 +111,6 @@ BOOST_AUTO_TEST_CASE(manager_config_domain) checkDomainConfigured (boost::assign::list_of("--SparseDomainSubset")("/dev/null")); - /* // C++11 testing unordered_set s; s.insert(4); @@ -137,6 +135,6 @@ BOOST_AUTO_TEST_CASE(manager_config_domain) std::string s1 = "x", s2 = "aa", s3 = "bb", s4 = "yy"; std::string ssum = adder(s1, s2, s3, s4); - */ + } From db22064bbcc97a77ba99c969d65a2d201fc9c002 Mon Sep 17 00:00:00 2001 From: MosesAdmin Date: Tue, 10 Jan 2017 16:55:38 +0000 Subject: [PATCH 084/176] daily automatic beautifier --- phrase-extract/ScoreFeatureTest.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/phrase-extract/ScoreFeatureTest.cpp b/phrase-extract/ScoreFeatureTest.cpp index f82448ab0..28dd0e981 100644 --- a/phrase-extract/ScoreFeatureTest.cpp +++ b/phrase-extract/ScoreFeatureTest.cpp @@ -118,7 +118,7 @@ BOOST_AUTO_TEST_CASE(manager_config_domain) s.insert(4); s.insert(1); - for (auto i: s) { +for (auto i: s) { cerr << i << " "; } @@ -127,7 +127,7 @@ BOOST_AUTO_TEST_CASE(manager_config_domain) m["ba"] = 6; m["aabc"] = 7; - for (auto i: m) { +for (auto i: m) { cerr << i.first << "=" << i.second << " "; } @@ -135,6 +135,6 @@ BOOST_AUTO_TEST_CASE(manager_config_domain) std::string s1 = "x", s2 = "aa", s3 = "bb", s4 = "yy"; std::string ssum = adder(s1, s2, s3, s4); - + } From 48f1bac05daee1d78bcb206b4240171a17807344 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 10 Jan 2017 18:39:16 +0000 Subject: [PATCH 085/176] use ken's read map --- .../TranslationModel/ProbingPT/probing_hash_utils.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp b/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp index c72709d3c..11f01a366 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp +++ 
b/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp @@ -1,4 +1,6 @@ +#include #include "probing_hash_utils.hh" +#include "util/file.hh" namespace Moses2 { @@ -6,6 +8,13 @@ namespace Moses2 //Read table from disk, return memory map location char * readTable(const char * filename, size_t size) { + std::cerr << "filename=" << filename << std::endl; + util::scoped_fd file_(util::OpenReadOrThrow(filename)); + uint64_t total_size_ = util::SizeFile(file_.get()); + + util::scoped_memory memory; + MapRead(util::LAZY, file_.get(), 0, total_size_, memory); + //Initial position of the file is the end of the file, thus we know the size int fd; char * map; From cf3b51d54418c632e521949dc8f08cc139a73189 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 10 Jan 2017 18:49:44 +0000 Subject: [PATCH 086/176] use ken's read map --- .../TranslationModel/ProbingPT/probing_hash_utils.cpp | 11 ++++++----- .../TranslationModel/ProbingPT/probing_hash_utils.hh | 2 +- .../moses2/TranslationModel/ProbingPT/querying.cpp | 2 +- contrib/moses2/TranslationModel/ProbingPT/querying.hh | 3 +++ 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp b/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp index 11f01a366..9005671a0 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp @@ -6,14 +6,15 @@ namespace Moses2 { //Read table from disk, return memory map location -char * readTable(const char * filename, size_t size) +char * readTable(const char * filename, size_t size, util::scoped_fd &file, util::scoped_memory &memory) { std::cerr << "filename=" << filename << std::endl; - util::scoped_fd file_(util::OpenReadOrThrow(filename)); - uint64_t total_size_ = util::SizeFile(file_.get()); + file.reset(util::OpenReadOrThrow(filename)); + uint64_t total_size_ = util::SizeFile(file.get()); - util::scoped_memory 
memory; - MapRead(util::LAZY, file_.get(), 0, total_size_, memory); + MapRead(util::LAZY, file.get(), 0, total_size_, memory); + + //return memory.begin(); //Initial position of the file is the end of the file, thus we know the size int fd; diff --git a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh b/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh index 841934027..102a596ea 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh +++ b/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh @@ -38,7 +38,7 @@ typedef util::ProbingHashTable > Table; void serialize_table(char *mem, size_t size, const std::string &filename); -char * readTable(const char * filename, size_t size); +char * readTable(const char * filename, size_t size, util::scoped_fd &file, util::scoped_memory &memory); uint64_t getKey(const uint64_t source_phrase[], size_t size); diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp index e47a6d015..b04513e8b 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp @@ -86,7 +86,7 @@ QueryEngine::QueryEngine(const char * filepath, util::LoadMethod load_method) //Read hashtable table_filesize = Table::Size(tablesize, 1.2); - mem = readTable(path_to_hashtable.c_str(), table_filesize); + mem = readTable(path_to_hashtable.c_str(), table_filesize, file_, memory_); Table table_init(mem, table_filesize); table = table_init; diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.hh b/contrib/moses2/TranslationModel/ProbingPT/querying.hh index 03d7667a9..826e13145 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/querying.hh +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.hh @@ -26,6 +26,9 @@ class QueryEngine size_t table_filesize; bool is_reordering; + util::scoped_fd file_; + util::scoped_memory memory_; + void read_alignments(const 
std::string &alignPath); void file_exits(const std::string &basePath); void cat_files(const std::string &basePath); From b8db6070f934693d3b4e75fced34623d0c2cea6e Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 10 Jan 2017 18:54:34 +0000 Subject: [PATCH 087/176] use ken's read map --- .../ProbingPT/probing_hash_utils.cpp | 26 +++---------------- .../ProbingPT/probing_hash_utils.hh | 2 +- .../TranslationModel/ProbingPT/querying.cpp | 4 +-- 3 files changed, 6 insertions(+), 26 deletions(-) diff --git a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp b/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp index 9005671a0..feefbffb4 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp @@ -6,35 +6,15 @@ namespace Moses2 { //Read table from disk, return memory map location -char * readTable(const char * filename, size_t size, util::scoped_fd &file, util::scoped_memory &memory) +char * readTable(const char * filename, util::LoadMethod load_method, util::scoped_fd &file, util::scoped_memory &memory) { std::cerr << "filename=" << filename << std::endl; file.reset(util::OpenReadOrThrow(filename)); uint64_t total_size_ = util::SizeFile(file.get()); - MapRead(util::LAZY, file.get(), 0, total_size_, memory); + MapRead(load_method, file.get(), 0, total_size_, memory); - //return memory.begin(); - - //Initial position of the file is the end of the file, thus we know the size - int fd; - char * map; - - fd = open(filename, O_RDONLY); - if (fd == -1) { - perror("Error opening file for reading"); - exit(EXIT_FAILURE); - } - - map = (char *) mmap(0, size, PROT_READ, MAP_SHARED, fd, 0); - - if (map == MAP_FAILED) { - close(fd); - perror("Error mmapping the file"); - exit(EXIT_FAILURE); - } - - return map; + return (char*) memory.get(); } void serialize_table(char *mem, size_t size, const std::string &filename) diff --git 
a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh b/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh index 102a596ea..368147807 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh +++ b/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh @@ -38,7 +38,7 @@ typedef util::ProbingHashTable > Table; void serialize_table(char *mem, size_t size, const std::string &filename); -char * readTable(const char * filename, size_t size, util::scoped_fd &file, util::scoped_memory &memory); +char * readTable(const char * filename, util::LoadMethod load_method, util::scoped_fd &file, util::scoped_memory &memory); uint64_t getKey(const uint64_t source_phrase[], size_t size); diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp index b04513e8b..0720ca75e 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp @@ -86,7 +86,7 @@ QueryEngine::QueryEngine(const char * filepath, util::LoadMethod load_method) //Read hashtable table_filesize = Table::Size(tablesize, 1.2); - mem = readTable(path_to_hashtable.c_str(), table_filesize, file_, memory_); + mem = readTable(path_to_hashtable.c_str(), load_method, file_, memory_); Table table_init(mem, table_filesize); table = table_init; @@ -96,7 +96,7 @@ QueryEngine::QueryEngine(const char * filepath, util::LoadMethod load_method) QueryEngine::~QueryEngine() { //Clear mmap content from memory. 
- munmap(mem, table_filesize); + //munmap(mem, table_filesize); } From 1feb5a389f166387f27ebc061d801808265d4eb3 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 10 Jan 2017 19:27:10 +0000 Subject: [PATCH 088/176] move target phrase mem map into query engine --- .../TranslationModel/ProbingPT/ProbingPT.cpp | 14 ++------------ .../moses2/TranslationModel/ProbingPT/ProbingPT.h | 3 --- .../moses2/TranslationModel/ProbingPT/querying.cpp | 11 +++++++++++ .../moses2/TranslationModel/ProbingPT/querying.hh | 8 ++++++++ 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp index 26f570641..d0e488e45 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp @@ -136,16 +136,6 @@ void ProbingPT::Load(System &system) // alignments CreateAlignmentMap(system, m_path + "/Alignments.dat"); - // memory mapped file to tps - string filePath = m_path + "/TargetColl.dat"; - file.open(filePath.c_str()); - if (!file.is_open()) { - throw "Couldn't open file "; - } - - data = file.data(); - //size_t size = file.size(); - // cache CreateCache(system); } @@ -272,7 +262,7 @@ TargetPhrases *ProbingPT::CreateTargetPhrases(MemPool &pool, //cerr << "key2=" << query_result.second << endl; if (query_result.first) { - const char *offset = data + query_result.second; + const char *offset = m_engine->data + query_result.second; uint64_t *numTP = (uint64_t*) offset; tps = new (pool.Allocate()) TargetPhrases(pool, *numTP); @@ -724,7 +714,7 @@ std::pair ProbingPT::CreateTargetPhrasesSCFG(MemPool // there are some rules const FeatureFunctions &ffs = system.featureFunctions; - const char *offset = data + query_result.second; + const char *offset = m_engine->data + query_result.second; uint64_t *numTP = (uint64_t*) offset; //cerr << "numTP=" << *numTP << endl; diff --git 
a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h index 9b8905843..c5fbefd6f 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h +++ b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h @@ -98,9 +98,6 @@ protected: uint64_t m_unkId; QueryEngine *m_engine; - boost::iostreams::mapped_file_source file; - const char *data; - void CreateAlignmentMap(System &system, const std::string path); TargetPhrases *Lookup(const Manager &mgr, MemPool &pool, diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp index 0720ca75e..637049e29 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp @@ -29,6 +29,17 @@ QueryEngine::QueryEngine(const char * filepath, util::LoadMethod load_method) // alignments read_alignments(alignPath); + // target phrase + string filePath = basepath + "/TargetColl.dat"; + file.open(filePath.c_str()); + if (!file.is_open()) { + throw "Couldn't open file "; + } + + data = file.data(); + //size_t size = file.size(); + + //Read config file boost::unordered_map keyValue; diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.hh b/contrib/moses2/TranslationModel/ProbingPT/querying.hh index 826e13145..af19eac20 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/querying.hh +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.hh @@ -1,5 +1,6 @@ #pragma once +#include #include #include //For finding size of file #include "vocabid.hh" @@ -29,6 +30,12 @@ class QueryEngine util::scoped_fd file_; util::scoped_memory memory_; + // target phrases + boost::iostreams::mapped_file_source file; + + util::scoped_fd fileTPS_; + util::scoped_memory memoryTPS_; + void read_alignments(const std::string &alignPath); void file_exits(const std::string &basePath); void cat_files(const std::string &basePath); @@ -37,6 +44,7 @@ public: int 
num_scores; int num_lex_scores; bool logProb; + const char *data; QueryEngine(const char *, util::LoadMethod load_method); ~QueryEngine(); From 64a22f0514f9898db737e88f6f656d4901db8e64 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 11 Jan 2017 10:30:05 +0000 Subject: [PATCH 089/176] use ken's method to mem map target phrases --- .../TranslationModel/ProbingPT/ProbingPT.cpp | 4 ++-- .../TranslationModel/ProbingPT/querying.cpp | 21 ++----------------- .../TranslationModel/ProbingPT/querying.hh | 3 +-- 3 files changed, 5 insertions(+), 23 deletions(-) diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp index d0e488e45..c2b2660ca 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp @@ -262,7 +262,7 @@ TargetPhrases *ProbingPT::CreateTargetPhrases(MemPool &pool, //cerr << "key2=" << query_result.second << endl; if (query_result.first) { - const char *offset = m_engine->data + query_result.second; + const char *offset = m_engine->memTPS + query_result.second; uint64_t *numTP = (uint64_t*) offset; tps = new (pool.Allocate()) TargetPhrases(pool, *numTP); @@ -714,7 +714,7 @@ std::pair ProbingPT::CreateTargetPhrasesSCFG(MemPool // there are some rules const FeatureFunctions &ffs = system.featureFunctions; - const char *offset = m_engine->data + query_result.second; + const char *offset = m_engine->memTPS + query_result.second; uint64_t *numTP = (uint64_t*) offset; //cerr << "numTP=" << *numTP << endl; diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp index 637049e29..9ea2d8cb6 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp @@ -19,10 +19,6 @@ QueryEngine::QueryEngine(const char * filepath, util::LoadMethod load_method) file_exits(basepath); - if (load_method == 
util::POPULATE_OR_READ) { - cat_files(basepath); - } - ///Source phrase vocabids read_map(source_vocabids, path_to_source_vocabid.c_str()); @@ -30,15 +26,8 @@ QueryEngine::QueryEngine(const char * filepath, util::LoadMethod load_method) read_alignments(alignPath); // target phrase - string filePath = basepath + "/TargetColl.dat"; - file.open(filePath.c_str()); - if (!file.is_open()) { - throw "Couldn't open file "; - } - - data = file.data(); - //size_t size = file.size(); - + string targetCollPath = basepath + "/TargetColl.dat"; + memTPS = readTable(targetCollPath.c_str(), load_method, fileTPS_, memoryTPS_); //Read config file boost::unordered_map keyValue; @@ -187,11 +176,5 @@ void QueryEngine::file_exits(const std::string &basePath) */ } -void QueryEngine::cat_files(const std::string &basePath) -{ - system((string("cat ") + basePath + "/TargetColl.dat > /dev/null").c_str()); - system((string("cat ") + basePath + "/probing_hash.dat > /dev/null").c_str()); -} - } diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.hh b/contrib/moses2/TranslationModel/ProbingPT/querying.hh index af19eac20..dcdd2a75a 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/querying.hh +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.hh @@ -38,13 +38,12 @@ class QueryEngine void read_alignments(const std::string &alignPath); void file_exits(const std::string &basePath); - void cat_files(const std::string &basePath); public: int num_scores; int num_lex_scores; bool logProb; - const char *data; + const char *memTPS; QueryEngine(const char *, util::LoadMethod load_method); ~QueryEngine(); From 88af99ffceb1e6aa9c8246c4eff9816c62bf89d9 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 11 Jan 2017 10:37:15 +0000 Subject: [PATCH 090/176] add FF param args --- .../TranslationModel/ProbingPT/ProbingPT.cpp | 21 +++++++++++++------ .../ProbingPT/probing_hash_utils.cpp | 2 +- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git 
a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp index c2b2660ca..1f22f45be 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp @@ -143,12 +143,21 @@ void ProbingPT::Load(System &system) void ProbingPT::SetParameter(const std::string& key, const std::string& value) { if (key == "load") { - if (value == "lazy") { - load_method = util::LAZY; - } - else if (value == "populate") { - load_method = util::POPULATE_OR_READ; - } + if (value == "lazy") { + load_method = util::LAZY; + } + else if (value == "populate_or_lazy") { + load_method = util::POPULATE_OR_LAZY; + } + else if (value == "populate_or_read" || value == "populate") { + load_method = util::POPULATE_OR_READ; + } + else if (value == "read") { + load_method = util::READ; + } + else if (value == "parallel_read") { + load_method = util::PARALLEL_READ; + } else { UTIL_THROW2("load method not supported" << value); } diff --git a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp b/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp index feefbffb4..96c317b65 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp @@ -8,7 +8,7 @@ namespace Moses2 //Read table from disk, return memory map location char * readTable(const char * filename, util::LoadMethod load_method, util::scoped_fd &file, util::scoped_memory &memory) { - std::cerr << "filename=" << filename << std::endl; + //std::cerr << "filename=" << filename << std::endl; file.reset(util::OpenReadOrThrow(filename)); uint64_t total_size_ = util::SizeFile(file.get()); From 8fdd19310bfa11278d099d7b655134b468286911 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Wed, 11 Jan 2017 11:23:54 -0600 Subject: [PATCH 091/176] Update to applly CJK processing conditionally. 
--- scripts/ems/support/split-sentences.perl | 46 ++++++++++++++---------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl index c8ff87dde..19d05d8e1 100755 --- a/scripts/ems/support/split-sentences.perl +++ b/scripts/ems/support/split-sentences.perl @@ -18,6 +18,7 @@ my $mydir = "$RealBin/../../share/nonbreaking_prefixes"; my %NONBREAKING_PREFIX = (); my $language = "en"; +my $is_cjk = 0; my $QUIET = 0; my $HELP = 0; @@ -40,6 +41,11 @@ if (!$QUIET) { print STDERR "Language: $language\n"; } +# Is it Chinese, Japanese, Korean? +if ($language eq "yue" || $language eq "zh") { + $is_cjk = 1; +} + my $prefixfile = "$mydir/nonbreaking_prefix.$language"; # Default to English, if we don't have a language-specific prefix file. @@ -120,28 +126,30 @@ sub preprocess { # and are followed by a sentence starter punctuation and upper case. $text =~ s/([?!\.]) +([\'\"\(\[\¿\¡\p{IsPi}]+[\ ]*[\p{IsUpper}])/$1\n$2/g; - # Chinese uses unusual end-of-sentence markers. These are NOT - # followed by whitespace. Nor is there any idea of capitalization. - # There does not appear to be any unicode category for full-stops - # in general, so list them here. U+3002 U+FF0E U+FF1F U+FF01 - $text =~ s/([。.?!♪])/$1\n/g; + if ($is_cjk == 1) { + # Chinese uses unusual end-of-sentence markers. These are NOT + # followed by whitespace. Nor is there any idea of capitalization. + # There does not appear to be any unicode category for full-stops + # in general, so list them here. U+3002 U+FF0E U+FF1F U+FF01 + $text =~ s/([。.?!♪])/$1\n/g; - # A normal full-stop or other Western sentence enders followed - # by an ideograph is an and-of-sentence, always. - $text =~ s/([\.?!]) *(\p{CJK})/$1\n$2/g; + # A normal full-stop or other Western sentence enders followed + # by an ideograph is an end-of-sentence, always. + $text =~ s/([\.?!]) *(\p{CJK})/$1\n$2/g; - # Split close-paren-then-comma into two. 
- $text =~ s/(\p{Punctuation}) *(\p{Punctuation})/ $1 $2 /g; + # Split close-paren-then-comma into two. + $text =~ s/(\p{Punctuation}) *(\p{Punctuation})/ $1 $2 /g; - # Chinese does not use any sort of white-space between ideographs. - # Nominally, each single ideograph corresponds to one word. Add - # spaces here, so that later processing stages can tokenize readily. - # Note that this handles mixed latinate+CJK. - # TODO: perhaps also CJKExtA CJKExtB etc ??? CJK_Radicals_Sup ? - $text =~ s/(\p{Punctuation}) *(\p{CJK})/ $1 $2/g - $text =~ s/(\p{CJK}) *(\p{Punctuation})/$1 $2 /g; - $text =~ s/([\p{CJK}\p{CJKSymbols}])/ $1 /g; - $text =~ s/ +/ /g; + # Chinese does not use any sort of white-space between ideographs. + # Nominally, each single ideograph corresponds to one word. Add + # spaces here, so that later processing stages can tokenize readily. + # Note that this handles mixed latinate+CJK. + # TODO: perhaps also CJKExtA CJKExtB etc ??? CJK_Radicals_Sup ? + $text =~ s/(\p{Punctuation}) *(\p{CJK})/ $1 $2/g; + $text =~ s/(\p{CJK}) *(\p{Punctuation})/$1 $2 /g; + $text =~ s/([\p{CJK}\p{CJKSymbols}])/ $1 /g; + $text =~ s/ +/ /g; + } # Special punctuation cases are covered. Check all remaining periods. my $word; From 2e8bb0809bb2982e4e0f2316f7b18cc628dc3d6f Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Sat, 14 Jan 2017 17:56:01 +0000 Subject: [PATCH 092/176] tpt_tokenindex.cc: Indentation. 
--- .../TranslationModel/UG/mm/tpt_tokenindex.cc | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/moses/TranslationModel/UG/mm/tpt_tokenindex.cc b/moses/TranslationModel/UG/mm/tpt_tokenindex.cc index 08e123782..4e8095f61 100644 --- a/moses/TranslationModel/UG/mm/tpt_tokenindex.cc +++ b/moses/TranslationModel/UG/mm/tpt_tokenindex.cc @@ -59,8 +59,8 @@ namespace sapt comp.base = reinterpret_cast(endIdx); if (!unkToken.empty()) { - Entry const* bla = lower_bound(startIdx,endIdx,unkToken.c_str(),comp); - unkId = ((bla < endIdx && unkToken == comp.base+bla->offset) + Entry const* bla = lower_bound(startIdx,endIdx,unkToken.c_str(),comp); + unkId = ((bla < endIdx && unkToken == comp.base+bla->offset) ? bla->id : numTokens); } @@ -98,10 +98,10 @@ namespace sapt { if (startIdx != endIdx) { - Entry const* bla = lower_bound(startIdx,endIdx,p,comp); - if (bla != endIdx && !strcmp(comp.base+bla->offset,p)) - return bla->id; - if (!dynamic) return unkId; + Entry const* bla = lower_bound(startIdx,endIdx,p,comp); + if (bla != endIdx && !strcmp(comp.base+bla->offset,p)) + return bla->id; + if (!dynamic) return unkId; } else if (!dynamic) return strcmp(p,"NULL") && unkId; @@ -155,10 +155,10 @@ namespace sapt { if (!ridx.size()) { - boost::lock_guard lk(*this->lock); - // Someone else (multi-threading!) may have created the - // reverse index in the meantime, so let's check again - if (!ridx.size()) ridx = reverseIndex(); + boost::lock_guard lk(*this->lock); + // Someone else (multi-threading!) 
may have created the + // reverse index in the meantime, so let's check again + if (!ridx.size()) ridx = reverseIndex(); } if (id < ridx.size()) return ridx[id]; @@ -175,8 +175,8 @@ namespace sapt { if (!ridx.size()) { - boost::lock_guard lk(*this->lock); - if (!ridx.size()) ridx = reverseIndex(); + boost::lock_guard lk(*this->lock); + if (!ridx.size()) ridx = reverseIndex(); } } @@ -187,8 +187,8 @@ namespace sapt { if (!ridx.size()) { - boost::lock_guard lk(*this->lock); - if (!ridx.size()) ridx = reverseIndex(); + boost::lock_guard lk(*this->lock); + if (!ridx.size()) ridx = reverseIndex(); } if (id < ridx.size()) return ridx[id]; @@ -204,8 +204,8 @@ namespace sapt { if (!ridx.size()) { - boost::lock_guard lk(*this->lock); - if (!ridx.size()) ridx = reverseIndex(); + boost::lock_guard lk(*this->lock); + if (!ridx.size()) ridx = reverseIndex(); } ostringstream buf; for (size_t i = 0; i < v.size(); i++) @@ -219,8 +219,8 @@ namespace sapt { if (!ridx.size()) { - boost::lock_guard lk(*this->lock); - if (!ridx.size()) ridx = reverseIndex(); + boost::lock_guard lk(*this->lock); + if (!ridx.size()) ridx = reverseIndex(); } ostringstream buf; for (size_t i = 0; i < v.size(); i++) @@ -234,8 +234,8 @@ namespace sapt { if (!ridx.size()) { - boost::lock_guard lk(*this->lock); - if (!ridx.size()) ridx = reverseIndex(); + boost::lock_guard lk(*this->lock); + if (!ridx.size()) ridx = reverseIndex(); } ostringstream buf; if (start < stop) @@ -251,8 +251,8 @@ namespace sapt { if (!ridx.size()) { - boost::lock_guard lk(*this->lock); - if (!ridx.size()) ridx = reverseIndex(); + boost::lock_guard lk(*this->lock); + if (!ridx.size()) ridx = reverseIndex(); } ostringstream buf; if (start < stop) From 27760221c753d28fd08d018258db48719f8e6a56 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Sat, 14 Jan 2017 17:56:50 +0000 Subject: [PATCH 093/176] Adjusted library path in moses/TranslationModel/UG/Makefile --- moses/TranslationModel/UG/Makefile | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/moses/TranslationModel/UG/Makefile b/moses/TranslationModel/UG/Makefile index 213f87e0c..0006af638 100644 --- a/moses/TranslationModel/UG/Makefile +++ b/moses/TranslationModel/UG/Makefile @@ -35,7 +35,7 @@ CXXFLAGS += -DMAX_NUM_FACTORS=4 CXXFLAGS += -DKENLM_MAX_ORDER=5 CXXFLAGS += -DWITH_THREADS CXXFLAGS += -DNO_MOSES -CXXFLAGS += -DMMT +# CXXFLAGS += -DMMT CXXFLAGS += -I$(dir ${MOSES_ROOT})mmt-only CXXFLAGS += -I${MOSES_ROOT} -I. -I${MOSES_ROOT}/opt/include @@ -47,7 +47,7 @@ else ifeq ($(variant),syntax) CXXFLAGS += -fsyntax-only endif -LDFLAGS = -L${MOSES_ROOT}/opt/lib64 -L./lib/ +LDFLAGS = -L${MOSES_ROOT}/opt/lib -L./lib/ # WDIR = build/$(variant)/${HOSTTYPE}/${KERNEL} WDIR = build/$(variant) @@ -73,7 +73,7 @@ DYNAMIC_LIBS = pthread #DYNAMIC_LIBS += tcmalloc LIBS = -Wl,-B$(link) -LIBS += -L${MOSES_ROOT}/opt/lib64 ${BOOSTLIBS} +LIBS += -L${MOSES_ROOT}/opt/lib ${BOOSTLIBS} LIBS += $(addprefix -l,${STATIC_LIBS}) LIBS += -Wl,-Bdynamic LIBS += $(addprefix -l,${DYNAMIC_LIBS}) From b237741acdc2097b4878042cf035075b23d4881e Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Sat, 14 Jan 2017 17:57:46 +0000 Subject: [PATCH 094/176] Initial check-in: new utility to check overlap of text with training data. 
--- moses/TranslationModel/UG/check-coverage5.cc | 126 +++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 moses/TranslationModel/UG/check-coverage5.cc diff --git a/moses/TranslationModel/UG/check-coverage5.cc b/moses/TranslationModel/UG/check-coverage5.cc new file mode 100644 index 000000000..549eb7b21 --- /dev/null +++ b/moses/TranslationModel/UG/check-coverage5.cc @@ -0,0 +1,126 @@ +// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- + +// read a text from stdin, report percentage of n-grams covered + +#include +#include +#include +#include +#include +#include +#include "mm/ug_bitext.h" +#include "generic/file_io/ug_stream.h" +#include +#include +#include "mm/ug_bitext_sampler.h" + +#include +#include + +// #include "LSA.h" + +namespace po=boost::program_options; +using namespace Moses; +using namespace sapt; +using namespace std; +using namespace boost; + +typedef sapt::L2R_Token Token; +typedef mmTtrack ttrack_t; + +size_t ngram_size; +string bname; +vector ifiles; + +void interpret_args(int ac, char* av[]); + + +void +dump(mmTSA::tree_iterator& m, TokenIndex& V) +{ + if (m.size()) cout << m.str(NULL) << endl; + if (m.size()) cout << m.str(&V) << endl; + if (m.down()) + { + do { dump(m, V); } while (m.over()); + m.up(); + } +} + +int +main(int argc, char* argv[]) +{ + interpret_args(argc,argv); + TokenIndex V; + V.open(bname+".tdx"); V.setDynamic(true); V.iniReverseIndex(); + boost::shared_ptr > T(new mmTtrack); + T->open(bname+".mct"); + mmTSA I; I.open(bname+".sfa", T); + + string line; + BOOST_FOREACH(string const& file, ifiles) + { + size_t total_ngrams=0; + float matched_ngrams=0; + ifstream in(file.c_str()); + while(getline(in,line)) + { + // cout << line << endl; + vector snt; + V.fillIdSeq(line,snt); + if (snt.size() < ngram_size) continue; + total_ngrams += snt.size() - ngram_size + 1; + for (size_t i = 0; i + ngram_size <= snt.size(); ++i) + // for (size_t i = 0; i < snt.size(); ++i) + { + mmTSA::tree_iterator m(&I); + 
size_t stop = min(snt.size(), i+ngram_size); + size_t k = i; + while (k < stop && m.extend(snt[k])) ++k; + // cout << i << " " << k-i << " " << m.str(&V) << endl; + if (k - i == ngram_size) + ++matched_ngrams; + } + } + printf ("%5.1f%% matched %zu-grams (%.0f/%zu): %s\n", + (100 * matched_ngrams / total_ngrams), ngram_size, + matched_ngrams, total_ngrams, file.c_str()); + } +} + +void +interpret_args(int ac, char* av[]) +{ + po::variables_map vm; + po::options_description o("Options"); + o.add_options() + + ("help,h", "print this message") + ("ngram-size,n", po::value(&ngram_size)->default_value(5), + "sample size") + ; + + po::options_description h("Hidden Options"); + h.add_options() + ("bname", po::value(&bname), "base name of corpus") + ("ifiles", po::value >(&ifiles), "input files") + ; + + h.add(o); + po::positional_options_description a; + a.add("bname",1); + a.add("ifiles",-1); + + po::store(po::command_line_parser(ac,av) + .options(h) + .positional(a) + .run(),vm); + po::notify(vm); + if (vm.count("help")) + { + std::cout << "\nusage:\n\t" << av[0] + << " [options] " << std::endl; + std::cout << o << std::endl; + exit(0); + } +} From a6535a6d208db0d2743e0edfde338c910a659ebf Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Sat, 14 Jan 2017 17:58:33 +0000 Subject: [PATCH 095/176] Updated to newer boost version in contrib/Makefiles/install-dependencies.gmake --- contrib/Makefiles/install-dependencies.gmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/Makefiles/install-dependencies.gmake b/contrib/Makefiles/install-dependencies.gmake index 8262368a8..e62f0da26 100644 --- a/contrib/Makefiles/install-dependencies.gmake +++ b/contrib/Makefiles/install-dependencies.gmake @@ -91,11 +91,11 @@ $(call safepath,$(IRSTLM_PREFIX)/bin/build-lm.sh): rm -rf ${TMP} # boost -boost: URL=http://sourceforge.net/projects/boost/files/boost/1.59.0/boost_1_59_0.tar.gz/download +boost: 
URL=http://sourceforge.net/projects/boost/files/boost/1.63.0/boost_1_63_0.tar.gz/download boost: TMP=$(CWD)/build/boost boost: override PREFIX=${BOOST_PREFIX} boost: | $(call safepath,${BOOST_PREFIX}/include/boost) $(call safepath,${BOOST_PREFIX}/include/boost): $(sfget) - cd '${TMP}/boost_1_59_0' && ./bootstrap.sh && ./b2 --prefix=${PREFIX} -j${nproc} install + cd '${TMP}/boost_1_63_0' && ./bootstrap.sh && ./b2 --prefix=${PREFIX} -j${nproc} install rm -rf ${TMP} From 7c62f9c8a480d4c6e2cac1f42a318b64da5dec09 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 20 Jan 2017 16:00:45 +0000 Subject: [PATCH 096/176] compile error on Centos 6 --- .../.metadata/.plugins/org.eclipse.cdt.make.core/specs.cpp | 1 - mert/ForestRescore.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 contrib/other-builds/.metadata/.plugins/org.eclipse.cdt.make.core/specs.cpp diff --git a/contrib/other-builds/.metadata/.plugins/org.eclipse.cdt.make.core/specs.cpp b/contrib/other-builds/.metadata/.plugins/org.eclipse.cdt.make.core/specs.cpp deleted file mode 100644 index 8b1378917..000000000 --- a/contrib/other-builds/.metadata/.plugins/org.eclipse.cdt.make.core/specs.cpp +++ /dev/null @@ -1 +0,0 @@ - diff --git a/mert/ForestRescore.cpp b/mert/ForestRescore.cpp index 009152e35..8638ebc97 100644 --- a/mert/ForestRescore.cpp +++ b/mert/ForestRescore.cpp @@ -346,7 +346,7 @@ static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector< void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector& backgroundBleu, HgHypothesis* bestHypo) { - BackPointer init(NULL,kMinScore); + BackPointer init((const Edge*) NULL,kMinScore); vector backPointers(graph.VertexSize(),init); HgBleuScorer bleuScorer(references, graph, sentenceId, backgroundBleu); vector winnerStats(kBleuNgramOrder*2+1); From 888c3bf9b71b8fd99ca472e561edc121430478c9 Mon Sep 17 00:00:00 2001 From: Hieu Hoang 
Date: Mon, 30 Jan 2017 15:20:08 +0000 Subject: [PATCH 097/176] check all weights are there for all FF --- contrib/moses2/System.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/contrib/moses2/System.cpp b/contrib/moses2/System.cpp index 4da36690d..c02c47a6c 100644 --- a/contrib/moses2/System.cpp +++ b/contrib/moses2/System.cpp @@ -91,6 +91,19 @@ void System::LoadWeights() //cerr << "Weights:" << endl; typedef std::map > WeightMap; const WeightMap &allWeights = params.GetAllWeights(); + + // check all weights are there for all FF + const std::vector &ffs = featureFunctions.GetFeatureFunctions(); + BOOST_FOREACH(const FeatureFunction *ff, ffs) { + if (ff->IsTuneable()) { + const std::string &ffName = ff->GetName(); + WeightMap::const_iterator iterWeight = allWeights.find(ffName); + UTIL_THROW_IF2(iterWeight == allWeights.end(), "Must specify weight for " << ffName); + } + } + + + // set weight BOOST_FOREACH(const WeightMap::value_type &valPair, allWeights) { const string &ffName = valPair.first; const std::vector &ffWeights = valPair.second; @@ -100,7 +113,7 @@ void System::LoadWeights() cerr << ffWeights[i] << " "; } cerr << endl; - */ + */ weights.SetWeights(featureFunctions, ffName, ffWeights); } } From 575def9516b8d3604e292ab7aa90d657da642354 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 31 Jan 2017 00:32:11 +0000 Subject: [PATCH 098/176] get ready to make FeatureRegistry a singleton object --- contrib/moses2/FF/FeatureRegistry.cpp | 8 ++++---- contrib/moses2/FF/FeatureRegistry.h | 4 ++-- contrib/other-builds/moses/.cproject | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/contrib/moses2/FF/FeatureRegistry.cpp b/contrib/moses2/FF/FeatureRegistry.cpp index af0af9d0f..fc7540e55 100644 --- a/contrib/moses2/FF/FeatureRegistry.cpp +++ b/contrib/moses2/FF/FeatureRegistry.cpp @@ -28,7 +28,7 @@ template class DefaultFeatureFactory: public FeatureFactory { public: - FeatureFunction *Create(size_t startInd, 
const std::string &line) + FeatureFunction *Create(size_t startInd, const std::string &line) const { return new F(startInd, line); } @@ -38,7 +38,7 @@ public: class KenFactory: public FeatureFactory { public: - FeatureFunction *Create(size_t startInd, const std::string &line) + FeatureFunction *Create(size_t startInd, const std::string &line) const { ConstructKenLM(startInd, line); } @@ -91,9 +91,9 @@ void FeatureRegistry::Add(const std::string &name, FeatureFactory *factory) } FeatureFunction *FeatureRegistry::Construct(size_t startInd, - const std::string &name, const std::string &line) + const std::string &name, const std::string &line) const { - Map::iterator i = registry_.find(name); + Map::const_iterator i = registry_.find(name); if (i == registry_.end()) { cerr << "Feature name " << name << " is not registered."; abort(); diff --git a/contrib/moses2/FF/FeatureRegistry.h b/contrib/moses2/FF/FeatureRegistry.h index 065820ea7..b326381ba 100644 --- a/contrib/moses2/FF/FeatureRegistry.h +++ b/contrib/moses2/FF/FeatureRegistry.h @@ -14,7 +14,7 @@ public: { } - virtual FeatureFunction *Create(size_t startInd, const std::string &line) = 0; + virtual FeatureFunction *Create(size_t startInd, const std::string &line) const = 0; protected: FeatureFactory() @@ -31,7 +31,7 @@ public: ~FeatureRegistry(); FeatureFunction *Construct(size_t startInd, const std::string &name, - const std::string &line); + const std::string &line) const; void PrintFF() const; private: diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject index 491caa587..0e654af8b 100644 --- a/contrib/other-builds/moses/.cproject +++ b/contrib/other-builds/moses/.cproject @@ -11,7 +11,7 @@ - + @@ -86,7 +86,7 @@ - + From e6ebef4b83b5cf238f56b313d9b470c6d9c909cb Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 31 Jan 2017 00:45:45 +0000 Subject: [PATCH 099/176] FeatureRegistry a singleton object --- contrib/moses2/FF/FeatureFunctions.cpp | 4 ++-- 
contrib/moses2/FF/FeatureFunctions.h | 4 +--- contrib/moses2/FF/FeatureRegistry.cpp | 3 +++ contrib/moses2/FF/FeatureRegistry.h | 12 +++++++++--- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/contrib/moses2/FF/FeatureFunctions.cpp b/contrib/moses2/FF/FeatureFunctions.cpp index 5cb0bb1c2..4e4f5b54c 100644 --- a/contrib/moses2/FF/FeatureFunctions.cpp +++ b/contrib/moses2/FF/FeatureFunctions.cpp @@ -6,6 +6,7 @@ */ #include +#include "FeatureRegistry.h" #include "FeatureFunctions.h" #include "StatefulFeatureFunction.h" #include "../System.h" @@ -26,7 +27,6 @@ namespace Moses2 FeatureFunctions::FeatureFunctions(System &system) : m_system(system), m_ffStartInd(0) { - //m_registry.PrintFF(); } FeatureFunctions::~FeatureFunctions() @@ -112,7 +112,7 @@ FeatureFunction *FeatureFunctions::Create(const std::string &line) { vector toks = Tokenize(line); - FeatureFunction *ff = m_registry.Construct(m_ffStartInd, toks[0], line); + FeatureFunction *ff = FeatureRegistry::Instance().Construct(m_ffStartInd, toks[0], line); UTIL_THROW_IF2(ff == NULL, "Feature function not created"); // name diff --git a/contrib/moses2/FF/FeatureFunctions.h b/contrib/moses2/FF/FeatureFunctions.h index 271f68c0f..110ebf736 100644 --- a/contrib/moses2/FF/FeatureFunctions.h +++ b/contrib/moses2/FF/FeatureFunctions.h @@ -7,10 +7,10 @@ #pragma once +#include #include #include #include "../legacy/Parameter.h" -#include "FeatureRegistry.h" #include "../Phrase.h" namespace Moses2 @@ -95,8 +95,6 @@ protected: System &m_system; size_t m_ffStartInd; - FeatureRegistry m_registry; - FeatureFunction *Create(const std::string &line); std::string GetDefaultName(const std::string &stub); void OverrideFeatures(); diff --git a/contrib/moses2/FF/FeatureRegistry.cpp b/contrib/moses2/FF/FeatureRegistry.cpp index fc7540e55..3ec8706e5 100644 --- a/contrib/moses2/FF/FeatureRegistry.cpp +++ b/contrib/moses2/FF/FeatureRegistry.cpp @@ -22,8 +22,11 @@ using namespace std; + namespace Moses2 { +FeatureRegistry 
FeatureRegistry::s_instance; + template class DefaultFeatureFactory: public FeatureFactory { diff --git a/contrib/moses2/FF/FeatureRegistry.h b/contrib/moses2/FF/FeatureRegistry.h index b326381ba..63e78aae0 100644 --- a/contrib/moses2/FF/FeatureRegistry.h +++ b/contrib/moses2/FF/FeatureRegistry.h @@ -26,7 +26,9 @@ protected: class FeatureRegistry { public: - FeatureRegistry(); + static const FeatureRegistry &Instance() { + return s_instance; + } ~FeatureRegistry(); @@ -35,11 +37,15 @@ public: void PrintFF() const; private: - void Add(const std::string &name, FeatureFactory *factory); + static FeatureRegistry s_instance; typedef boost::unordered_map > Map; - Map registry_; + + FeatureRegistry(); + + void Add(const std::string &name, FeatureFactory *factory); + }; //////////////////////////////////////////////////////////////////// From 7206d592751ee9afeb1fa4753b7e19272e2585bc Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 31 Jan 2017 00:52:44 +0000 Subject: [PATCH 100/176] print available FFs --- contrib/moses2/legacy/Parameter.cpp | 11 ++++------- contrib/moses2/legacy/Parameter.h | 1 - 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/contrib/moses2/legacy/Parameter.cpp b/contrib/moses2/legacy/Parameter.cpp index 870a49f2a..5cb88645e 100644 --- a/contrib/moses2/legacy/Parameter.cpp +++ b/contrib/moses2/legacy/Parameter.cpp @@ -26,12 +26,14 @@ #include #include #include +#include + #include "Parameter.h" #include "InputFileStream.h" +#include "../FF/FeatureRegistry.h" #include "util/string_stream.hh" #include "util/exception.hh" #include "util/random.hh" -#include using namespace std; using namespace boost::algorithm; @@ -582,7 +584,7 @@ bool Parameter::LoadParam(int argc, char* xargv[]) FindParam("-config", argc, argv)) == "") { PrintCredit(); Explain(); - PrintFF(); + FeatureRegistry::Instance().PrintFF(); cerr << endl; cerr << "No configuration file was specified. 
Use -config or -f"; @@ -1638,11 +1640,6 @@ void Parameter::OverwriteParam(const string ¶mName, PARAM_VEC values) cerr << std::endl; } -void Parameter::PrintFF() const -{ - //StaticData::Instance().GetFeatureRegistry().PrintFF(); -} - std::set Parameter::GetWeightNames() const { std::set ret; diff --git a/contrib/moses2/legacy/Parameter.h b/contrib/moses2/legacy/Parameter.h index 5f5ff393c..f43ce98a4 100644 --- a/contrib/moses2/legacy/Parameter.h +++ b/contrib/moses2/legacy/Parameter.h @@ -84,7 +84,6 @@ protected: std::string const& abbrevName, std::string const& description); void PrintCredit(); - void PrintFF() const; void SetWeight(const std::string &name, size_t ind, float weight); void SetWeight(const std::string &name, size_t ind, From a8a5b43f2dc32bd1b45006fd43989dc71e74ba0e Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 31 Jan 2017 22:21:59 +0000 Subject: [PATCH 101/176] move moses2 to root --- Jamroot | 2 +- {contrib/moses2-cmd => moses2-cmd}/.cproject | 0 {contrib/moses2-cmd => moses2-cmd}/.project | 0 {contrib/moses2 => moses2}/.cproject | 0 {contrib/moses2 => moses2}/.project | 0 {contrib/moses2 => moses2}/AlignmentInfo.cpp | 0 {contrib/moses2 => moses2}/AlignmentInfo.h | 0 {contrib/moses2 => moses2}/AlignmentInfoCollection.cpp | 0 {contrib/moses2 => moses2}/AlignmentInfoCollection.h | 0 {contrib/moses2 => moses2}/ArcLists.cpp | 0 {contrib/moses2 => moses2}/ArcLists.h | 0 {contrib/moses2 => moses2}/Array.h | 0 {contrib/moses2 => moses2}/EstimatedScores.cpp | 0 {contrib/moses2 => moses2}/EstimatedScores.h | 0 {contrib/moses2 => moses2}/FF/Distortion.cpp | 0 {contrib/moses2 => moses2}/FF/Distortion.h | 0 {contrib/moses2 => moses2}/FF/FFState.cpp | 0 {contrib/moses2 => moses2}/FF/FFState.h | 0 {contrib/moses2 => moses2}/FF/FeatureFunction.cpp | 0 {contrib/moses2 => moses2}/FF/FeatureFunction.h | 0 {contrib/moses2 => moses2}/FF/FeatureFunctions.cpp | 0 {contrib/moses2 => moses2}/FF/FeatureFunctions.h | 0 {contrib/moses2 => 
moses2}/FF/FeatureRegistry.cpp | 0 {contrib/moses2 => moses2}/FF/FeatureRegistry.h | 0 .../FF/LexicalReordering/BidirectionalReorderingState.cpp | 0 .../FF/LexicalReordering/BidirectionalReorderingState.h | 0 .../FF/LexicalReordering/HReorderingBackwardState.cpp | 0 .../FF/LexicalReordering/HReorderingBackwardState.h | 0 .../FF/LexicalReordering/HReorderingForwardState.cpp | 0 .../FF/LexicalReordering/HReorderingForwardState.h | 0 {contrib/moses2 => moses2}/FF/LexicalReordering/LRModel.cpp | 0 {contrib/moses2 => moses2}/FF/LexicalReordering/LRModel.h | 0 {contrib/moses2 => moses2}/FF/LexicalReordering/LRState.cpp | 0 {contrib/moses2 => moses2}/FF/LexicalReordering/LRState.h | 0 .../FF/LexicalReordering/LexicalReordering.cpp | 0 .../moses2 => moses2}/FF/LexicalReordering/LexicalReordering.h | 0 .../FF/LexicalReordering/PhraseBasedReorderingState.cpp | 0 .../FF/LexicalReordering/PhraseBasedReorderingState.h | 0 .../moses2 => moses2}/FF/LexicalReordering/ReorderingStack.cpp | 0 .../moses2 => moses2}/FF/LexicalReordering/ReorderingStack.h | 0 {contrib/moses2 => moses2}/FF/OSM/KenOSM.cpp | 0 {contrib/moses2 => moses2}/FF/OSM/KenOSM.h | 0 {contrib/moses2 => moses2}/FF/OSM/OpSequenceModel.cpp | 0 {contrib/moses2 => moses2}/FF/OSM/OpSequenceModel.h | 0 {contrib/moses2 => moses2}/FF/OSM/osmHyp.cpp | 0 {contrib/moses2 => moses2}/FF/OSM/osmHyp.h | 0 {contrib/moses2 => moses2}/FF/PhrasePenalty.cpp | 0 {contrib/moses2 => moses2}/FF/PhrasePenalty.h | 0 {contrib/moses2 => moses2}/FF/PointerState.cpp | 0 {contrib/moses2 => moses2}/FF/PointerState.h | 0 {contrib/moses2 => moses2}/FF/SkeletonStatefulFF.cpp | 0 {contrib/moses2 => moses2}/FF/SkeletonStatefulFF.h | 0 {contrib/moses2 => moses2}/FF/SkeletonStatelessFF.cpp | 0 {contrib/moses2 => moses2}/FF/SkeletonStatelessFF.h | 0 {contrib/moses2 => moses2}/FF/StatefulFeatureFunction.cpp | 0 {contrib/moses2 => moses2}/FF/StatefulFeatureFunction.h | 0 {contrib/moses2 => moses2}/FF/StatelessFeatureFunction.cpp | 0 {contrib/moses2 => 
moses2}/FF/StatelessFeatureFunction.h | 0 {contrib/moses2 => moses2}/FF/WordPenalty.cpp | 0 {contrib/moses2 => moses2}/FF/WordPenalty.h | 0 {contrib/moses2 => moses2}/HypothesisBase.cpp | 0 {contrib/moses2 => moses2}/HypothesisBase.h | 0 {contrib/moses2 => moses2}/HypothesisColl.cpp | 0 {contrib/moses2 => moses2}/HypothesisColl.h | 0 {contrib/moses2 => moses2}/InputPathBase.cpp | 0 {contrib/moses2 => moses2}/InputPathBase.h | 0 {contrib/moses2 => moses2}/InputPathsBase.cpp | 0 {contrib/moses2 => moses2}/InputPathsBase.h | 0 {contrib/moses2 => moses2}/InputType.cpp | 0 {contrib/moses2 => moses2}/InputType.h | 0 {contrib/moses2 => moses2}/Jamfile | 2 +- {contrib/moses2 => moses2}/LM/GPULM.cpp | 0 {contrib/moses2 => moses2}/LM/GPULM.h | 0 {contrib/moses2 => moses2}/LM/KENLM.cpp | 0 {contrib/moses2 => moses2}/LM/KENLM.h | 0 {contrib/moses2 => moses2}/LM/KENLMBatch.cpp | 0 {contrib/moses2 => moses2}/LM/KENLMBatch.h | 0 {contrib/moses2 => moses2}/LM/LanguageModel.cpp | 0 {contrib/moses2 => moses2}/LM/LanguageModel.h | 0 {contrib/moses2 => moses2}/LM/LanguageModelDALM.cpp | 0 {contrib/moses2 => moses2}/LM/LanguageModelDALM.h | 0 {contrib/moses2 => moses2}/Main.cpp | 0 {contrib/moses2 => moses2}/Main.h | 0 {contrib/moses2 => moses2}/ManagerBase.cpp | 0 {contrib/moses2 => moses2}/ManagerBase.h | 0 {contrib/moses2 => moses2}/MemPool.cpp | 0 {contrib/moses2 => moses2}/MemPool.h | 0 {contrib/moses2 => moses2}/MemPoolAllocator.h | 0 {contrib/moses2 => moses2}/MorphoTrie/MorphTrie.h | 0 {contrib/moses2 => moses2}/MorphoTrie/Node.h | 0 {contrib/moses2 => moses2}/MorphoTrie/utils.h | 0 {contrib/moses2 => moses2}/Phrase.cpp | 0 {contrib/moses2 => moses2}/Phrase.h | 0 .../moses2 => moses2}/PhraseBased/CubePruningMiniStack/Misc.cpp | 0 .../moses2 => moses2}/PhraseBased/CubePruningMiniStack/Misc.h | 0 .../PhraseBased/CubePruningMiniStack/Search.cpp | 0 .../moses2 => moses2}/PhraseBased/CubePruningMiniStack/Search.h | 0 .../PhraseBased/CubePruningMiniStack/Stack.cpp | 0 .../moses2 => 
moses2}/PhraseBased/CubePruningMiniStack/Stack.h | 0 {contrib/moses2 => moses2}/PhraseBased/Hypothesis.cpp | 0 {contrib/moses2 => moses2}/PhraseBased/Hypothesis.h | 0 {contrib/moses2 => moses2}/PhraseBased/InputPath.cpp | 0 {contrib/moses2 => moses2}/PhraseBased/InputPath.h | 0 {contrib/moses2 => moses2}/PhraseBased/InputPaths.cpp | 0 {contrib/moses2 => moses2}/PhraseBased/InputPaths.h | 0 {contrib/moses2 => moses2}/PhraseBased/Manager.cpp | 0 {contrib/moses2 => moses2}/PhraseBased/Manager.h | 0 {contrib/moses2 => moses2}/PhraseBased/Normal/Search.cpp | 0 {contrib/moses2 => moses2}/PhraseBased/Normal/Search.h | 0 {contrib/moses2 => moses2}/PhraseBased/Normal/Stack.cpp | 0 {contrib/moses2 => moses2}/PhraseBased/Normal/Stack.h | 0 {contrib/moses2 => moses2}/PhraseBased/Normal/Stacks.cpp | 0 {contrib/moses2 => moses2}/PhraseBased/Normal/Stacks.h | 0 {contrib/moses2 => moses2}/PhraseBased/PhraseImpl.cpp | 0 {contrib/moses2 => moses2}/PhraseBased/PhraseImpl.h | 0 {contrib/moses2 => moses2}/PhraseBased/ReorderingConstraint.cpp | 0 {contrib/moses2 => moses2}/PhraseBased/ReorderingConstraint.h | 0 {contrib/moses2 => moses2}/PhraseBased/Search.cpp | 0 {contrib/moses2 => moses2}/PhraseBased/Search.h | 0 {contrib/moses2 => moses2}/PhraseBased/Sentence.cpp | 0 {contrib/moses2 => moses2}/PhraseBased/Sentence.h | 0 {contrib/moses2 => moses2}/PhraseBased/TargetPhraseImpl.cpp | 0 {contrib/moses2 => moses2}/PhraseBased/TargetPhraseImpl.h | 0 {contrib/moses2 => moses2}/PhraseBased/TargetPhrases.cpp | 0 {contrib/moses2 => moses2}/PhraseBased/TargetPhrases.h | 0 {contrib/moses2 => moses2}/PhraseBased/TrellisPath.cpp | 0 {contrib/moses2 => moses2}/PhraseBased/TrellisPath.h | 0 {contrib/moses2 => moses2}/PhraseImplTemplate.h | 0 {contrib/moses2 => moses2}/Recycler.cpp | 0 {contrib/moses2 => moses2}/Recycler.h | 0 {contrib/moses2 => moses2}/SCFG/ActiveChart.cpp | 0 {contrib/moses2 => moses2}/SCFG/ActiveChart.h | 0 {contrib/moses2 => moses2}/SCFG/Hypothesis.cpp | 0 {contrib/moses2 => 
moses2}/SCFG/Hypothesis.h | 0 {contrib/moses2 => moses2}/SCFG/InputPath.cpp | 0 {contrib/moses2 => moses2}/SCFG/InputPath.h | 0 {contrib/moses2 => moses2}/SCFG/InputPaths.cpp | 0 {contrib/moses2 => moses2}/SCFG/InputPaths.h | 0 {contrib/moses2 => moses2}/SCFG/Manager.cpp | 0 {contrib/moses2 => moses2}/SCFG/Manager.h | 0 {contrib/moses2 => moses2}/SCFG/Misc.cpp | 0 {contrib/moses2 => moses2}/SCFG/Misc.h | 0 {contrib/moses2 => moses2}/SCFG/PhraseImpl.cpp | 0 {contrib/moses2 => moses2}/SCFG/PhraseImpl.h | 0 {contrib/moses2 => moses2}/SCFG/Sentence.cpp | 0 {contrib/moses2 => moses2}/SCFG/Sentence.h | 0 {contrib/moses2 => moses2}/SCFG/Stack.cpp | 0 {contrib/moses2 => moses2}/SCFG/Stack.h | 0 {contrib/moses2 => moses2}/SCFG/Stacks.cpp | 0 {contrib/moses2 => moses2}/SCFG/Stacks.h | 0 {contrib/moses2 => moses2}/SCFG/TargetPhraseImpl.cpp | 0 {contrib/moses2 => moses2}/SCFG/TargetPhraseImpl.h | 0 {contrib/moses2 => moses2}/SCFG/TargetPhrases.cpp | 0 {contrib/moses2 => moses2}/SCFG/TargetPhrases.h | 0 {contrib/moses2 => moses2}/SCFG/Word.cpp | 0 {contrib/moses2 => moses2}/SCFG/Word.h | 0 {contrib/moses2 => moses2}/SCFG/nbest/KBestExtractor.cpp | 0 {contrib/moses2 => moses2}/SCFG/nbest/KBestExtractor.h | 0 {contrib/moses2 => moses2}/SCFG/nbest/NBest.cpp | 0 {contrib/moses2 => moses2}/SCFG/nbest/NBest.h | 0 {contrib/moses2 => moses2}/SCFG/nbest/NBestColl.cpp | 0 {contrib/moses2 => moses2}/SCFG/nbest/NBestColl.h | 0 {contrib/moses2 => moses2}/SCFG/nbest/NBests.cpp | 0 {contrib/moses2 => moses2}/SCFG/nbest/NBests.h | 0 {contrib/moses2 => moses2}/Scores.cpp | 0 {contrib/moses2 => moses2}/Scores.h | 0 {contrib/moses2 => moses2}/SubPhrase.cpp | 0 {contrib/moses2 => moses2}/SubPhrase.h | 0 {contrib/moses2 => moses2}/System.cpp | 0 {contrib/moses2 => moses2}/System.h | 0 {contrib/moses2 => moses2}/TargetPhrase.cpp | 0 {contrib/moses2 => moses2}/TargetPhrase.h | 0 .../TranslationModel/CompactPT/BlockHashIndex.cpp | 0 .../TranslationModel/CompactPT/BlockHashIndex.h | 0 
.../TranslationModel/CompactPT/CanonicalHuffman.h | 0 .../TranslationModel/CompactPT/CmphStringVectorAdapter.cpp | 0 .../TranslationModel/CompactPT/CmphStringVectorAdapter.h | 0 .../CompactPT/LexicalReorderingTableCompact.cpp | 0 .../TranslationModel/CompactPT/LexicalReorderingTableCompact.h | 0 .../moses2 => moses2}/TranslationModel/CompactPT/ListCoders.h | 0 .../TranslationModel/CompactPT/MmapAllocator.h | 0 .../TranslationModel/CompactPT/MonotonicVector.h | 0 .../TranslationModel/CompactPT/MurmurHash3.cpp | 0 .../moses2 => moses2}/TranslationModel/CompactPT/MurmurHash3.h | 0 .../moses2 => moses2}/TranslationModel/CompactPT/PackedArray.h | 0 .../TranslationModel/CompactPT/PhraseDecoder.cpp | 0 .../TranslationModel/CompactPT/PhraseDecoder.h | 0 .../TranslationModel/CompactPT/PhraseTableCompact.cpp | 0 .../TranslationModel/CompactPT/PhraseTableCompact.h | 0 .../moses2 => moses2}/TranslationModel/CompactPT/StringVector.h | 0 .../TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp | 0 .../TranslationModel/CompactPT/TargetPhraseCollectionCache.h | 0 .../TranslationModel/CompactPT/ThrowingFwrite.cpp | 0 .../TranslationModel/CompactPT/ThrowingFwrite.h | 0 {contrib/moses2 => moses2}/TranslationModel/Memory/Node.h | 0 .../TranslationModel/Memory/PhraseTableMemory.cpp | 0 .../TranslationModel/Memory/PhraseTableMemory.h | 0 {contrib/moses2 => moses2}/TranslationModel/PhraseTable.cpp | 0 {contrib/moses2 => moses2}/TranslationModel/PhraseTable.h | 0 .../moses2 => moses2}/TranslationModel/ProbingPT/ProbingPT.cpp | 0 .../moses2 => moses2}/TranslationModel/ProbingPT/ProbingPT.h | 0 .../TranslationModel/ProbingPT/StoreTarget.cpp | 0 .../moses2 => moses2}/TranslationModel/ProbingPT/StoreTarget.h | 0 .../moses2 => moses2}/TranslationModel/ProbingPT/StoreVocab.cpp | 0 .../moses2 => moses2}/TranslationModel/ProbingPT/StoreVocab.h | 0 {contrib/moses2 => moses2}/TranslationModel/ProbingPT/hash.cpp | 0 {contrib/moses2 => moses2}/TranslationModel/ProbingPT/hash.hh | 0 
.../TranslationModel/ProbingPT/line_splitter.cpp | 0 .../TranslationModel/ProbingPT/line_splitter.hh | 0 .../TranslationModel/ProbingPT/probing_hash_utils.cpp | 0 .../TranslationModel/ProbingPT/probing_hash_utils.hh | 0 .../moses2 => moses2}/TranslationModel/ProbingPT/querying.cpp | 0 .../moses2 => moses2}/TranslationModel/ProbingPT/querying.hh | 0 .../moses2 => moses2}/TranslationModel/ProbingPT/storing.cpp | 0 .../moses2 => moses2}/TranslationModel/ProbingPT/storing.hh | 0 .../moses2 => moses2}/TranslationModel/ProbingPT/vocabid.cpp | 0 .../moses2 => moses2}/TranslationModel/ProbingPT/vocabid.hh | 0 {contrib/moses2 => moses2}/TranslationModel/Transliteration.cpp | 0 {contrib/moses2 => moses2}/TranslationModel/Transliteration.h | 0 .../moses2 => moses2}/TranslationModel/UnknownWordPenalty.cpp | 0 .../moses2 => moses2}/TranslationModel/UnknownWordPenalty.h | 0 {contrib/moses2 => moses2}/TranslationTask.cpp | 0 {contrib/moses2 => moses2}/TranslationTask.h | 0 {contrib/moses2 => moses2}/TrellisPaths.cpp | 0 {contrib/moses2 => moses2}/TrellisPaths.h | 0 {contrib/moses2 => moses2}/TypeDef.cpp | 0 {contrib/moses2 => moses2}/TypeDef.h | 0 {contrib/moses2 => moses2}/Vector.cpp | 0 {contrib/moses2 => moses2}/Vector.h | 0 {contrib/moses2 => moses2}/Weights.cpp | 0 {contrib/moses2 => moses2}/Weights.h | 0 {contrib/moses2 => moses2}/Word.cpp | 0 {contrib/moses2 => moses2}/Word.h | 0 .../moses2 => moses2}/defer/CubePruningBitmapStack/Misc.cpp | 0 {contrib/moses2 => moses2}/defer/CubePruningBitmapStack/Misc.h | 0 .../moses2 => moses2}/defer/CubePruningBitmapStack/Search.cpp | 0 .../moses2 => moses2}/defer/CubePruningBitmapStack/Search.h | 0 .../moses2 => moses2}/defer/CubePruningBitmapStack/Stack.cpp | 0 {contrib/moses2 => moses2}/defer/CubePruningBitmapStack/Stack.h | 0 .../moses2 => moses2}/defer/CubePruningCardinalStack/Misc.cpp | 0 .../moses2 => moses2}/defer/CubePruningCardinalStack/Misc.h | 0 .../moses2 => moses2}/defer/CubePruningCardinalStack/Search.cpp | 0 .../moses2 
=> moses2}/defer/CubePruningCardinalStack/Search.h | 0 .../moses2 => moses2}/defer/CubePruningCardinalStack/Stack.cpp | 0 .../moses2 => moses2}/defer/CubePruningCardinalStack/Stack.h | 0 {contrib/moses2 => moses2}/defer/CubePruningPerBitmap/Misc.cpp | 0 {contrib/moses2 => moses2}/defer/CubePruningPerBitmap/Misc.h | 0 .../moses2 => moses2}/defer/CubePruningPerBitmap/Search.cpp | 0 {contrib/moses2 => moses2}/defer/CubePruningPerBitmap/Search.h | 0 .../moses2 => moses2}/defer/CubePruningPerBitmap/Stacks.cpp | 0 {contrib/moses2 => moses2}/defer/CubePruningPerBitmap/Stacks.h | 0 .../moses2 => moses2}/defer/CubePruningPerMiniStack/Misc.cpp | 0 {contrib/moses2 => moses2}/defer/CubePruningPerMiniStack/Misc.h | 0 .../moses2 => moses2}/defer/CubePruningPerMiniStack/Search.cpp | 0 .../moses2 => moses2}/defer/CubePruningPerMiniStack/Search.h | 0 .../moses2 => moses2}/defer/CubePruningPerMiniStack/Stacks.cpp | 0 .../moses2 => moses2}/defer/CubePruningPerMiniStack/Stacks.h | 0 {contrib/moses2 => moses2}/legacy/Bitmap.cpp | 0 {contrib/moses2 => moses2}/legacy/Bitmap.h | 0 {contrib/moses2 => moses2}/legacy/Bitmaps.cpp | 0 {contrib/moses2 => moses2}/legacy/Bitmaps.h | 0 {contrib/moses2 => moses2}/legacy/Factor.cpp | 0 {contrib/moses2 => moses2}/legacy/Factor.h | 0 {contrib/moses2 => moses2}/legacy/FactorCollection.cpp | 0 {contrib/moses2 => moses2}/legacy/FactorCollection.h | 0 {contrib/moses2 => moses2}/legacy/InputFileStream.cpp | 0 {contrib/moses2 => moses2}/legacy/InputFileStream.h | 0 {contrib/moses2 => moses2}/legacy/Matrix.cpp | 0 {contrib/moses2 => moses2}/legacy/Matrix.h | 0 {contrib/moses2 => moses2}/legacy/OutputCollector.h | 0 {contrib/moses2 => moses2}/legacy/OutputFileStream.cpp | 0 {contrib/moses2 => moses2}/legacy/OutputFileStream.h | 0 {contrib/moses2 => moses2}/legacy/Parameter.cpp | 0 {contrib/moses2 => moses2}/legacy/Parameter.h | 0 {contrib/moses2 => moses2}/legacy/Range.cpp | 0 {contrib/moses2 => moses2}/legacy/Range.h | 0 {contrib/moses2 => 
moses2}/legacy/ThreadPool.cpp | 0 {contrib/moses2 => moses2}/legacy/ThreadPool.h | 0 {contrib/moses2 => moses2}/legacy/Timer.cpp | 0 {contrib/moses2 => moses2}/legacy/Timer.h | 0 {contrib/moses2 => moses2}/legacy/Util2.cpp | 0 {contrib/moses2 => moses2}/legacy/Util2.h | 0 {contrib/moses2 => moses2}/legacy/gzfilebuf.h | 0 {contrib/moses2 => moses2}/parameters/AllOptions.cpp | 0 {contrib/moses2 => moses2}/parameters/AllOptions.h | 0 {contrib/moses2 => moses2}/parameters/BeamSearchOptions.h | 0 {contrib/moses2 => moses2}/parameters/BookkeepingOptions.cpp | 0 {contrib/moses2 => moses2}/parameters/BookkeepingOptions.h | 0 {contrib/moses2 => moses2}/parameters/ContextParameters.cpp | 0 {contrib/moses2 => moses2}/parameters/ContextParameters.h | 0 {contrib/moses2 => moses2}/parameters/CubePruningOptions.cpp | 0 {contrib/moses2 => moses2}/parameters/CubePruningOptions.h | 0 {contrib/moses2 => moses2}/parameters/InputOptions.cpp | 0 {contrib/moses2 => moses2}/parameters/InputOptions.h | 0 {contrib/moses2 => moses2}/parameters/LMBR_Options.cpp | 0 {contrib/moses2 => moses2}/parameters/LMBR_Options.h | 0 {contrib/moses2 => moses2}/parameters/LookupOptions.h | 0 {contrib/moses2 => moses2}/parameters/MBR_Options.cpp | 0 {contrib/moses2 => moses2}/parameters/MBR_Options.h | 0 {contrib/moses2 => moses2}/parameters/NBestOptions.cpp | 0 {contrib/moses2 => moses2}/parameters/NBestOptions.h | 0 {contrib/moses2 => moses2}/parameters/OOVHandlingOptions.cpp | 0 {contrib/moses2 => moses2}/parameters/OOVHandlingOptions.h | 0 {contrib/moses2 => moses2}/parameters/OptionsBaseClass.cpp | 0 {contrib/moses2 => moses2}/parameters/OptionsBaseClass.h | 0 {contrib/moses2 => moses2}/parameters/ReorderingOptions.cpp | 0 {contrib/moses2 => moses2}/parameters/ReorderingOptions.h | 0 {contrib/moses2 => moses2}/parameters/ReportingOptions.cpp | 0 {contrib/moses2 => moses2}/parameters/ReportingOptions.h | 0 {contrib/moses2 => moses2}/parameters/SearchOptions.cpp | 0 {contrib/moses2 => 
moses2}/parameters/SearchOptions.h | 0 {contrib/moses2 => moses2}/parameters/ServerOptions.cpp | 0 {contrib/moses2 => moses2}/parameters/ServerOptions.h | 0 {contrib/moses2 => moses2}/parameters/SyntaxOptions.cpp | 0 {contrib/moses2 => moses2}/parameters/SyntaxOptions.h | 0 {contrib/moses2 => moses2}/pugiconfig.hpp | 0 {contrib/moses2 => moses2}/pugixml.cpp | 0 {contrib/moses2 => moses2}/pugixml.hpp | 0 {contrib/moses2 => moses2}/server/Server.cpp | 0 {contrib/moses2 => moses2}/server/Server.h | 0 {contrib/moses2 => moses2}/server/TranslationRequest.cpp | 0 {contrib/moses2 => moses2}/server/TranslationRequest.h | 0 {contrib/moses2 => moses2}/server/Translator.cpp | 0 {contrib/moses2 => moses2}/server/Translator.h | 0 324 files changed, 2 insertions(+), 2 deletions(-) rename {contrib/moses2-cmd => moses2-cmd}/.cproject (100%) rename {contrib/moses2-cmd => moses2-cmd}/.project (100%) rename {contrib/moses2 => moses2}/.cproject (100%) rename {contrib/moses2 => moses2}/.project (100%) rename {contrib/moses2 => moses2}/AlignmentInfo.cpp (100%) rename {contrib/moses2 => moses2}/AlignmentInfo.h (100%) rename {contrib/moses2 => moses2}/AlignmentInfoCollection.cpp (100%) rename {contrib/moses2 => moses2}/AlignmentInfoCollection.h (100%) rename {contrib/moses2 => moses2}/ArcLists.cpp (100%) rename {contrib/moses2 => moses2}/ArcLists.h (100%) rename {contrib/moses2 => moses2}/Array.h (100%) rename {contrib/moses2 => moses2}/EstimatedScores.cpp (100%) rename {contrib/moses2 => moses2}/EstimatedScores.h (100%) rename {contrib/moses2 => moses2}/FF/Distortion.cpp (100%) rename {contrib/moses2 => moses2}/FF/Distortion.h (100%) rename {contrib/moses2 => moses2}/FF/FFState.cpp (100%) rename {contrib/moses2 => moses2}/FF/FFState.h (100%) rename {contrib/moses2 => moses2}/FF/FeatureFunction.cpp (100%) rename {contrib/moses2 => moses2}/FF/FeatureFunction.h (100%) rename {contrib/moses2 => moses2}/FF/FeatureFunctions.cpp (100%) rename {contrib/moses2 => moses2}/FF/FeatureFunctions.h 
(100%) rename {contrib/moses2 => moses2}/FF/FeatureRegistry.cpp (100%) rename {contrib/moses2 => moses2}/FF/FeatureRegistry.h (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/BidirectionalReorderingState.cpp (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/BidirectionalReorderingState.h (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/HReorderingBackwardState.cpp (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/HReorderingBackwardState.h (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/HReorderingForwardState.cpp (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/HReorderingForwardState.h (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/LRModel.cpp (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/LRModel.h (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/LRState.cpp (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/LRState.h (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/LexicalReordering.cpp (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/LexicalReordering.h (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/PhraseBasedReorderingState.cpp (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/PhraseBasedReorderingState.h (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/ReorderingStack.cpp (100%) rename {contrib/moses2 => moses2}/FF/LexicalReordering/ReorderingStack.h (100%) rename {contrib/moses2 => moses2}/FF/OSM/KenOSM.cpp (100%) rename {contrib/moses2 => moses2}/FF/OSM/KenOSM.h (100%) rename {contrib/moses2 => moses2}/FF/OSM/OpSequenceModel.cpp (100%) rename {contrib/moses2 => moses2}/FF/OSM/OpSequenceModel.h (100%) rename {contrib/moses2 => moses2}/FF/OSM/osmHyp.cpp (100%) rename {contrib/moses2 => moses2}/FF/OSM/osmHyp.h (100%) rename {contrib/moses2 => moses2}/FF/PhrasePenalty.cpp (100%) rename {contrib/moses2 => moses2}/FF/PhrasePenalty.h (100%) rename {contrib/moses2 => 
moses2}/FF/PointerState.cpp (100%) rename {contrib/moses2 => moses2}/FF/PointerState.h (100%) rename {contrib/moses2 => moses2}/FF/SkeletonStatefulFF.cpp (100%) rename {contrib/moses2 => moses2}/FF/SkeletonStatefulFF.h (100%) rename {contrib/moses2 => moses2}/FF/SkeletonStatelessFF.cpp (100%) rename {contrib/moses2 => moses2}/FF/SkeletonStatelessFF.h (100%) rename {contrib/moses2 => moses2}/FF/StatefulFeatureFunction.cpp (100%) rename {contrib/moses2 => moses2}/FF/StatefulFeatureFunction.h (100%) rename {contrib/moses2 => moses2}/FF/StatelessFeatureFunction.cpp (100%) rename {contrib/moses2 => moses2}/FF/StatelessFeatureFunction.h (100%) rename {contrib/moses2 => moses2}/FF/WordPenalty.cpp (100%) rename {contrib/moses2 => moses2}/FF/WordPenalty.h (100%) rename {contrib/moses2 => moses2}/HypothesisBase.cpp (100%) rename {contrib/moses2 => moses2}/HypothesisBase.h (100%) rename {contrib/moses2 => moses2}/HypothesisColl.cpp (100%) rename {contrib/moses2 => moses2}/HypothesisColl.h (100%) rename {contrib/moses2 => moses2}/InputPathBase.cpp (100%) rename {contrib/moses2 => moses2}/InputPathBase.h (100%) rename {contrib/moses2 => moses2}/InputPathsBase.cpp (100%) rename {contrib/moses2 => moses2}/InputPathsBase.h (100%) rename {contrib/moses2 => moses2}/InputType.cpp (100%) rename {contrib/moses2 => moses2}/InputType.h (100%) rename {contrib/moses2 => moses2}/Jamfile (97%) rename {contrib/moses2 => moses2}/LM/GPULM.cpp (100%) rename {contrib/moses2 => moses2}/LM/GPULM.h (100%) rename {contrib/moses2 => moses2}/LM/KENLM.cpp (100%) rename {contrib/moses2 => moses2}/LM/KENLM.h (100%) rename {contrib/moses2 => moses2}/LM/KENLMBatch.cpp (100%) rename {contrib/moses2 => moses2}/LM/KENLMBatch.h (100%) rename {contrib/moses2 => moses2}/LM/LanguageModel.cpp (100%) rename {contrib/moses2 => moses2}/LM/LanguageModel.h (100%) rename {contrib/moses2 => moses2}/LM/LanguageModelDALM.cpp (100%) rename {contrib/moses2 => moses2}/LM/LanguageModelDALM.h (100%) rename {contrib/moses2 => 
moses2}/Main.cpp (100%) rename {contrib/moses2 => moses2}/Main.h (100%) rename {contrib/moses2 => moses2}/ManagerBase.cpp (100%) rename {contrib/moses2 => moses2}/ManagerBase.h (100%) rename {contrib/moses2 => moses2}/MemPool.cpp (100%) rename {contrib/moses2 => moses2}/MemPool.h (100%) rename {contrib/moses2 => moses2}/MemPoolAllocator.h (100%) rename {contrib/moses2 => moses2}/MorphoTrie/MorphTrie.h (100%) rename {contrib/moses2 => moses2}/MorphoTrie/Node.h (100%) rename {contrib/moses2 => moses2}/MorphoTrie/utils.h (100%) rename {contrib/moses2 => moses2}/Phrase.cpp (100%) rename {contrib/moses2 => moses2}/Phrase.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/CubePruningMiniStack/Misc.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/CubePruningMiniStack/Misc.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/CubePruningMiniStack/Search.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/CubePruningMiniStack/Search.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/CubePruningMiniStack/Stack.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/CubePruningMiniStack/Stack.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/Hypothesis.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/Hypothesis.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/InputPath.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/InputPath.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/InputPaths.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/InputPaths.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/Manager.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/Manager.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/Normal/Search.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/Normal/Search.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/Normal/Stack.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/Normal/Stack.h (100%) rename {contrib/moses2 => 
moses2}/PhraseBased/Normal/Stacks.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/Normal/Stacks.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/PhraseImpl.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/PhraseImpl.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/ReorderingConstraint.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/ReorderingConstraint.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/Search.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/Search.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/Sentence.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/Sentence.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/TargetPhraseImpl.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/TargetPhraseImpl.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/TargetPhrases.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/TargetPhrases.h (100%) rename {contrib/moses2 => moses2}/PhraseBased/TrellisPath.cpp (100%) rename {contrib/moses2 => moses2}/PhraseBased/TrellisPath.h (100%) rename {contrib/moses2 => moses2}/PhraseImplTemplate.h (100%) rename {contrib/moses2 => moses2}/Recycler.cpp (100%) rename {contrib/moses2 => moses2}/Recycler.h (100%) rename {contrib/moses2 => moses2}/SCFG/ActiveChart.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/ActiveChart.h (100%) rename {contrib/moses2 => moses2}/SCFG/Hypothesis.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/Hypothesis.h (100%) rename {contrib/moses2 => moses2}/SCFG/InputPath.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/InputPath.h (100%) rename {contrib/moses2 => moses2}/SCFG/InputPaths.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/InputPaths.h (100%) rename {contrib/moses2 => moses2}/SCFG/Manager.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/Manager.h (100%) rename {contrib/moses2 => moses2}/SCFG/Misc.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/Misc.h (100%) rename {contrib/moses2 => 
moses2}/SCFG/PhraseImpl.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/PhraseImpl.h (100%) rename {contrib/moses2 => moses2}/SCFG/Sentence.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/Sentence.h (100%) rename {contrib/moses2 => moses2}/SCFG/Stack.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/Stack.h (100%) rename {contrib/moses2 => moses2}/SCFG/Stacks.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/Stacks.h (100%) rename {contrib/moses2 => moses2}/SCFG/TargetPhraseImpl.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/TargetPhraseImpl.h (100%) rename {contrib/moses2 => moses2}/SCFG/TargetPhrases.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/TargetPhrases.h (100%) rename {contrib/moses2 => moses2}/SCFG/Word.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/Word.h (100%) rename {contrib/moses2 => moses2}/SCFG/nbest/KBestExtractor.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/nbest/KBestExtractor.h (100%) rename {contrib/moses2 => moses2}/SCFG/nbest/NBest.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/nbest/NBest.h (100%) rename {contrib/moses2 => moses2}/SCFG/nbest/NBestColl.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/nbest/NBestColl.h (100%) rename {contrib/moses2 => moses2}/SCFG/nbest/NBests.cpp (100%) rename {contrib/moses2 => moses2}/SCFG/nbest/NBests.h (100%) rename {contrib/moses2 => moses2}/Scores.cpp (100%) rename {contrib/moses2 => moses2}/Scores.h (100%) rename {contrib/moses2 => moses2}/SubPhrase.cpp (100%) rename {contrib/moses2 => moses2}/SubPhrase.h (100%) rename {contrib/moses2 => moses2}/System.cpp (100%) rename {contrib/moses2 => moses2}/System.h (100%) rename {contrib/moses2 => moses2}/TargetPhrase.cpp (100%) rename {contrib/moses2 => moses2}/TargetPhrase.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/BlockHashIndex.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/BlockHashIndex.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/CanonicalHuffman.h (100%) 
rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/CmphStringVectorAdapter.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/LexicalReorderingTableCompact.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/ListCoders.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/MmapAllocator.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/MonotonicVector.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/MurmurHash3.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/MurmurHash3.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/PackedArray.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/PhraseDecoder.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/PhraseDecoder.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/PhraseTableCompact.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/PhraseTableCompact.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/StringVector.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/TargetPhraseCollectionCache.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/ThrowingFwrite.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/CompactPT/ThrowingFwrite.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/Memory/Node.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/Memory/PhraseTableMemory.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/Memory/PhraseTableMemory.h (100%) rename {contrib/moses2 => 
moses2}/TranslationModel/PhraseTable.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/PhraseTable.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/ProbingPT.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/ProbingPT.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/StoreTarget.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/StoreTarget.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/StoreVocab.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/StoreVocab.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/hash.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/hash.hh (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/line_splitter.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/line_splitter.hh (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/probing_hash_utils.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/probing_hash_utils.hh (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/querying.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/querying.hh (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/storing.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/storing.hh (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/vocabid.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/ProbingPT/vocabid.hh (100%) rename {contrib/moses2 => moses2}/TranslationModel/Transliteration.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/Transliteration.h (100%) rename {contrib/moses2 => moses2}/TranslationModel/UnknownWordPenalty.cpp (100%) rename {contrib/moses2 => moses2}/TranslationModel/UnknownWordPenalty.h (100%) rename {contrib/moses2 => moses2}/TranslationTask.cpp (100%) rename {contrib/moses2 => 
moses2}/TranslationTask.h (100%) rename {contrib/moses2 => moses2}/TrellisPaths.cpp (100%) rename {contrib/moses2 => moses2}/TrellisPaths.h (100%) rename {contrib/moses2 => moses2}/TypeDef.cpp (100%) rename {contrib/moses2 => moses2}/TypeDef.h (100%) rename {contrib/moses2 => moses2}/Vector.cpp (100%) rename {contrib/moses2 => moses2}/Vector.h (100%) rename {contrib/moses2 => moses2}/Weights.cpp (100%) rename {contrib/moses2 => moses2}/Weights.h (100%) rename {contrib/moses2 => moses2}/Word.cpp (100%) rename {contrib/moses2 => moses2}/Word.h (100%) rename {contrib/moses2 => moses2}/defer/CubePruningBitmapStack/Misc.cpp (100%) rename {contrib/moses2 => moses2}/defer/CubePruningBitmapStack/Misc.h (100%) rename {contrib/moses2 => moses2}/defer/CubePruningBitmapStack/Search.cpp (100%) rename {contrib/moses2 => moses2}/defer/CubePruningBitmapStack/Search.h (100%) rename {contrib/moses2 => moses2}/defer/CubePruningBitmapStack/Stack.cpp (100%) rename {contrib/moses2 => moses2}/defer/CubePruningBitmapStack/Stack.h (100%) rename {contrib/moses2 => moses2}/defer/CubePruningCardinalStack/Misc.cpp (100%) rename {contrib/moses2 => moses2}/defer/CubePruningCardinalStack/Misc.h (100%) rename {contrib/moses2 => moses2}/defer/CubePruningCardinalStack/Search.cpp (100%) rename {contrib/moses2 => moses2}/defer/CubePruningCardinalStack/Search.h (100%) rename {contrib/moses2 => moses2}/defer/CubePruningCardinalStack/Stack.cpp (100%) rename {contrib/moses2 => moses2}/defer/CubePruningCardinalStack/Stack.h (100%) rename {contrib/moses2 => moses2}/defer/CubePruningPerBitmap/Misc.cpp (100%) rename {contrib/moses2 => moses2}/defer/CubePruningPerBitmap/Misc.h (100%) rename {contrib/moses2 => moses2}/defer/CubePruningPerBitmap/Search.cpp (100%) rename {contrib/moses2 => moses2}/defer/CubePruningPerBitmap/Search.h (100%) rename {contrib/moses2 => moses2}/defer/CubePruningPerBitmap/Stacks.cpp (100%) rename {contrib/moses2 => moses2}/defer/CubePruningPerBitmap/Stacks.h (100%) rename 
{contrib/moses2 => moses2}/defer/CubePruningPerMiniStack/Misc.cpp (100%) rename {contrib/moses2 => moses2}/defer/CubePruningPerMiniStack/Misc.h (100%) rename {contrib/moses2 => moses2}/defer/CubePruningPerMiniStack/Search.cpp (100%) rename {contrib/moses2 => moses2}/defer/CubePruningPerMiniStack/Search.h (100%) rename {contrib/moses2 => moses2}/defer/CubePruningPerMiniStack/Stacks.cpp (100%) rename {contrib/moses2 => moses2}/defer/CubePruningPerMiniStack/Stacks.h (100%) rename {contrib/moses2 => moses2}/legacy/Bitmap.cpp (100%) rename {contrib/moses2 => moses2}/legacy/Bitmap.h (100%) rename {contrib/moses2 => moses2}/legacy/Bitmaps.cpp (100%) rename {contrib/moses2 => moses2}/legacy/Bitmaps.h (100%) rename {contrib/moses2 => moses2}/legacy/Factor.cpp (100%) rename {contrib/moses2 => moses2}/legacy/Factor.h (100%) rename {contrib/moses2 => moses2}/legacy/FactorCollection.cpp (100%) rename {contrib/moses2 => moses2}/legacy/FactorCollection.h (100%) rename {contrib/moses2 => moses2}/legacy/InputFileStream.cpp (100%) rename {contrib/moses2 => moses2}/legacy/InputFileStream.h (100%) rename {contrib/moses2 => moses2}/legacy/Matrix.cpp (100%) rename {contrib/moses2 => moses2}/legacy/Matrix.h (100%) rename {contrib/moses2 => moses2}/legacy/OutputCollector.h (100%) rename {contrib/moses2 => moses2}/legacy/OutputFileStream.cpp (100%) rename {contrib/moses2 => moses2}/legacy/OutputFileStream.h (100%) rename {contrib/moses2 => moses2}/legacy/Parameter.cpp (100%) rename {contrib/moses2 => moses2}/legacy/Parameter.h (100%) rename {contrib/moses2 => moses2}/legacy/Range.cpp (100%) rename {contrib/moses2 => moses2}/legacy/Range.h (100%) rename {contrib/moses2 => moses2}/legacy/ThreadPool.cpp (100%) rename {contrib/moses2 => moses2}/legacy/ThreadPool.h (100%) rename {contrib/moses2 => moses2}/legacy/Timer.cpp (100%) rename {contrib/moses2 => moses2}/legacy/Timer.h (100%) rename {contrib/moses2 => moses2}/legacy/Util2.cpp (100%) rename {contrib/moses2 => moses2}/legacy/Util2.h 
(100%) rename {contrib/moses2 => moses2}/legacy/gzfilebuf.h (100%) rename {contrib/moses2 => moses2}/parameters/AllOptions.cpp (100%) rename {contrib/moses2 => moses2}/parameters/AllOptions.h (100%) rename {contrib/moses2 => moses2}/parameters/BeamSearchOptions.h (100%) rename {contrib/moses2 => moses2}/parameters/BookkeepingOptions.cpp (100%) rename {contrib/moses2 => moses2}/parameters/BookkeepingOptions.h (100%) rename {contrib/moses2 => moses2}/parameters/ContextParameters.cpp (100%) rename {contrib/moses2 => moses2}/parameters/ContextParameters.h (100%) rename {contrib/moses2 => moses2}/parameters/CubePruningOptions.cpp (100%) rename {contrib/moses2 => moses2}/parameters/CubePruningOptions.h (100%) rename {contrib/moses2 => moses2}/parameters/InputOptions.cpp (100%) rename {contrib/moses2 => moses2}/parameters/InputOptions.h (100%) rename {contrib/moses2 => moses2}/parameters/LMBR_Options.cpp (100%) rename {contrib/moses2 => moses2}/parameters/LMBR_Options.h (100%) rename {contrib/moses2 => moses2}/parameters/LookupOptions.h (100%) rename {contrib/moses2 => moses2}/parameters/MBR_Options.cpp (100%) rename {contrib/moses2 => moses2}/parameters/MBR_Options.h (100%) rename {contrib/moses2 => moses2}/parameters/NBestOptions.cpp (100%) rename {contrib/moses2 => moses2}/parameters/NBestOptions.h (100%) rename {contrib/moses2 => moses2}/parameters/OOVHandlingOptions.cpp (100%) rename {contrib/moses2 => moses2}/parameters/OOVHandlingOptions.h (100%) rename {contrib/moses2 => moses2}/parameters/OptionsBaseClass.cpp (100%) rename {contrib/moses2 => moses2}/parameters/OptionsBaseClass.h (100%) rename {contrib/moses2 => moses2}/parameters/ReorderingOptions.cpp (100%) rename {contrib/moses2 => moses2}/parameters/ReorderingOptions.h (100%) rename {contrib/moses2 => moses2}/parameters/ReportingOptions.cpp (100%) rename {contrib/moses2 => moses2}/parameters/ReportingOptions.h (100%) rename {contrib/moses2 => moses2}/parameters/SearchOptions.cpp (100%) rename {contrib/moses2 
=> moses2}/parameters/SearchOptions.h (100%) rename {contrib/moses2 => moses2}/parameters/ServerOptions.cpp (100%) rename {contrib/moses2 => moses2}/parameters/ServerOptions.h (100%) rename {contrib/moses2 => moses2}/parameters/SyntaxOptions.cpp (100%) rename {contrib/moses2 => moses2}/parameters/SyntaxOptions.h (100%) rename {contrib/moses2 => moses2}/pugiconfig.hpp (100%) rename {contrib/moses2 => moses2}/pugixml.cpp (100%) rename {contrib/moses2 => moses2}/pugixml.hpp (100%) rename {contrib/moses2 => moses2}/server/Server.cpp (100%) rename {contrib/moses2 => moses2}/server/Server.h (100%) rename {contrib/moses2 => moses2}/server/TranslationRequest.cpp (100%) rename {contrib/moses2 => moses2}/server/TranslationRequest.h (100%) rename {contrib/moses2 => moses2}/server/Translator.cpp (100%) rename {contrib/moses2 => moses2}/server/Translator.h (100%) diff --git a/Jamroot b/Jamroot index d9fc811dd..ea4650670 100644 --- a/Jamroot +++ b/Jamroot @@ -316,7 +316,7 @@ contrib/c++tokenizer//tokenizer contrib/expected-bleu-training//train-expected-bleu contrib/expected-bleu-training//prepare-expected-bleu-training -contrib/moses2//programs +moses2//programs ; diff --git a/contrib/moses2-cmd/.cproject b/moses2-cmd/.cproject similarity index 100% rename from contrib/moses2-cmd/.cproject rename to moses2-cmd/.cproject diff --git a/contrib/moses2-cmd/.project b/moses2-cmd/.project similarity index 100% rename from contrib/moses2-cmd/.project rename to moses2-cmd/.project diff --git a/contrib/moses2/.cproject b/moses2/.cproject similarity index 100% rename from contrib/moses2/.cproject rename to moses2/.cproject diff --git a/contrib/moses2/.project b/moses2/.project similarity index 100% rename from contrib/moses2/.project rename to moses2/.project diff --git a/contrib/moses2/AlignmentInfo.cpp b/moses2/AlignmentInfo.cpp similarity index 100% rename from contrib/moses2/AlignmentInfo.cpp rename to moses2/AlignmentInfo.cpp diff --git a/contrib/moses2/AlignmentInfo.h 
b/moses2/AlignmentInfo.h similarity index 100% rename from contrib/moses2/AlignmentInfo.h rename to moses2/AlignmentInfo.h diff --git a/contrib/moses2/AlignmentInfoCollection.cpp b/moses2/AlignmentInfoCollection.cpp similarity index 100% rename from contrib/moses2/AlignmentInfoCollection.cpp rename to moses2/AlignmentInfoCollection.cpp diff --git a/contrib/moses2/AlignmentInfoCollection.h b/moses2/AlignmentInfoCollection.h similarity index 100% rename from contrib/moses2/AlignmentInfoCollection.h rename to moses2/AlignmentInfoCollection.h diff --git a/contrib/moses2/ArcLists.cpp b/moses2/ArcLists.cpp similarity index 100% rename from contrib/moses2/ArcLists.cpp rename to moses2/ArcLists.cpp diff --git a/contrib/moses2/ArcLists.h b/moses2/ArcLists.h similarity index 100% rename from contrib/moses2/ArcLists.h rename to moses2/ArcLists.h diff --git a/contrib/moses2/Array.h b/moses2/Array.h similarity index 100% rename from contrib/moses2/Array.h rename to moses2/Array.h diff --git a/contrib/moses2/EstimatedScores.cpp b/moses2/EstimatedScores.cpp similarity index 100% rename from contrib/moses2/EstimatedScores.cpp rename to moses2/EstimatedScores.cpp diff --git a/contrib/moses2/EstimatedScores.h b/moses2/EstimatedScores.h similarity index 100% rename from contrib/moses2/EstimatedScores.h rename to moses2/EstimatedScores.h diff --git a/contrib/moses2/FF/Distortion.cpp b/moses2/FF/Distortion.cpp similarity index 100% rename from contrib/moses2/FF/Distortion.cpp rename to moses2/FF/Distortion.cpp diff --git a/contrib/moses2/FF/Distortion.h b/moses2/FF/Distortion.h similarity index 100% rename from contrib/moses2/FF/Distortion.h rename to moses2/FF/Distortion.h diff --git a/contrib/moses2/FF/FFState.cpp b/moses2/FF/FFState.cpp similarity index 100% rename from contrib/moses2/FF/FFState.cpp rename to moses2/FF/FFState.cpp diff --git a/contrib/moses2/FF/FFState.h b/moses2/FF/FFState.h similarity index 100% rename from contrib/moses2/FF/FFState.h rename to moses2/FF/FFState.h 
diff --git a/contrib/moses2/FF/FeatureFunction.cpp b/moses2/FF/FeatureFunction.cpp similarity index 100% rename from contrib/moses2/FF/FeatureFunction.cpp rename to moses2/FF/FeatureFunction.cpp diff --git a/contrib/moses2/FF/FeatureFunction.h b/moses2/FF/FeatureFunction.h similarity index 100% rename from contrib/moses2/FF/FeatureFunction.h rename to moses2/FF/FeatureFunction.h diff --git a/contrib/moses2/FF/FeatureFunctions.cpp b/moses2/FF/FeatureFunctions.cpp similarity index 100% rename from contrib/moses2/FF/FeatureFunctions.cpp rename to moses2/FF/FeatureFunctions.cpp diff --git a/contrib/moses2/FF/FeatureFunctions.h b/moses2/FF/FeatureFunctions.h similarity index 100% rename from contrib/moses2/FF/FeatureFunctions.h rename to moses2/FF/FeatureFunctions.h diff --git a/contrib/moses2/FF/FeatureRegistry.cpp b/moses2/FF/FeatureRegistry.cpp similarity index 100% rename from contrib/moses2/FF/FeatureRegistry.cpp rename to moses2/FF/FeatureRegistry.cpp diff --git a/contrib/moses2/FF/FeatureRegistry.h b/moses2/FF/FeatureRegistry.h similarity index 100% rename from contrib/moses2/FF/FeatureRegistry.h rename to moses2/FF/FeatureRegistry.h diff --git a/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp b/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp similarity index 100% rename from contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp rename to moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp diff --git a/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.h b/moses2/FF/LexicalReordering/BidirectionalReorderingState.h similarity index 100% rename from contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.h rename to moses2/FF/LexicalReordering/BidirectionalReorderingState.h diff --git a/contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp b/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp similarity index 100% rename from 
contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp rename to moses2/FF/LexicalReordering/HReorderingBackwardState.cpp diff --git a/contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.h b/moses2/FF/LexicalReordering/HReorderingBackwardState.h similarity index 100% rename from contrib/moses2/FF/LexicalReordering/HReorderingBackwardState.h rename to moses2/FF/LexicalReordering/HReorderingBackwardState.h diff --git a/contrib/moses2/FF/LexicalReordering/HReorderingForwardState.cpp b/moses2/FF/LexicalReordering/HReorderingForwardState.cpp similarity index 100% rename from contrib/moses2/FF/LexicalReordering/HReorderingForwardState.cpp rename to moses2/FF/LexicalReordering/HReorderingForwardState.cpp diff --git a/contrib/moses2/FF/LexicalReordering/HReorderingForwardState.h b/moses2/FF/LexicalReordering/HReorderingForwardState.h similarity index 100% rename from contrib/moses2/FF/LexicalReordering/HReorderingForwardState.h rename to moses2/FF/LexicalReordering/HReorderingForwardState.h diff --git a/contrib/moses2/FF/LexicalReordering/LRModel.cpp b/moses2/FF/LexicalReordering/LRModel.cpp similarity index 100% rename from contrib/moses2/FF/LexicalReordering/LRModel.cpp rename to moses2/FF/LexicalReordering/LRModel.cpp diff --git a/contrib/moses2/FF/LexicalReordering/LRModel.h b/moses2/FF/LexicalReordering/LRModel.h similarity index 100% rename from contrib/moses2/FF/LexicalReordering/LRModel.h rename to moses2/FF/LexicalReordering/LRModel.h diff --git a/contrib/moses2/FF/LexicalReordering/LRState.cpp b/moses2/FF/LexicalReordering/LRState.cpp similarity index 100% rename from contrib/moses2/FF/LexicalReordering/LRState.cpp rename to moses2/FF/LexicalReordering/LRState.cpp diff --git a/contrib/moses2/FF/LexicalReordering/LRState.h b/moses2/FF/LexicalReordering/LRState.h similarity index 100% rename from contrib/moses2/FF/LexicalReordering/LRState.h rename to moses2/FF/LexicalReordering/LRState.h diff --git 
a/contrib/moses2/FF/LexicalReordering/LexicalReordering.cpp b/moses2/FF/LexicalReordering/LexicalReordering.cpp similarity index 100% rename from contrib/moses2/FF/LexicalReordering/LexicalReordering.cpp rename to moses2/FF/LexicalReordering/LexicalReordering.cpp diff --git a/contrib/moses2/FF/LexicalReordering/LexicalReordering.h b/moses2/FF/LexicalReordering/LexicalReordering.h similarity index 100% rename from contrib/moses2/FF/LexicalReordering/LexicalReordering.h rename to moses2/FF/LexicalReordering/LexicalReordering.h diff --git a/contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp b/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp similarity index 100% rename from contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp rename to moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp diff --git a/contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h b/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h similarity index 100% rename from contrib/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h rename to moses2/FF/LexicalReordering/PhraseBasedReorderingState.h diff --git a/contrib/moses2/FF/LexicalReordering/ReorderingStack.cpp b/moses2/FF/LexicalReordering/ReorderingStack.cpp similarity index 100% rename from contrib/moses2/FF/LexicalReordering/ReorderingStack.cpp rename to moses2/FF/LexicalReordering/ReorderingStack.cpp diff --git a/contrib/moses2/FF/LexicalReordering/ReorderingStack.h b/moses2/FF/LexicalReordering/ReorderingStack.h similarity index 100% rename from contrib/moses2/FF/LexicalReordering/ReorderingStack.h rename to moses2/FF/LexicalReordering/ReorderingStack.h diff --git a/contrib/moses2/FF/OSM/KenOSM.cpp b/moses2/FF/OSM/KenOSM.cpp similarity index 100% rename from contrib/moses2/FF/OSM/KenOSM.cpp rename to moses2/FF/OSM/KenOSM.cpp diff --git a/contrib/moses2/FF/OSM/KenOSM.h b/moses2/FF/OSM/KenOSM.h similarity index 100% rename from contrib/moses2/FF/OSM/KenOSM.h rename to 
moses2/FF/OSM/KenOSM.h diff --git a/contrib/moses2/FF/OSM/OpSequenceModel.cpp b/moses2/FF/OSM/OpSequenceModel.cpp similarity index 100% rename from contrib/moses2/FF/OSM/OpSequenceModel.cpp rename to moses2/FF/OSM/OpSequenceModel.cpp diff --git a/contrib/moses2/FF/OSM/OpSequenceModel.h b/moses2/FF/OSM/OpSequenceModel.h similarity index 100% rename from contrib/moses2/FF/OSM/OpSequenceModel.h rename to moses2/FF/OSM/OpSequenceModel.h diff --git a/contrib/moses2/FF/OSM/osmHyp.cpp b/moses2/FF/OSM/osmHyp.cpp similarity index 100% rename from contrib/moses2/FF/OSM/osmHyp.cpp rename to moses2/FF/OSM/osmHyp.cpp diff --git a/contrib/moses2/FF/OSM/osmHyp.h b/moses2/FF/OSM/osmHyp.h similarity index 100% rename from contrib/moses2/FF/OSM/osmHyp.h rename to moses2/FF/OSM/osmHyp.h diff --git a/contrib/moses2/FF/PhrasePenalty.cpp b/moses2/FF/PhrasePenalty.cpp similarity index 100% rename from contrib/moses2/FF/PhrasePenalty.cpp rename to moses2/FF/PhrasePenalty.cpp diff --git a/contrib/moses2/FF/PhrasePenalty.h b/moses2/FF/PhrasePenalty.h similarity index 100% rename from contrib/moses2/FF/PhrasePenalty.h rename to moses2/FF/PhrasePenalty.h diff --git a/contrib/moses2/FF/PointerState.cpp b/moses2/FF/PointerState.cpp similarity index 100% rename from contrib/moses2/FF/PointerState.cpp rename to moses2/FF/PointerState.cpp diff --git a/contrib/moses2/FF/PointerState.h b/moses2/FF/PointerState.h similarity index 100% rename from contrib/moses2/FF/PointerState.h rename to moses2/FF/PointerState.h diff --git a/contrib/moses2/FF/SkeletonStatefulFF.cpp b/moses2/FF/SkeletonStatefulFF.cpp similarity index 100% rename from contrib/moses2/FF/SkeletonStatefulFF.cpp rename to moses2/FF/SkeletonStatefulFF.cpp diff --git a/contrib/moses2/FF/SkeletonStatefulFF.h b/moses2/FF/SkeletonStatefulFF.h similarity index 100% rename from contrib/moses2/FF/SkeletonStatefulFF.h rename to moses2/FF/SkeletonStatefulFF.h diff --git a/contrib/moses2/FF/SkeletonStatelessFF.cpp b/moses2/FF/SkeletonStatelessFF.cpp 
similarity index 100% rename from contrib/moses2/FF/SkeletonStatelessFF.cpp rename to moses2/FF/SkeletonStatelessFF.cpp diff --git a/contrib/moses2/FF/SkeletonStatelessFF.h b/moses2/FF/SkeletonStatelessFF.h similarity index 100% rename from contrib/moses2/FF/SkeletonStatelessFF.h rename to moses2/FF/SkeletonStatelessFF.h diff --git a/contrib/moses2/FF/StatefulFeatureFunction.cpp b/moses2/FF/StatefulFeatureFunction.cpp similarity index 100% rename from contrib/moses2/FF/StatefulFeatureFunction.cpp rename to moses2/FF/StatefulFeatureFunction.cpp diff --git a/contrib/moses2/FF/StatefulFeatureFunction.h b/moses2/FF/StatefulFeatureFunction.h similarity index 100% rename from contrib/moses2/FF/StatefulFeatureFunction.h rename to moses2/FF/StatefulFeatureFunction.h diff --git a/contrib/moses2/FF/StatelessFeatureFunction.cpp b/moses2/FF/StatelessFeatureFunction.cpp similarity index 100% rename from contrib/moses2/FF/StatelessFeatureFunction.cpp rename to moses2/FF/StatelessFeatureFunction.cpp diff --git a/contrib/moses2/FF/StatelessFeatureFunction.h b/moses2/FF/StatelessFeatureFunction.h similarity index 100% rename from contrib/moses2/FF/StatelessFeatureFunction.h rename to moses2/FF/StatelessFeatureFunction.h diff --git a/contrib/moses2/FF/WordPenalty.cpp b/moses2/FF/WordPenalty.cpp similarity index 100% rename from contrib/moses2/FF/WordPenalty.cpp rename to moses2/FF/WordPenalty.cpp diff --git a/contrib/moses2/FF/WordPenalty.h b/moses2/FF/WordPenalty.h similarity index 100% rename from contrib/moses2/FF/WordPenalty.h rename to moses2/FF/WordPenalty.h diff --git a/contrib/moses2/HypothesisBase.cpp b/moses2/HypothesisBase.cpp similarity index 100% rename from contrib/moses2/HypothesisBase.cpp rename to moses2/HypothesisBase.cpp diff --git a/contrib/moses2/HypothesisBase.h b/moses2/HypothesisBase.h similarity index 100% rename from contrib/moses2/HypothesisBase.h rename to moses2/HypothesisBase.h diff --git a/contrib/moses2/HypothesisColl.cpp b/moses2/HypothesisColl.cpp 
similarity index 100% rename from contrib/moses2/HypothesisColl.cpp rename to moses2/HypothesisColl.cpp diff --git a/contrib/moses2/HypothesisColl.h b/moses2/HypothesisColl.h similarity index 100% rename from contrib/moses2/HypothesisColl.h rename to moses2/HypothesisColl.h diff --git a/contrib/moses2/InputPathBase.cpp b/moses2/InputPathBase.cpp similarity index 100% rename from contrib/moses2/InputPathBase.cpp rename to moses2/InputPathBase.cpp diff --git a/contrib/moses2/InputPathBase.h b/moses2/InputPathBase.h similarity index 100% rename from contrib/moses2/InputPathBase.h rename to moses2/InputPathBase.h diff --git a/contrib/moses2/InputPathsBase.cpp b/moses2/InputPathsBase.cpp similarity index 100% rename from contrib/moses2/InputPathsBase.cpp rename to moses2/InputPathsBase.cpp diff --git a/contrib/moses2/InputPathsBase.h b/moses2/InputPathsBase.h similarity index 100% rename from contrib/moses2/InputPathsBase.h rename to moses2/InputPathsBase.h diff --git a/contrib/moses2/InputType.cpp b/moses2/InputType.cpp similarity index 100% rename from contrib/moses2/InputType.cpp rename to moses2/InputType.cpp diff --git a/contrib/moses2/InputType.h b/moses2/InputType.h similarity index 100% rename from contrib/moses2/InputType.h rename to moses2/InputType.h diff --git a/contrib/moses2/Jamfile b/moses2/Jamfile similarity index 97% rename from contrib/moses2/Jamfile rename to moses2/Jamfile index 98e1c1e30..ccd2b98e5 100644 --- a/contrib/moses2/Jamfile +++ b/moses2/Jamfile @@ -1,4 +1,4 @@ -alias deps : ../..//z ../..//boost_iostreams ../..//boost_filesystem ../../moses/TranslationModel/CompactPT//cmph ../../moses//moses ; +alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses/TranslationModel/CompactPT//cmph ../moses//moses ; lib moses2_lib : AlignmentInfo.cpp diff --git a/contrib/moses2/LM/GPULM.cpp b/moses2/LM/GPULM.cpp similarity index 100% rename from contrib/moses2/LM/GPULM.cpp rename to moses2/LM/GPULM.cpp diff --git a/contrib/moses2/LM/GPULM.h 
b/moses2/LM/GPULM.h similarity index 100% rename from contrib/moses2/LM/GPULM.h rename to moses2/LM/GPULM.h diff --git a/contrib/moses2/LM/KENLM.cpp b/moses2/LM/KENLM.cpp similarity index 100% rename from contrib/moses2/LM/KENLM.cpp rename to moses2/LM/KENLM.cpp diff --git a/contrib/moses2/LM/KENLM.h b/moses2/LM/KENLM.h similarity index 100% rename from contrib/moses2/LM/KENLM.h rename to moses2/LM/KENLM.h diff --git a/contrib/moses2/LM/KENLMBatch.cpp b/moses2/LM/KENLMBatch.cpp similarity index 100% rename from contrib/moses2/LM/KENLMBatch.cpp rename to moses2/LM/KENLMBatch.cpp diff --git a/contrib/moses2/LM/KENLMBatch.h b/moses2/LM/KENLMBatch.h similarity index 100% rename from contrib/moses2/LM/KENLMBatch.h rename to moses2/LM/KENLMBatch.h diff --git a/contrib/moses2/LM/LanguageModel.cpp b/moses2/LM/LanguageModel.cpp similarity index 100% rename from contrib/moses2/LM/LanguageModel.cpp rename to moses2/LM/LanguageModel.cpp diff --git a/contrib/moses2/LM/LanguageModel.h b/moses2/LM/LanguageModel.h similarity index 100% rename from contrib/moses2/LM/LanguageModel.h rename to moses2/LM/LanguageModel.h diff --git a/contrib/moses2/LM/LanguageModelDALM.cpp b/moses2/LM/LanguageModelDALM.cpp similarity index 100% rename from contrib/moses2/LM/LanguageModelDALM.cpp rename to moses2/LM/LanguageModelDALM.cpp diff --git a/contrib/moses2/LM/LanguageModelDALM.h b/moses2/LM/LanguageModelDALM.h similarity index 100% rename from contrib/moses2/LM/LanguageModelDALM.h rename to moses2/LM/LanguageModelDALM.h diff --git a/contrib/moses2/Main.cpp b/moses2/Main.cpp similarity index 100% rename from contrib/moses2/Main.cpp rename to moses2/Main.cpp diff --git a/contrib/moses2/Main.h b/moses2/Main.h similarity index 100% rename from contrib/moses2/Main.h rename to moses2/Main.h diff --git a/contrib/moses2/ManagerBase.cpp b/moses2/ManagerBase.cpp similarity index 100% rename from contrib/moses2/ManagerBase.cpp rename to moses2/ManagerBase.cpp diff --git a/contrib/moses2/ManagerBase.h 
b/moses2/ManagerBase.h similarity index 100% rename from contrib/moses2/ManagerBase.h rename to moses2/ManagerBase.h diff --git a/contrib/moses2/MemPool.cpp b/moses2/MemPool.cpp similarity index 100% rename from contrib/moses2/MemPool.cpp rename to moses2/MemPool.cpp diff --git a/contrib/moses2/MemPool.h b/moses2/MemPool.h similarity index 100% rename from contrib/moses2/MemPool.h rename to moses2/MemPool.h diff --git a/contrib/moses2/MemPoolAllocator.h b/moses2/MemPoolAllocator.h similarity index 100% rename from contrib/moses2/MemPoolAllocator.h rename to moses2/MemPoolAllocator.h diff --git a/contrib/moses2/MorphoTrie/MorphTrie.h b/moses2/MorphoTrie/MorphTrie.h similarity index 100% rename from contrib/moses2/MorphoTrie/MorphTrie.h rename to moses2/MorphoTrie/MorphTrie.h diff --git a/contrib/moses2/MorphoTrie/Node.h b/moses2/MorphoTrie/Node.h similarity index 100% rename from contrib/moses2/MorphoTrie/Node.h rename to moses2/MorphoTrie/Node.h diff --git a/contrib/moses2/MorphoTrie/utils.h b/moses2/MorphoTrie/utils.h similarity index 100% rename from contrib/moses2/MorphoTrie/utils.h rename to moses2/MorphoTrie/utils.h diff --git a/contrib/moses2/Phrase.cpp b/moses2/Phrase.cpp similarity index 100% rename from contrib/moses2/Phrase.cpp rename to moses2/Phrase.cpp diff --git a/contrib/moses2/Phrase.h b/moses2/Phrase.h similarity index 100% rename from contrib/moses2/Phrase.h rename to moses2/Phrase.h diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp b/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp similarity index 100% rename from contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp rename to moses2/PhraseBased/CubePruningMiniStack/Misc.cpp diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.h b/moses2/PhraseBased/CubePruningMiniStack/Misc.h similarity index 100% rename from contrib/moses2/PhraseBased/CubePruningMiniStack/Misc.h rename to moses2/PhraseBased/CubePruningMiniStack/Misc.h diff --git 
a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp b/moses2/PhraseBased/CubePruningMiniStack/Search.cpp similarity index 100% rename from contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp rename to moses2/PhraseBased/CubePruningMiniStack/Search.cpp diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.h b/moses2/PhraseBased/CubePruningMiniStack/Search.h similarity index 100% rename from contrib/moses2/PhraseBased/CubePruningMiniStack/Search.h rename to moses2/PhraseBased/CubePruningMiniStack/Search.h diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp b/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp similarity index 100% rename from contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp rename to moses2/PhraseBased/CubePruningMiniStack/Stack.cpp diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.h b/moses2/PhraseBased/CubePruningMiniStack/Stack.h similarity index 100% rename from contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.h rename to moses2/PhraseBased/CubePruningMiniStack/Stack.h diff --git a/contrib/moses2/PhraseBased/Hypothesis.cpp b/moses2/PhraseBased/Hypothesis.cpp similarity index 100% rename from contrib/moses2/PhraseBased/Hypothesis.cpp rename to moses2/PhraseBased/Hypothesis.cpp diff --git a/contrib/moses2/PhraseBased/Hypothesis.h b/moses2/PhraseBased/Hypothesis.h similarity index 100% rename from contrib/moses2/PhraseBased/Hypothesis.h rename to moses2/PhraseBased/Hypothesis.h diff --git a/contrib/moses2/PhraseBased/InputPath.cpp b/moses2/PhraseBased/InputPath.cpp similarity index 100% rename from contrib/moses2/PhraseBased/InputPath.cpp rename to moses2/PhraseBased/InputPath.cpp diff --git a/contrib/moses2/PhraseBased/InputPath.h b/moses2/PhraseBased/InputPath.h similarity index 100% rename from contrib/moses2/PhraseBased/InputPath.h rename to moses2/PhraseBased/InputPath.h diff --git a/contrib/moses2/PhraseBased/InputPaths.cpp b/moses2/PhraseBased/InputPaths.cpp 
similarity index 100% rename from contrib/moses2/PhraseBased/InputPaths.cpp rename to moses2/PhraseBased/InputPaths.cpp diff --git a/contrib/moses2/PhraseBased/InputPaths.h b/moses2/PhraseBased/InputPaths.h similarity index 100% rename from contrib/moses2/PhraseBased/InputPaths.h rename to moses2/PhraseBased/InputPaths.h diff --git a/contrib/moses2/PhraseBased/Manager.cpp b/moses2/PhraseBased/Manager.cpp similarity index 100% rename from contrib/moses2/PhraseBased/Manager.cpp rename to moses2/PhraseBased/Manager.cpp diff --git a/contrib/moses2/PhraseBased/Manager.h b/moses2/PhraseBased/Manager.h similarity index 100% rename from contrib/moses2/PhraseBased/Manager.h rename to moses2/PhraseBased/Manager.h diff --git a/contrib/moses2/PhraseBased/Normal/Search.cpp b/moses2/PhraseBased/Normal/Search.cpp similarity index 100% rename from contrib/moses2/PhraseBased/Normal/Search.cpp rename to moses2/PhraseBased/Normal/Search.cpp diff --git a/contrib/moses2/PhraseBased/Normal/Search.h b/moses2/PhraseBased/Normal/Search.h similarity index 100% rename from contrib/moses2/PhraseBased/Normal/Search.h rename to moses2/PhraseBased/Normal/Search.h diff --git a/contrib/moses2/PhraseBased/Normal/Stack.cpp b/moses2/PhraseBased/Normal/Stack.cpp similarity index 100% rename from contrib/moses2/PhraseBased/Normal/Stack.cpp rename to moses2/PhraseBased/Normal/Stack.cpp diff --git a/contrib/moses2/PhraseBased/Normal/Stack.h b/moses2/PhraseBased/Normal/Stack.h similarity index 100% rename from contrib/moses2/PhraseBased/Normal/Stack.h rename to moses2/PhraseBased/Normal/Stack.h diff --git a/contrib/moses2/PhraseBased/Normal/Stacks.cpp b/moses2/PhraseBased/Normal/Stacks.cpp similarity index 100% rename from contrib/moses2/PhraseBased/Normal/Stacks.cpp rename to moses2/PhraseBased/Normal/Stacks.cpp diff --git a/contrib/moses2/PhraseBased/Normal/Stacks.h b/moses2/PhraseBased/Normal/Stacks.h similarity index 100% rename from contrib/moses2/PhraseBased/Normal/Stacks.h rename to 
moses2/PhraseBased/Normal/Stacks.h diff --git a/contrib/moses2/PhraseBased/PhraseImpl.cpp b/moses2/PhraseBased/PhraseImpl.cpp similarity index 100% rename from contrib/moses2/PhraseBased/PhraseImpl.cpp rename to moses2/PhraseBased/PhraseImpl.cpp diff --git a/contrib/moses2/PhraseBased/PhraseImpl.h b/moses2/PhraseBased/PhraseImpl.h similarity index 100% rename from contrib/moses2/PhraseBased/PhraseImpl.h rename to moses2/PhraseBased/PhraseImpl.h diff --git a/contrib/moses2/PhraseBased/ReorderingConstraint.cpp b/moses2/PhraseBased/ReorderingConstraint.cpp similarity index 100% rename from contrib/moses2/PhraseBased/ReorderingConstraint.cpp rename to moses2/PhraseBased/ReorderingConstraint.cpp diff --git a/contrib/moses2/PhraseBased/ReorderingConstraint.h b/moses2/PhraseBased/ReorderingConstraint.h similarity index 100% rename from contrib/moses2/PhraseBased/ReorderingConstraint.h rename to moses2/PhraseBased/ReorderingConstraint.h diff --git a/contrib/moses2/PhraseBased/Search.cpp b/moses2/PhraseBased/Search.cpp similarity index 100% rename from contrib/moses2/PhraseBased/Search.cpp rename to moses2/PhraseBased/Search.cpp diff --git a/contrib/moses2/PhraseBased/Search.h b/moses2/PhraseBased/Search.h similarity index 100% rename from contrib/moses2/PhraseBased/Search.h rename to moses2/PhraseBased/Search.h diff --git a/contrib/moses2/PhraseBased/Sentence.cpp b/moses2/PhraseBased/Sentence.cpp similarity index 100% rename from contrib/moses2/PhraseBased/Sentence.cpp rename to moses2/PhraseBased/Sentence.cpp diff --git a/contrib/moses2/PhraseBased/Sentence.h b/moses2/PhraseBased/Sentence.h similarity index 100% rename from contrib/moses2/PhraseBased/Sentence.h rename to moses2/PhraseBased/Sentence.h diff --git a/contrib/moses2/PhraseBased/TargetPhraseImpl.cpp b/moses2/PhraseBased/TargetPhraseImpl.cpp similarity index 100% rename from contrib/moses2/PhraseBased/TargetPhraseImpl.cpp rename to moses2/PhraseBased/TargetPhraseImpl.cpp diff --git 
a/contrib/moses2/PhraseBased/TargetPhraseImpl.h b/moses2/PhraseBased/TargetPhraseImpl.h similarity index 100% rename from contrib/moses2/PhraseBased/TargetPhraseImpl.h rename to moses2/PhraseBased/TargetPhraseImpl.h diff --git a/contrib/moses2/PhraseBased/TargetPhrases.cpp b/moses2/PhraseBased/TargetPhrases.cpp similarity index 100% rename from contrib/moses2/PhraseBased/TargetPhrases.cpp rename to moses2/PhraseBased/TargetPhrases.cpp diff --git a/contrib/moses2/PhraseBased/TargetPhrases.h b/moses2/PhraseBased/TargetPhrases.h similarity index 100% rename from contrib/moses2/PhraseBased/TargetPhrases.h rename to moses2/PhraseBased/TargetPhrases.h diff --git a/contrib/moses2/PhraseBased/TrellisPath.cpp b/moses2/PhraseBased/TrellisPath.cpp similarity index 100% rename from contrib/moses2/PhraseBased/TrellisPath.cpp rename to moses2/PhraseBased/TrellisPath.cpp diff --git a/contrib/moses2/PhraseBased/TrellisPath.h b/moses2/PhraseBased/TrellisPath.h similarity index 100% rename from contrib/moses2/PhraseBased/TrellisPath.h rename to moses2/PhraseBased/TrellisPath.h diff --git a/contrib/moses2/PhraseImplTemplate.h b/moses2/PhraseImplTemplate.h similarity index 100% rename from contrib/moses2/PhraseImplTemplate.h rename to moses2/PhraseImplTemplate.h diff --git a/contrib/moses2/Recycler.cpp b/moses2/Recycler.cpp similarity index 100% rename from contrib/moses2/Recycler.cpp rename to moses2/Recycler.cpp diff --git a/contrib/moses2/Recycler.h b/moses2/Recycler.h similarity index 100% rename from contrib/moses2/Recycler.h rename to moses2/Recycler.h diff --git a/contrib/moses2/SCFG/ActiveChart.cpp b/moses2/SCFG/ActiveChart.cpp similarity index 100% rename from contrib/moses2/SCFG/ActiveChart.cpp rename to moses2/SCFG/ActiveChart.cpp diff --git a/contrib/moses2/SCFG/ActiveChart.h b/moses2/SCFG/ActiveChart.h similarity index 100% rename from contrib/moses2/SCFG/ActiveChart.h rename to moses2/SCFG/ActiveChart.h diff --git a/contrib/moses2/SCFG/Hypothesis.cpp 
b/moses2/SCFG/Hypothesis.cpp similarity index 100% rename from contrib/moses2/SCFG/Hypothesis.cpp rename to moses2/SCFG/Hypothesis.cpp diff --git a/contrib/moses2/SCFG/Hypothesis.h b/moses2/SCFG/Hypothesis.h similarity index 100% rename from contrib/moses2/SCFG/Hypothesis.h rename to moses2/SCFG/Hypothesis.h diff --git a/contrib/moses2/SCFG/InputPath.cpp b/moses2/SCFG/InputPath.cpp similarity index 100% rename from contrib/moses2/SCFG/InputPath.cpp rename to moses2/SCFG/InputPath.cpp diff --git a/contrib/moses2/SCFG/InputPath.h b/moses2/SCFG/InputPath.h similarity index 100% rename from contrib/moses2/SCFG/InputPath.h rename to moses2/SCFG/InputPath.h diff --git a/contrib/moses2/SCFG/InputPaths.cpp b/moses2/SCFG/InputPaths.cpp similarity index 100% rename from contrib/moses2/SCFG/InputPaths.cpp rename to moses2/SCFG/InputPaths.cpp diff --git a/contrib/moses2/SCFG/InputPaths.h b/moses2/SCFG/InputPaths.h similarity index 100% rename from contrib/moses2/SCFG/InputPaths.h rename to moses2/SCFG/InputPaths.h diff --git a/contrib/moses2/SCFG/Manager.cpp b/moses2/SCFG/Manager.cpp similarity index 100% rename from contrib/moses2/SCFG/Manager.cpp rename to moses2/SCFG/Manager.cpp diff --git a/contrib/moses2/SCFG/Manager.h b/moses2/SCFG/Manager.h similarity index 100% rename from contrib/moses2/SCFG/Manager.h rename to moses2/SCFG/Manager.h diff --git a/contrib/moses2/SCFG/Misc.cpp b/moses2/SCFG/Misc.cpp similarity index 100% rename from contrib/moses2/SCFG/Misc.cpp rename to moses2/SCFG/Misc.cpp diff --git a/contrib/moses2/SCFG/Misc.h b/moses2/SCFG/Misc.h similarity index 100% rename from contrib/moses2/SCFG/Misc.h rename to moses2/SCFG/Misc.h diff --git a/contrib/moses2/SCFG/PhraseImpl.cpp b/moses2/SCFG/PhraseImpl.cpp similarity index 100% rename from contrib/moses2/SCFG/PhraseImpl.cpp rename to moses2/SCFG/PhraseImpl.cpp diff --git a/contrib/moses2/SCFG/PhraseImpl.h b/moses2/SCFG/PhraseImpl.h similarity index 100% rename from contrib/moses2/SCFG/PhraseImpl.h rename to 
moses2/SCFG/PhraseImpl.h diff --git a/contrib/moses2/SCFG/Sentence.cpp b/moses2/SCFG/Sentence.cpp similarity index 100% rename from contrib/moses2/SCFG/Sentence.cpp rename to moses2/SCFG/Sentence.cpp diff --git a/contrib/moses2/SCFG/Sentence.h b/moses2/SCFG/Sentence.h similarity index 100% rename from contrib/moses2/SCFG/Sentence.h rename to moses2/SCFG/Sentence.h diff --git a/contrib/moses2/SCFG/Stack.cpp b/moses2/SCFG/Stack.cpp similarity index 100% rename from contrib/moses2/SCFG/Stack.cpp rename to moses2/SCFG/Stack.cpp diff --git a/contrib/moses2/SCFG/Stack.h b/moses2/SCFG/Stack.h similarity index 100% rename from contrib/moses2/SCFG/Stack.h rename to moses2/SCFG/Stack.h diff --git a/contrib/moses2/SCFG/Stacks.cpp b/moses2/SCFG/Stacks.cpp similarity index 100% rename from contrib/moses2/SCFG/Stacks.cpp rename to moses2/SCFG/Stacks.cpp diff --git a/contrib/moses2/SCFG/Stacks.h b/moses2/SCFG/Stacks.h similarity index 100% rename from contrib/moses2/SCFG/Stacks.h rename to moses2/SCFG/Stacks.h diff --git a/contrib/moses2/SCFG/TargetPhraseImpl.cpp b/moses2/SCFG/TargetPhraseImpl.cpp similarity index 100% rename from contrib/moses2/SCFG/TargetPhraseImpl.cpp rename to moses2/SCFG/TargetPhraseImpl.cpp diff --git a/contrib/moses2/SCFG/TargetPhraseImpl.h b/moses2/SCFG/TargetPhraseImpl.h similarity index 100% rename from contrib/moses2/SCFG/TargetPhraseImpl.h rename to moses2/SCFG/TargetPhraseImpl.h diff --git a/contrib/moses2/SCFG/TargetPhrases.cpp b/moses2/SCFG/TargetPhrases.cpp similarity index 100% rename from contrib/moses2/SCFG/TargetPhrases.cpp rename to moses2/SCFG/TargetPhrases.cpp diff --git a/contrib/moses2/SCFG/TargetPhrases.h b/moses2/SCFG/TargetPhrases.h similarity index 100% rename from contrib/moses2/SCFG/TargetPhrases.h rename to moses2/SCFG/TargetPhrases.h diff --git a/contrib/moses2/SCFG/Word.cpp b/moses2/SCFG/Word.cpp similarity index 100% rename from contrib/moses2/SCFG/Word.cpp rename to moses2/SCFG/Word.cpp diff --git a/contrib/moses2/SCFG/Word.h 
b/moses2/SCFG/Word.h similarity index 100% rename from contrib/moses2/SCFG/Word.h rename to moses2/SCFG/Word.h diff --git a/contrib/moses2/SCFG/nbest/KBestExtractor.cpp b/moses2/SCFG/nbest/KBestExtractor.cpp similarity index 100% rename from contrib/moses2/SCFG/nbest/KBestExtractor.cpp rename to moses2/SCFG/nbest/KBestExtractor.cpp diff --git a/contrib/moses2/SCFG/nbest/KBestExtractor.h b/moses2/SCFG/nbest/KBestExtractor.h similarity index 100% rename from contrib/moses2/SCFG/nbest/KBestExtractor.h rename to moses2/SCFG/nbest/KBestExtractor.h diff --git a/contrib/moses2/SCFG/nbest/NBest.cpp b/moses2/SCFG/nbest/NBest.cpp similarity index 100% rename from contrib/moses2/SCFG/nbest/NBest.cpp rename to moses2/SCFG/nbest/NBest.cpp diff --git a/contrib/moses2/SCFG/nbest/NBest.h b/moses2/SCFG/nbest/NBest.h similarity index 100% rename from contrib/moses2/SCFG/nbest/NBest.h rename to moses2/SCFG/nbest/NBest.h diff --git a/contrib/moses2/SCFG/nbest/NBestColl.cpp b/moses2/SCFG/nbest/NBestColl.cpp similarity index 100% rename from contrib/moses2/SCFG/nbest/NBestColl.cpp rename to moses2/SCFG/nbest/NBestColl.cpp diff --git a/contrib/moses2/SCFG/nbest/NBestColl.h b/moses2/SCFG/nbest/NBestColl.h similarity index 100% rename from contrib/moses2/SCFG/nbest/NBestColl.h rename to moses2/SCFG/nbest/NBestColl.h diff --git a/contrib/moses2/SCFG/nbest/NBests.cpp b/moses2/SCFG/nbest/NBests.cpp similarity index 100% rename from contrib/moses2/SCFG/nbest/NBests.cpp rename to moses2/SCFG/nbest/NBests.cpp diff --git a/contrib/moses2/SCFG/nbest/NBests.h b/moses2/SCFG/nbest/NBests.h similarity index 100% rename from contrib/moses2/SCFG/nbest/NBests.h rename to moses2/SCFG/nbest/NBests.h diff --git a/contrib/moses2/Scores.cpp b/moses2/Scores.cpp similarity index 100% rename from contrib/moses2/Scores.cpp rename to moses2/Scores.cpp diff --git a/contrib/moses2/Scores.h b/moses2/Scores.h similarity index 100% rename from contrib/moses2/Scores.h rename to moses2/Scores.h diff --git 
a/contrib/moses2/SubPhrase.cpp b/moses2/SubPhrase.cpp similarity index 100% rename from contrib/moses2/SubPhrase.cpp rename to moses2/SubPhrase.cpp diff --git a/contrib/moses2/SubPhrase.h b/moses2/SubPhrase.h similarity index 100% rename from contrib/moses2/SubPhrase.h rename to moses2/SubPhrase.h diff --git a/contrib/moses2/System.cpp b/moses2/System.cpp similarity index 100% rename from contrib/moses2/System.cpp rename to moses2/System.cpp diff --git a/contrib/moses2/System.h b/moses2/System.h similarity index 100% rename from contrib/moses2/System.h rename to moses2/System.h diff --git a/contrib/moses2/TargetPhrase.cpp b/moses2/TargetPhrase.cpp similarity index 100% rename from contrib/moses2/TargetPhrase.cpp rename to moses2/TargetPhrase.cpp diff --git a/contrib/moses2/TargetPhrase.h b/moses2/TargetPhrase.h similarity index 100% rename from contrib/moses2/TargetPhrase.h rename to moses2/TargetPhrase.h diff --git a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp b/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp rename to moses2/TranslationModel/CompactPT/BlockHashIndex.cpp diff --git a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h b/moses2/TranslationModel/CompactPT/BlockHashIndex.h similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h rename to moses2/TranslationModel/CompactPT/BlockHashIndex.h diff --git a/contrib/moses2/TranslationModel/CompactPT/CanonicalHuffman.h b/moses2/TranslationModel/CompactPT/CanonicalHuffman.h similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/CanonicalHuffman.h rename to moses2/TranslationModel/CompactPT/CanonicalHuffman.h diff --git a/contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp b/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp similarity index 100% rename from 
contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp rename to moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp diff --git a/contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h b/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h rename to moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h diff --git a/contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp b/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp rename to moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp diff --git a/contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h b/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h rename to moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h diff --git a/contrib/moses2/TranslationModel/CompactPT/ListCoders.h b/moses2/TranslationModel/CompactPT/ListCoders.h similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/ListCoders.h rename to moses2/TranslationModel/CompactPT/ListCoders.h diff --git a/contrib/moses2/TranslationModel/CompactPT/MmapAllocator.h b/moses2/TranslationModel/CompactPT/MmapAllocator.h similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/MmapAllocator.h rename to moses2/TranslationModel/CompactPT/MmapAllocator.h diff --git a/contrib/moses2/TranslationModel/CompactPT/MonotonicVector.h b/moses2/TranslationModel/CompactPT/MonotonicVector.h similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/MonotonicVector.h rename to moses2/TranslationModel/CompactPT/MonotonicVector.h diff --git 
a/contrib/moses2/TranslationModel/CompactPT/MurmurHash3.cpp b/moses2/TranslationModel/CompactPT/MurmurHash3.cpp similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/MurmurHash3.cpp rename to moses2/TranslationModel/CompactPT/MurmurHash3.cpp diff --git a/contrib/moses2/TranslationModel/CompactPT/MurmurHash3.h b/moses2/TranslationModel/CompactPT/MurmurHash3.h similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/MurmurHash3.h rename to moses2/TranslationModel/CompactPT/MurmurHash3.h diff --git a/contrib/moses2/TranslationModel/CompactPT/PackedArray.h b/moses2/TranslationModel/CompactPT/PackedArray.h similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/PackedArray.h rename to moses2/TranslationModel/CompactPT/PackedArray.h diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp b/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp rename to moses2/TranslationModel/CompactPT/PhraseDecoder.cpp diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h b/moses2/TranslationModel/CompactPT/PhraseDecoder.h similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h rename to moses2/TranslationModel/CompactPT/PhraseDecoder.h diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp b/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp rename to moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.h b/moses2/TranslationModel/CompactPT/PhraseTableCompact.h similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/PhraseTableCompact.h rename to moses2/TranslationModel/CompactPT/PhraseTableCompact.h diff --git 
a/contrib/moses2/TranslationModel/CompactPT/StringVector.h b/moses2/TranslationModel/CompactPT/StringVector.h similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/StringVector.h rename to moses2/TranslationModel/CompactPT/StringVector.h diff --git a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp b/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp rename to moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp diff --git a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h b/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h rename to moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h diff --git a/contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp b/moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp rename to moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp diff --git a/contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.h b/moses2/TranslationModel/CompactPT/ThrowingFwrite.h similarity index 100% rename from contrib/moses2/TranslationModel/CompactPT/ThrowingFwrite.h rename to moses2/TranslationModel/CompactPT/ThrowingFwrite.h diff --git a/contrib/moses2/TranslationModel/Memory/Node.h b/moses2/TranslationModel/Memory/Node.h similarity index 100% rename from contrib/moses2/TranslationModel/Memory/Node.h rename to moses2/TranslationModel/Memory/Node.h diff --git a/contrib/moses2/TranslationModel/Memory/PhraseTableMemory.cpp b/moses2/TranslationModel/Memory/PhraseTableMemory.cpp similarity index 100% rename from contrib/moses2/TranslationModel/Memory/PhraseTableMemory.cpp rename to 
moses2/TranslationModel/Memory/PhraseTableMemory.cpp diff --git a/contrib/moses2/TranslationModel/Memory/PhraseTableMemory.h b/moses2/TranslationModel/Memory/PhraseTableMemory.h similarity index 100% rename from contrib/moses2/TranslationModel/Memory/PhraseTableMemory.h rename to moses2/TranslationModel/Memory/PhraseTableMemory.h diff --git a/contrib/moses2/TranslationModel/PhraseTable.cpp b/moses2/TranslationModel/PhraseTable.cpp similarity index 100% rename from contrib/moses2/TranslationModel/PhraseTable.cpp rename to moses2/TranslationModel/PhraseTable.cpp diff --git a/contrib/moses2/TranslationModel/PhraseTable.h b/moses2/TranslationModel/PhraseTable.h similarity index 100% rename from contrib/moses2/TranslationModel/PhraseTable.h rename to moses2/TranslationModel/PhraseTable.h diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/moses2/TranslationModel/ProbingPT/ProbingPT.cpp similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp rename to moses2/TranslationModel/ProbingPT/ProbingPT.cpp diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h b/moses2/TranslationModel/ProbingPT/ProbingPT.h similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h rename to moses2/TranslationModel/ProbingPT/ProbingPT.h diff --git a/contrib/moses2/TranslationModel/ProbingPT/StoreTarget.cpp b/moses2/TranslationModel/ProbingPT/StoreTarget.cpp similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/StoreTarget.cpp rename to moses2/TranslationModel/ProbingPT/StoreTarget.cpp diff --git a/contrib/moses2/TranslationModel/ProbingPT/StoreTarget.h b/moses2/TranslationModel/ProbingPT/StoreTarget.h similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/StoreTarget.h rename to moses2/TranslationModel/ProbingPT/StoreTarget.h diff --git a/contrib/moses2/TranslationModel/ProbingPT/StoreVocab.cpp b/moses2/TranslationModel/ProbingPT/StoreVocab.cpp similarity index 
100% rename from contrib/moses2/TranslationModel/ProbingPT/StoreVocab.cpp rename to moses2/TranslationModel/ProbingPT/StoreVocab.cpp diff --git a/contrib/moses2/TranslationModel/ProbingPT/StoreVocab.h b/moses2/TranslationModel/ProbingPT/StoreVocab.h similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/StoreVocab.h rename to moses2/TranslationModel/ProbingPT/StoreVocab.h diff --git a/contrib/moses2/TranslationModel/ProbingPT/hash.cpp b/moses2/TranslationModel/ProbingPT/hash.cpp similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/hash.cpp rename to moses2/TranslationModel/ProbingPT/hash.cpp diff --git a/contrib/moses2/TranslationModel/ProbingPT/hash.hh b/moses2/TranslationModel/ProbingPT/hash.hh similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/hash.hh rename to moses2/TranslationModel/ProbingPT/hash.hh diff --git a/contrib/moses2/TranslationModel/ProbingPT/line_splitter.cpp b/moses2/TranslationModel/ProbingPT/line_splitter.cpp similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/line_splitter.cpp rename to moses2/TranslationModel/ProbingPT/line_splitter.cpp diff --git a/contrib/moses2/TranslationModel/ProbingPT/line_splitter.hh b/moses2/TranslationModel/ProbingPT/line_splitter.hh similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/line_splitter.hh rename to moses2/TranslationModel/ProbingPT/line_splitter.hh diff --git a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp b/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp rename to moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp diff --git a/contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh b/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh rename to 
moses2/TranslationModel/ProbingPT/probing_hash_utils.hh diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp b/moses2/TranslationModel/ProbingPT/querying.cpp similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/querying.cpp rename to moses2/TranslationModel/ProbingPT/querying.cpp diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.hh b/moses2/TranslationModel/ProbingPT/querying.hh similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/querying.hh rename to moses2/TranslationModel/ProbingPT/querying.hh diff --git a/contrib/moses2/TranslationModel/ProbingPT/storing.cpp b/moses2/TranslationModel/ProbingPT/storing.cpp similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/storing.cpp rename to moses2/TranslationModel/ProbingPT/storing.cpp diff --git a/contrib/moses2/TranslationModel/ProbingPT/storing.hh b/moses2/TranslationModel/ProbingPT/storing.hh similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/storing.hh rename to moses2/TranslationModel/ProbingPT/storing.hh diff --git a/contrib/moses2/TranslationModel/ProbingPT/vocabid.cpp b/moses2/TranslationModel/ProbingPT/vocabid.cpp similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/vocabid.cpp rename to moses2/TranslationModel/ProbingPT/vocabid.cpp diff --git a/contrib/moses2/TranslationModel/ProbingPT/vocabid.hh b/moses2/TranslationModel/ProbingPT/vocabid.hh similarity index 100% rename from contrib/moses2/TranslationModel/ProbingPT/vocabid.hh rename to moses2/TranslationModel/ProbingPT/vocabid.hh diff --git a/contrib/moses2/TranslationModel/Transliteration.cpp b/moses2/TranslationModel/Transliteration.cpp similarity index 100% rename from contrib/moses2/TranslationModel/Transliteration.cpp rename to moses2/TranslationModel/Transliteration.cpp diff --git a/contrib/moses2/TranslationModel/Transliteration.h b/moses2/TranslationModel/Transliteration.h similarity index 100% rename from 
contrib/moses2/TranslationModel/Transliteration.h rename to moses2/TranslationModel/Transliteration.h diff --git a/contrib/moses2/TranslationModel/UnknownWordPenalty.cpp b/moses2/TranslationModel/UnknownWordPenalty.cpp similarity index 100% rename from contrib/moses2/TranslationModel/UnknownWordPenalty.cpp rename to moses2/TranslationModel/UnknownWordPenalty.cpp diff --git a/contrib/moses2/TranslationModel/UnknownWordPenalty.h b/moses2/TranslationModel/UnknownWordPenalty.h similarity index 100% rename from contrib/moses2/TranslationModel/UnknownWordPenalty.h rename to moses2/TranslationModel/UnknownWordPenalty.h diff --git a/contrib/moses2/TranslationTask.cpp b/moses2/TranslationTask.cpp similarity index 100% rename from contrib/moses2/TranslationTask.cpp rename to moses2/TranslationTask.cpp diff --git a/contrib/moses2/TranslationTask.h b/moses2/TranslationTask.h similarity index 100% rename from contrib/moses2/TranslationTask.h rename to moses2/TranslationTask.h diff --git a/contrib/moses2/TrellisPaths.cpp b/moses2/TrellisPaths.cpp similarity index 100% rename from contrib/moses2/TrellisPaths.cpp rename to moses2/TrellisPaths.cpp diff --git a/contrib/moses2/TrellisPaths.h b/moses2/TrellisPaths.h similarity index 100% rename from contrib/moses2/TrellisPaths.h rename to moses2/TrellisPaths.h diff --git a/contrib/moses2/TypeDef.cpp b/moses2/TypeDef.cpp similarity index 100% rename from contrib/moses2/TypeDef.cpp rename to moses2/TypeDef.cpp diff --git a/contrib/moses2/TypeDef.h b/moses2/TypeDef.h similarity index 100% rename from contrib/moses2/TypeDef.h rename to moses2/TypeDef.h diff --git a/contrib/moses2/Vector.cpp b/moses2/Vector.cpp similarity index 100% rename from contrib/moses2/Vector.cpp rename to moses2/Vector.cpp diff --git a/contrib/moses2/Vector.h b/moses2/Vector.h similarity index 100% rename from contrib/moses2/Vector.h rename to moses2/Vector.h diff --git a/contrib/moses2/Weights.cpp b/moses2/Weights.cpp similarity index 100% rename from 
contrib/moses2/Weights.cpp rename to moses2/Weights.cpp diff --git a/contrib/moses2/Weights.h b/moses2/Weights.h similarity index 100% rename from contrib/moses2/Weights.h rename to moses2/Weights.h diff --git a/contrib/moses2/Word.cpp b/moses2/Word.cpp similarity index 100% rename from contrib/moses2/Word.cpp rename to moses2/Word.cpp diff --git a/contrib/moses2/Word.h b/moses2/Word.h similarity index 100% rename from contrib/moses2/Word.h rename to moses2/Word.h diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Misc.cpp b/moses2/defer/CubePruningBitmapStack/Misc.cpp similarity index 100% rename from contrib/moses2/defer/CubePruningBitmapStack/Misc.cpp rename to moses2/defer/CubePruningBitmapStack/Misc.cpp diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Misc.h b/moses2/defer/CubePruningBitmapStack/Misc.h similarity index 100% rename from contrib/moses2/defer/CubePruningBitmapStack/Misc.h rename to moses2/defer/CubePruningBitmapStack/Misc.h diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Search.cpp b/moses2/defer/CubePruningBitmapStack/Search.cpp similarity index 100% rename from contrib/moses2/defer/CubePruningBitmapStack/Search.cpp rename to moses2/defer/CubePruningBitmapStack/Search.cpp diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Search.h b/moses2/defer/CubePruningBitmapStack/Search.h similarity index 100% rename from contrib/moses2/defer/CubePruningBitmapStack/Search.h rename to moses2/defer/CubePruningBitmapStack/Search.h diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Stack.cpp b/moses2/defer/CubePruningBitmapStack/Stack.cpp similarity index 100% rename from contrib/moses2/defer/CubePruningBitmapStack/Stack.cpp rename to moses2/defer/CubePruningBitmapStack/Stack.cpp diff --git a/contrib/moses2/defer/CubePruningBitmapStack/Stack.h b/moses2/defer/CubePruningBitmapStack/Stack.h similarity index 100% rename from contrib/moses2/defer/CubePruningBitmapStack/Stack.h rename to moses2/defer/CubePruningBitmapStack/Stack.h 
diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Misc.cpp b/moses2/defer/CubePruningCardinalStack/Misc.cpp similarity index 100% rename from contrib/moses2/defer/CubePruningCardinalStack/Misc.cpp rename to moses2/defer/CubePruningCardinalStack/Misc.cpp diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Misc.h b/moses2/defer/CubePruningCardinalStack/Misc.h similarity index 100% rename from contrib/moses2/defer/CubePruningCardinalStack/Misc.h rename to moses2/defer/CubePruningCardinalStack/Misc.h diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Search.cpp b/moses2/defer/CubePruningCardinalStack/Search.cpp similarity index 100% rename from contrib/moses2/defer/CubePruningCardinalStack/Search.cpp rename to moses2/defer/CubePruningCardinalStack/Search.cpp diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Search.h b/moses2/defer/CubePruningCardinalStack/Search.h similarity index 100% rename from contrib/moses2/defer/CubePruningCardinalStack/Search.h rename to moses2/defer/CubePruningCardinalStack/Search.h diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Stack.cpp b/moses2/defer/CubePruningCardinalStack/Stack.cpp similarity index 100% rename from contrib/moses2/defer/CubePruningCardinalStack/Stack.cpp rename to moses2/defer/CubePruningCardinalStack/Stack.cpp diff --git a/contrib/moses2/defer/CubePruningCardinalStack/Stack.h b/moses2/defer/CubePruningCardinalStack/Stack.h similarity index 100% rename from contrib/moses2/defer/CubePruningCardinalStack/Stack.h rename to moses2/defer/CubePruningCardinalStack/Stack.h diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Misc.cpp b/moses2/defer/CubePruningPerBitmap/Misc.cpp similarity index 100% rename from contrib/moses2/defer/CubePruningPerBitmap/Misc.cpp rename to moses2/defer/CubePruningPerBitmap/Misc.cpp diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Misc.h b/moses2/defer/CubePruningPerBitmap/Misc.h similarity index 100% rename from 
contrib/moses2/defer/CubePruningPerBitmap/Misc.h rename to moses2/defer/CubePruningPerBitmap/Misc.h diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Search.cpp b/moses2/defer/CubePruningPerBitmap/Search.cpp similarity index 100% rename from contrib/moses2/defer/CubePruningPerBitmap/Search.cpp rename to moses2/defer/CubePruningPerBitmap/Search.cpp diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Search.h b/moses2/defer/CubePruningPerBitmap/Search.h similarity index 100% rename from contrib/moses2/defer/CubePruningPerBitmap/Search.h rename to moses2/defer/CubePruningPerBitmap/Search.h diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Stacks.cpp b/moses2/defer/CubePruningPerBitmap/Stacks.cpp similarity index 100% rename from contrib/moses2/defer/CubePruningPerBitmap/Stacks.cpp rename to moses2/defer/CubePruningPerBitmap/Stacks.cpp diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Stacks.h b/moses2/defer/CubePruningPerBitmap/Stacks.h similarity index 100% rename from contrib/moses2/defer/CubePruningPerBitmap/Stacks.h rename to moses2/defer/CubePruningPerBitmap/Stacks.h diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Misc.cpp b/moses2/defer/CubePruningPerMiniStack/Misc.cpp similarity index 100% rename from contrib/moses2/defer/CubePruningPerMiniStack/Misc.cpp rename to moses2/defer/CubePruningPerMiniStack/Misc.cpp diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Misc.h b/moses2/defer/CubePruningPerMiniStack/Misc.h similarity index 100% rename from contrib/moses2/defer/CubePruningPerMiniStack/Misc.h rename to moses2/defer/CubePruningPerMiniStack/Misc.h diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Search.cpp b/moses2/defer/CubePruningPerMiniStack/Search.cpp similarity index 100% rename from contrib/moses2/defer/CubePruningPerMiniStack/Search.cpp rename to moses2/defer/CubePruningPerMiniStack/Search.cpp diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Search.h b/moses2/defer/CubePruningPerMiniStack/Search.h 
similarity index 100% rename from contrib/moses2/defer/CubePruningPerMiniStack/Search.h rename to moses2/defer/CubePruningPerMiniStack/Search.h diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Stacks.cpp b/moses2/defer/CubePruningPerMiniStack/Stacks.cpp similarity index 100% rename from contrib/moses2/defer/CubePruningPerMiniStack/Stacks.cpp rename to moses2/defer/CubePruningPerMiniStack/Stacks.cpp diff --git a/contrib/moses2/defer/CubePruningPerMiniStack/Stacks.h b/moses2/defer/CubePruningPerMiniStack/Stacks.h similarity index 100% rename from contrib/moses2/defer/CubePruningPerMiniStack/Stacks.h rename to moses2/defer/CubePruningPerMiniStack/Stacks.h diff --git a/contrib/moses2/legacy/Bitmap.cpp b/moses2/legacy/Bitmap.cpp similarity index 100% rename from contrib/moses2/legacy/Bitmap.cpp rename to moses2/legacy/Bitmap.cpp diff --git a/contrib/moses2/legacy/Bitmap.h b/moses2/legacy/Bitmap.h similarity index 100% rename from contrib/moses2/legacy/Bitmap.h rename to moses2/legacy/Bitmap.h diff --git a/contrib/moses2/legacy/Bitmaps.cpp b/moses2/legacy/Bitmaps.cpp similarity index 100% rename from contrib/moses2/legacy/Bitmaps.cpp rename to moses2/legacy/Bitmaps.cpp diff --git a/contrib/moses2/legacy/Bitmaps.h b/moses2/legacy/Bitmaps.h similarity index 100% rename from contrib/moses2/legacy/Bitmaps.h rename to moses2/legacy/Bitmaps.h diff --git a/contrib/moses2/legacy/Factor.cpp b/moses2/legacy/Factor.cpp similarity index 100% rename from contrib/moses2/legacy/Factor.cpp rename to moses2/legacy/Factor.cpp diff --git a/contrib/moses2/legacy/Factor.h b/moses2/legacy/Factor.h similarity index 100% rename from contrib/moses2/legacy/Factor.h rename to moses2/legacy/Factor.h diff --git a/contrib/moses2/legacy/FactorCollection.cpp b/moses2/legacy/FactorCollection.cpp similarity index 100% rename from contrib/moses2/legacy/FactorCollection.cpp rename to moses2/legacy/FactorCollection.cpp diff --git a/contrib/moses2/legacy/FactorCollection.h 
b/moses2/legacy/FactorCollection.h similarity index 100% rename from contrib/moses2/legacy/FactorCollection.h rename to moses2/legacy/FactorCollection.h diff --git a/contrib/moses2/legacy/InputFileStream.cpp b/moses2/legacy/InputFileStream.cpp similarity index 100% rename from contrib/moses2/legacy/InputFileStream.cpp rename to moses2/legacy/InputFileStream.cpp diff --git a/contrib/moses2/legacy/InputFileStream.h b/moses2/legacy/InputFileStream.h similarity index 100% rename from contrib/moses2/legacy/InputFileStream.h rename to moses2/legacy/InputFileStream.h diff --git a/contrib/moses2/legacy/Matrix.cpp b/moses2/legacy/Matrix.cpp similarity index 100% rename from contrib/moses2/legacy/Matrix.cpp rename to moses2/legacy/Matrix.cpp diff --git a/contrib/moses2/legacy/Matrix.h b/moses2/legacy/Matrix.h similarity index 100% rename from contrib/moses2/legacy/Matrix.h rename to moses2/legacy/Matrix.h diff --git a/contrib/moses2/legacy/OutputCollector.h b/moses2/legacy/OutputCollector.h similarity index 100% rename from contrib/moses2/legacy/OutputCollector.h rename to moses2/legacy/OutputCollector.h diff --git a/contrib/moses2/legacy/OutputFileStream.cpp b/moses2/legacy/OutputFileStream.cpp similarity index 100% rename from contrib/moses2/legacy/OutputFileStream.cpp rename to moses2/legacy/OutputFileStream.cpp diff --git a/contrib/moses2/legacy/OutputFileStream.h b/moses2/legacy/OutputFileStream.h similarity index 100% rename from contrib/moses2/legacy/OutputFileStream.h rename to moses2/legacy/OutputFileStream.h diff --git a/contrib/moses2/legacy/Parameter.cpp b/moses2/legacy/Parameter.cpp similarity index 100% rename from contrib/moses2/legacy/Parameter.cpp rename to moses2/legacy/Parameter.cpp diff --git a/contrib/moses2/legacy/Parameter.h b/moses2/legacy/Parameter.h similarity index 100% rename from contrib/moses2/legacy/Parameter.h rename to moses2/legacy/Parameter.h diff --git a/contrib/moses2/legacy/Range.cpp b/moses2/legacy/Range.cpp similarity index 100% rename 
from contrib/moses2/legacy/Range.cpp rename to moses2/legacy/Range.cpp diff --git a/contrib/moses2/legacy/Range.h b/moses2/legacy/Range.h similarity index 100% rename from contrib/moses2/legacy/Range.h rename to moses2/legacy/Range.h diff --git a/contrib/moses2/legacy/ThreadPool.cpp b/moses2/legacy/ThreadPool.cpp similarity index 100% rename from contrib/moses2/legacy/ThreadPool.cpp rename to moses2/legacy/ThreadPool.cpp diff --git a/contrib/moses2/legacy/ThreadPool.h b/moses2/legacy/ThreadPool.h similarity index 100% rename from contrib/moses2/legacy/ThreadPool.h rename to moses2/legacy/ThreadPool.h diff --git a/contrib/moses2/legacy/Timer.cpp b/moses2/legacy/Timer.cpp similarity index 100% rename from contrib/moses2/legacy/Timer.cpp rename to moses2/legacy/Timer.cpp diff --git a/contrib/moses2/legacy/Timer.h b/moses2/legacy/Timer.h similarity index 100% rename from contrib/moses2/legacy/Timer.h rename to moses2/legacy/Timer.h diff --git a/contrib/moses2/legacy/Util2.cpp b/moses2/legacy/Util2.cpp similarity index 100% rename from contrib/moses2/legacy/Util2.cpp rename to moses2/legacy/Util2.cpp diff --git a/contrib/moses2/legacy/Util2.h b/moses2/legacy/Util2.h similarity index 100% rename from contrib/moses2/legacy/Util2.h rename to moses2/legacy/Util2.h diff --git a/contrib/moses2/legacy/gzfilebuf.h b/moses2/legacy/gzfilebuf.h similarity index 100% rename from contrib/moses2/legacy/gzfilebuf.h rename to moses2/legacy/gzfilebuf.h diff --git a/contrib/moses2/parameters/AllOptions.cpp b/moses2/parameters/AllOptions.cpp similarity index 100% rename from contrib/moses2/parameters/AllOptions.cpp rename to moses2/parameters/AllOptions.cpp diff --git a/contrib/moses2/parameters/AllOptions.h b/moses2/parameters/AllOptions.h similarity index 100% rename from contrib/moses2/parameters/AllOptions.h rename to moses2/parameters/AllOptions.h diff --git a/contrib/moses2/parameters/BeamSearchOptions.h b/moses2/parameters/BeamSearchOptions.h similarity index 100% rename from 
contrib/moses2/parameters/BeamSearchOptions.h rename to moses2/parameters/BeamSearchOptions.h diff --git a/contrib/moses2/parameters/BookkeepingOptions.cpp b/moses2/parameters/BookkeepingOptions.cpp similarity index 100% rename from contrib/moses2/parameters/BookkeepingOptions.cpp rename to moses2/parameters/BookkeepingOptions.cpp diff --git a/contrib/moses2/parameters/BookkeepingOptions.h b/moses2/parameters/BookkeepingOptions.h similarity index 100% rename from contrib/moses2/parameters/BookkeepingOptions.h rename to moses2/parameters/BookkeepingOptions.h diff --git a/contrib/moses2/parameters/ContextParameters.cpp b/moses2/parameters/ContextParameters.cpp similarity index 100% rename from contrib/moses2/parameters/ContextParameters.cpp rename to moses2/parameters/ContextParameters.cpp diff --git a/contrib/moses2/parameters/ContextParameters.h b/moses2/parameters/ContextParameters.h similarity index 100% rename from contrib/moses2/parameters/ContextParameters.h rename to moses2/parameters/ContextParameters.h diff --git a/contrib/moses2/parameters/CubePruningOptions.cpp b/moses2/parameters/CubePruningOptions.cpp similarity index 100% rename from contrib/moses2/parameters/CubePruningOptions.cpp rename to moses2/parameters/CubePruningOptions.cpp diff --git a/contrib/moses2/parameters/CubePruningOptions.h b/moses2/parameters/CubePruningOptions.h similarity index 100% rename from contrib/moses2/parameters/CubePruningOptions.h rename to moses2/parameters/CubePruningOptions.h diff --git a/contrib/moses2/parameters/InputOptions.cpp b/moses2/parameters/InputOptions.cpp similarity index 100% rename from contrib/moses2/parameters/InputOptions.cpp rename to moses2/parameters/InputOptions.cpp diff --git a/contrib/moses2/parameters/InputOptions.h b/moses2/parameters/InputOptions.h similarity index 100% rename from contrib/moses2/parameters/InputOptions.h rename to moses2/parameters/InputOptions.h diff --git a/contrib/moses2/parameters/LMBR_Options.cpp 
b/moses2/parameters/LMBR_Options.cpp similarity index 100% rename from contrib/moses2/parameters/LMBR_Options.cpp rename to moses2/parameters/LMBR_Options.cpp diff --git a/contrib/moses2/parameters/LMBR_Options.h b/moses2/parameters/LMBR_Options.h similarity index 100% rename from contrib/moses2/parameters/LMBR_Options.h rename to moses2/parameters/LMBR_Options.h diff --git a/contrib/moses2/parameters/LookupOptions.h b/moses2/parameters/LookupOptions.h similarity index 100% rename from contrib/moses2/parameters/LookupOptions.h rename to moses2/parameters/LookupOptions.h diff --git a/contrib/moses2/parameters/MBR_Options.cpp b/moses2/parameters/MBR_Options.cpp similarity index 100% rename from contrib/moses2/parameters/MBR_Options.cpp rename to moses2/parameters/MBR_Options.cpp diff --git a/contrib/moses2/parameters/MBR_Options.h b/moses2/parameters/MBR_Options.h similarity index 100% rename from contrib/moses2/parameters/MBR_Options.h rename to moses2/parameters/MBR_Options.h diff --git a/contrib/moses2/parameters/NBestOptions.cpp b/moses2/parameters/NBestOptions.cpp similarity index 100% rename from contrib/moses2/parameters/NBestOptions.cpp rename to moses2/parameters/NBestOptions.cpp diff --git a/contrib/moses2/parameters/NBestOptions.h b/moses2/parameters/NBestOptions.h similarity index 100% rename from contrib/moses2/parameters/NBestOptions.h rename to moses2/parameters/NBestOptions.h diff --git a/contrib/moses2/parameters/OOVHandlingOptions.cpp b/moses2/parameters/OOVHandlingOptions.cpp similarity index 100% rename from contrib/moses2/parameters/OOVHandlingOptions.cpp rename to moses2/parameters/OOVHandlingOptions.cpp diff --git a/contrib/moses2/parameters/OOVHandlingOptions.h b/moses2/parameters/OOVHandlingOptions.h similarity index 100% rename from contrib/moses2/parameters/OOVHandlingOptions.h rename to moses2/parameters/OOVHandlingOptions.h diff --git a/contrib/moses2/parameters/OptionsBaseClass.cpp b/moses2/parameters/OptionsBaseClass.cpp similarity 
index 100% rename from contrib/moses2/parameters/OptionsBaseClass.cpp rename to moses2/parameters/OptionsBaseClass.cpp diff --git a/contrib/moses2/parameters/OptionsBaseClass.h b/moses2/parameters/OptionsBaseClass.h similarity index 100% rename from contrib/moses2/parameters/OptionsBaseClass.h rename to moses2/parameters/OptionsBaseClass.h diff --git a/contrib/moses2/parameters/ReorderingOptions.cpp b/moses2/parameters/ReorderingOptions.cpp similarity index 100% rename from contrib/moses2/parameters/ReorderingOptions.cpp rename to moses2/parameters/ReorderingOptions.cpp diff --git a/contrib/moses2/parameters/ReorderingOptions.h b/moses2/parameters/ReorderingOptions.h similarity index 100% rename from contrib/moses2/parameters/ReorderingOptions.h rename to moses2/parameters/ReorderingOptions.h diff --git a/contrib/moses2/parameters/ReportingOptions.cpp b/moses2/parameters/ReportingOptions.cpp similarity index 100% rename from contrib/moses2/parameters/ReportingOptions.cpp rename to moses2/parameters/ReportingOptions.cpp diff --git a/contrib/moses2/parameters/ReportingOptions.h b/moses2/parameters/ReportingOptions.h similarity index 100% rename from contrib/moses2/parameters/ReportingOptions.h rename to moses2/parameters/ReportingOptions.h diff --git a/contrib/moses2/parameters/SearchOptions.cpp b/moses2/parameters/SearchOptions.cpp similarity index 100% rename from contrib/moses2/parameters/SearchOptions.cpp rename to moses2/parameters/SearchOptions.cpp diff --git a/contrib/moses2/parameters/SearchOptions.h b/moses2/parameters/SearchOptions.h similarity index 100% rename from contrib/moses2/parameters/SearchOptions.h rename to moses2/parameters/SearchOptions.h diff --git a/contrib/moses2/parameters/ServerOptions.cpp b/moses2/parameters/ServerOptions.cpp similarity index 100% rename from contrib/moses2/parameters/ServerOptions.cpp rename to moses2/parameters/ServerOptions.cpp diff --git a/contrib/moses2/parameters/ServerOptions.h b/moses2/parameters/ServerOptions.h 
similarity index 100% rename from contrib/moses2/parameters/ServerOptions.h rename to moses2/parameters/ServerOptions.h diff --git a/contrib/moses2/parameters/SyntaxOptions.cpp b/moses2/parameters/SyntaxOptions.cpp similarity index 100% rename from contrib/moses2/parameters/SyntaxOptions.cpp rename to moses2/parameters/SyntaxOptions.cpp diff --git a/contrib/moses2/parameters/SyntaxOptions.h b/moses2/parameters/SyntaxOptions.h similarity index 100% rename from contrib/moses2/parameters/SyntaxOptions.h rename to moses2/parameters/SyntaxOptions.h diff --git a/contrib/moses2/pugiconfig.hpp b/moses2/pugiconfig.hpp similarity index 100% rename from contrib/moses2/pugiconfig.hpp rename to moses2/pugiconfig.hpp diff --git a/contrib/moses2/pugixml.cpp b/moses2/pugixml.cpp similarity index 100% rename from contrib/moses2/pugixml.cpp rename to moses2/pugixml.cpp diff --git a/contrib/moses2/pugixml.hpp b/moses2/pugixml.hpp similarity index 100% rename from contrib/moses2/pugixml.hpp rename to moses2/pugixml.hpp diff --git a/contrib/moses2/server/Server.cpp b/moses2/server/Server.cpp similarity index 100% rename from contrib/moses2/server/Server.cpp rename to moses2/server/Server.cpp diff --git a/contrib/moses2/server/Server.h b/moses2/server/Server.h similarity index 100% rename from contrib/moses2/server/Server.h rename to moses2/server/Server.h diff --git a/contrib/moses2/server/TranslationRequest.cpp b/moses2/server/TranslationRequest.cpp similarity index 100% rename from contrib/moses2/server/TranslationRequest.cpp rename to moses2/server/TranslationRequest.cpp diff --git a/contrib/moses2/server/TranslationRequest.h b/moses2/server/TranslationRequest.h similarity index 100% rename from contrib/moses2/server/TranslationRequest.h rename to moses2/server/TranslationRequest.h diff --git a/contrib/moses2/server/Translator.cpp b/moses2/server/Translator.cpp similarity index 100% rename from contrib/moses2/server/Translator.cpp rename to moses2/server/Translator.cpp diff --git 
a/contrib/moses2/server/Translator.h b/moses2/server/Translator.h similarity index 100% rename from contrib/moses2/server/Translator.h rename to moses2/server/Translator.h From eb95cab116b646b02e10061b0d5d90d6951589a8 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 31 Jan 2017 22:54:23 +0000 Subject: [PATCH 102/176] eclipse --- .../other-builds/moses2-cmd}/.cproject | 0 .../other-builds/moses2-cmd}/.project | 4 +- .../other-builds/moses2}/.cproject | 0 contrib/other-builds/moses2/.project | 1731 +++++++++++++++++ moses2/.project | 29 - 5 files changed, 1733 insertions(+), 31 deletions(-) rename {moses2-cmd => contrib/other-builds/moses2-cmd}/.cproject (100%) rename {moses2-cmd => contrib/other-builds/moses2-cmd}/.project (89%) rename {moses2 => contrib/other-builds/moses2}/.cproject (100%) create mode 100644 contrib/other-builds/moses2/.project delete mode 100644 moses2/.project diff --git a/moses2-cmd/.cproject b/contrib/other-builds/moses2-cmd/.cproject similarity index 100% rename from moses2-cmd/.cproject rename to contrib/other-builds/moses2-cmd/.cproject diff --git a/moses2-cmd/.project b/contrib/other-builds/moses2-cmd/.project similarity index 89% rename from moses2-cmd/.project rename to contrib/other-builds/moses2-cmd/.project index 5e0e0e2b1..84591d671 100644 --- a/moses2-cmd/.project +++ b/contrib/other-builds/moses2-cmd/.project @@ -32,12 +32,12 @@ Main.cpp 1 - PARENT-1-PROJECT_LOC/moses2/Main.cpp + PARENT-3-PROJECT_LOC/moses2/Main.cpp Main.h 1 - PARENT-1-PROJECT_LOC/moses2/Main.h + PARENT-3-PROJECT_LOC/moses2/Main.h diff --git a/moses2/.cproject b/contrib/other-builds/moses2/.cproject similarity index 100% rename from moses2/.cproject rename to contrib/other-builds/moses2/.cproject diff --git a/contrib/other-builds/moses2/.project b/contrib/other-builds/moses2/.project new file mode 100644 index 000000000..8142f8b63 --- /dev/null +++ b/contrib/other-builds/moses2/.project @@ -0,0 +1,1731 @@ + + + moses2 + + + moses + util + + + + 
org.eclipse.cdt.managedbuilder.core.genmakebuilder + clean,full,incremental, + + + + + org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder + full,incremental, + + + + + + org.eclipse.cdt.core.cnature + org.eclipse.cdt.core.ccnature + org.eclipse.cdt.managedbuilder.core.managedBuildNature + org.eclipse.cdt.managedbuilder.core.ScannerConfigNature + + + + AlignmentInfo.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/AlignmentInfo.cpp + + + AlignmentInfo.h + 1 + PARENT-3-PROJECT_LOC/moses2/AlignmentInfo.h + + + AlignmentInfoCollection.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/AlignmentInfoCollection.cpp + + + AlignmentInfoCollection.h + 1 + PARENT-3-PROJECT_LOC/moses2/AlignmentInfoCollection.h + + + ArcLists.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/ArcLists.cpp + + + ArcLists.h + 1 + PARENT-3-PROJECT_LOC/moses2/ArcLists.h + + + Array.h + 1 + PARENT-3-PROJECT_LOC/moses2/Array.h + + + EstimatedScores.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/EstimatedScores.cpp + + + EstimatedScores.h + 1 + PARENT-3-PROJECT_LOC/moses2/EstimatedScores.h + + + FF + 2 + virtual:/virtual + + + HypothesisBase.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/HypothesisBase.cpp + + + HypothesisBase.h + 1 + PARENT-3-PROJECT_LOC/moses2/HypothesisBase.h + + + HypothesisColl.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/HypothesisColl.cpp + + + HypothesisColl.h + 1 + PARENT-3-PROJECT_LOC/moses2/HypothesisColl.h + + + InputPathBase.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/InputPathBase.cpp + + + InputPathBase.h + 1 + PARENT-3-PROJECT_LOC/moses2/InputPathBase.h + + + InputPathsBase.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/InputPathsBase.cpp + + + InputPathsBase.h + 1 + PARENT-3-PROJECT_LOC/moses2/InputPathsBase.h + + + InputType.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/InputType.cpp + + + InputType.h + 1 + PARENT-3-PROJECT_LOC/moses2/InputType.h + + + Jamfile + 1 + PARENT-3-PROJECT_LOC/moses2/Jamfile + + + LM + 2 + virtual:/virtual + + + Main.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/Main.cpp + + + ManagerBase.cpp + 1 + 
PARENT-3-PROJECT_LOC/moses2/ManagerBase.cpp + + + ManagerBase.h + 1 + PARENT-3-PROJECT_LOC/moses2/ManagerBase.h + + + MemPool.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/MemPool.cpp + + + MemPool.h + 1 + PARENT-3-PROJECT_LOC/moses2/MemPool.h + + + MemPoolAllocator.h + 1 + PARENT-3-PROJECT_LOC/moses2/MemPoolAllocator.h + + + MorphoTrie + 2 + virtual:/virtual + + + Phrase.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/Phrase.cpp + + + Phrase.h + 1 + PARENT-3-PROJECT_LOC/moses2/Phrase.h + + + PhraseBased + 2 + virtual:/virtual + + + PhraseImplTemplate.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseImplTemplate.h + + + Recycler.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/Recycler.cpp + + + Recycler.h + 1 + PARENT-3-PROJECT_LOC/moses2/Recycler.h + + + SCFG + 2 + virtual:/virtual + + + Scores.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/Scores.cpp + + + Scores.h + 1 + PARENT-3-PROJECT_LOC/moses2/Scores.h + + + SubPhrase.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SubPhrase.cpp + + + SubPhrase.h + 1 + PARENT-3-PROJECT_LOC/moses2/SubPhrase.h + + + System.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/System.cpp + + + System.h + 1 + PARENT-3-PROJECT_LOC/moses2/System.h + + + TargetPhrase.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TargetPhrase.cpp + + + TargetPhrase.h + 1 + PARENT-3-PROJECT_LOC/moses2/TargetPhrase.h + + + TranslationModel + 2 + virtual:/virtual + + + TranslationTask.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationTask.cpp + + + TranslationTask.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationTask.h + + + TrellisPaths.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TrellisPaths.cpp + + + TrellisPaths.h + 1 + PARENT-3-PROJECT_LOC/moses2/TrellisPaths.h + + + TypeDef.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TypeDef.cpp + + + TypeDef.h + 1 + PARENT-3-PROJECT_LOC/moses2/TypeDef.h + + + Vector.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/Vector.cpp + + + Vector.h + 1 + PARENT-3-PROJECT_LOC/moses2/Vector.h + + + Weights.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/Weights.cpp + + + Weights.h + 1 + PARENT-3-PROJECT_LOC/moses2/Weights.h + + + 
Word.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/Word.cpp + + + Word.h + 1 + PARENT-3-PROJECT_LOC/moses2/Word.h + + + defer + 2 + virtual:/virtual + + + legacy + 2 + virtual:/virtual + + + parameters + 2 + virtual:/virtual + + + pugiconfig.hpp + 1 + PARENT-3-PROJECT_LOC/moses2/pugiconfig.hpp + + + pugixml.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/pugixml.cpp + + + pugixml.hpp + 1 + PARENT-3-PROJECT_LOC/moses2/pugixml.hpp + + + server + 2 + virtual:/virtual + + + FF/Distortion.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/Distortion.cpp + + + FF/Distortion.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/Distortion.h + + + FF/FFState.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/FFState.cpp + + + FF/FFState.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/FFState.h + + + FF/FeatureFunction.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/FeatureFunction.cpp + + + FF/FeatureFunction.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/FeatureFunction.h + + + FF/FeatureFunctions.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/FeatureFunctions.cpp + + + FF/FeatureFunctions.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/FeatureFunctions.h + + + FF/FeatureRegistry.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/FeatureRegistry.cpp + + + FF/FeatureRegistry.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/FeatureRegistry.h + + + FF/LexicalReordering + 2 + virtual:/virtual + + + FF/OSM + 2 + virtual:/virtual + + + FF/PhrasePenalty.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/PhrasePenalty.cpp + + + FF/PhrasePenalty.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/PhrasePenalty.h + + + FF/PointerState.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/PointerState.cpp + + + FF/PointerState.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/PointerState.h + + + FF/SkeletonStatefulFF.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/SkeletonStatefulFF.cpp + + + FF/SkeletonStatefulFF.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/SkeletonStatefulFF.h + + + FF/SkeletonStatelessFF.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/SkeletonStatelessFF.cpp + + + FF/SkeletonStatelessFF.h + 1 + 
PARENT-3-PROJECT_LOC/moses2/FF/SkeletonStatelessFF.h + + + FF/StatefulFeatureFunction.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/StatefulFeatureFunction.cpp + + + FF/StatefulFeatureFunction.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/StatefulFeatureFunction.h + + + FF/StatelessFeatureFunction.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/StatelessFeatureFunction.cpp + + + FF/StatelessFeatureFunction.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/StatelessFeatureFunction.h + + + FF/WordPenalty.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/WordPenalty.cpp + + + FF/WordPenalty.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/WordPenalty.h + + + LM/GPULM.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/LM/GPULM.cpp + + + LM/GPULM.h + 1 + PARENT-3-PROJECT_LOC/moses2/LM/GPULM.h + + + LM/KENLM.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/LM/KENLM.cpp + + + LM/KENLM.h + 1 + PARENT-3-PROJECT_LOC/moses2/LM/KENLM.h + + + LM/KENLMBatch.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/LM/KENLMBatch.cpp + + + LM/KENLMBatch.h + 1 + PARENT-3-PROJECT_LOC/moses2/LM/KENLMBatch.h + + + LM/LanguageModel.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/LM/LanguageModel.cpp + + + LM/LanguageModel.h + 1 + PARENT-3-PROJECT_LOC/moses2/LM/LanguageModel.h + + + LM/LanguageModelDALM.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/LM/LanguageModelDALM.cpp + + + LM/LanguageModelDALM.h + 1 + PARENT-3-PROJECT_LOC/moses2/LM/LanguageModelDALM.h + + + MorphoTrie/MorphTrie.h + 1 + PARENT-3-PROJECT_LOC/moses2/MorphoTrie/MorphTrie.h + + + MorphoTrie/Node.h + 1 + PARENT-3-PROJECT_LOC/moses2/MorphoTrie/Node.h + + + MorphoTrie/utils.h + 1 + PARENT-3-PROJECT_LOC/moses2/MorphoTrie/utils.h + + + PhraseBased/CubePruningMiniStack + 2 + virtual:/virtual + + + PhraseBased/Hypothesis.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/Hypothesis.cpp + + + PhraseBased/Hypothesis.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/Hypothesis.h + + + PhraseBased/InputPath.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/InputPath.cpp + + + PhraseBased/InputPath.h + 1 + 
PARENT-3-PROJECT_LOC/moses2/PhraseBased/InputPath.h + + + PhraseBased/InputPaths.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/InputPaths.cpp + + + PhraseBased/InputPaths.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/InputPaths.h + + + PhraseBased/Manager.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/Manager.cpp + + + PhraseBased/Manager.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/Manager.h + + + PhraseBased/Normal + 2 + virtual:/virtual + + + PhraseBased/PhraseImpl.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/PhraseImpl.cpp + + + PhraseBased/PhraseImpl.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/PhraseImpl.h + + + PhraseBased/ReorderingConstraint.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/ReorderingConstraint.cpp + + + PhraseBased/ReorderingConstraint.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/ReorderingConstraint.h + + + PhraseBased/Search.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/Search.cpp + + + PhraseBased/Search.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/Search.h + + + PhraseBased/Sentence.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/Sentence.cpp + + + PhraseBased/Sentence.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/Sentence.h + + + PhraseBased/TargetPhraseImpl.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/TargetPhraseImpl.cpp + + + PhraseBased/TargetPhraseImpl.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/TargetPhraseImpl.h + + + PhraseBased/TargetPhrases.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/TargetPhrases.cpp + + + PhraseBased/TargetPhrases.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/TargetPhrases.h + + + PhraseBased/TrellisPath.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/TrellisPath.cpp + + + PhraseBased/TrellisPath.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/TrellisPath.h + + + SCFG/ActiveChart.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/ActiveChart.cpp + + + SCFG/ActiveChart.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/ActiveChart.h + + + SCFG/Hypothesis.cpp + 1 + 
PARENT-3-PROJECT_LOC/moses2/SCFG/Hypothesis.cpp + + + SCFG/Hypothesis.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/Hypothesis.h + + + SCFG/InputPath.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/InputPath.cpp + + + SCFG/InputPath.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/InputPath.h + + + SCFG/InputPaths.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/InputPaths.cpp + + + SCFG/InputPaths.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/InputPaths.h + + + SCFG/Manager.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/Manager.cpp + + + SCFG/Manager.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/Manager.h + + + SCFG/Misc.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/Misc.cpp + + + SCFG/Misc.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/Misc.h + + + SCFG/PhraseImpl.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/PhraseImpl.cpp + + + SCFG/PhraseImpl.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/PhraseImpl.h + + + SCFG/Sentence.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/Sentence.cpp + + + SCFG/Sentence.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/Sentence.h + + + SCFG/Stack.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/Stack.cpp + + + SCFG/Stack.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/Stack.h + + + SCFG/Stacks.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/Stacks.cpp + + + SCFG/Stacks.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/Stacks.h + + + SCFG/TargetPhraseImpl.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/TargetPhraseImpl.cpp + + + SCFG/TargetPhraseImpl.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/TargetPhraseImpl.h + + + SCFG/TargetPhrases.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/TargetPhrases.cpp + + + SCFG/TargetPhrases.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/TargetPhrases.h + + + SCFG/Word.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/Word.cpp + + + SCFG/Word.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/Word.h + + + SCFG/nbest + 2 + virtual:/virtual + + + TranslationModel/CompactPT + 2 + virtual:/virtual + + + TranslationModel/Memory + 2 + virtual:/virtual + + + TranslationModel/PhraseTable.cpp + 1 + 
PARENT-3-PROJECT_LOC/moses2/TranslationModel/PhraseTable.cpp + + + TranslationModel/PhraseTable.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/PhraseTable.h + + + TranslationModel/ProbingPT + 2 + virtual:/virtual + + + TranslationModel/Transliteration.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/Transliteration.cpp + + + TranslationModel/Transliteration.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/Transliteration.h + + + TranslationModel/UnknownWordPenalty.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/UnknownWordPenalty.cpp + + + TranslationModel/UnknownWordPenalty.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/UnknownWordPenalty.h + + + defer/CubePruningBitmapStack + 2 + virtual:/virtual + + + defer/CubePruningCardinalStack + 2 + virtual:/virtual + + + defer/CubePruningPerBitmap + 2 + virtual:/virtual + + + defer/CubePruningPerMiniStack + 2 + virtual:/virtual + + + legacy/Bitmap.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Bitmap.cpp + + + legacy/Bitmap.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Bitmap.h + + + legacy/Bitmaps.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Bitmaps.cpp + + + legacy/Bitmaps.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Bitmaps.h + + + legacy/Factor.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Factor.cpp + + + legacy/Factor.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Factor.h + + + legacy/FactorCollection.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/FactorCollection.cpp + + + legacy/FactorCollection.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/FactorCollection.h + + + legacy/InputFileStream.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/InputFileStream.cpp + + + legacy/InputFileStream.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/InputFileStream.h + + + legacy/Matrix.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Matrix.cpp + + + legacy/Matrix.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Matrix.h + + + legacy/OutputCollector.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/OutputCollector.h + + + 
legacy/OutputFileStream.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/OutputFileStream.cpp + + + legacy/OutputFileStream.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/OutputFileStream.h + + + legacy/Parameter.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Parameter.cpp + + + legacy/Parameter.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Parameter.h + + + legacy/Range.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Range.cpp + + + legacy/Range.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Range.h + + + legacy/ThreadPool.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/ThreadPool.cpp + + + legacy/ThreadPool.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/ThreadPool.h + + + legacy/Timer.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Timer.cpp + + + legacy/Timer.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Timer.h + + + legacy/Util2.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Util2.cpp + + + legacy/Util2.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/Util2.h + + + legacy/gzfilebuf.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/gzfilebuf.h + + + parameters/AllOptions.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/AllOptions.cpp + + + parameters/AllOptions.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/AllOptions.h + + + parameters/BeamSearchOptions.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/BeamSearchOptions.h + + + parameters/BookkeepingOptions.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/BookkeepingOptions.cpp + + + parameters/BookkeepingOptions.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/BookkeepingOptions.h + + + parameters/ContextParameters.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/ContextParameters.cpp + + + parameters/ContextParameters.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/ContextParameters.h + + + parameters/CubePruningOptions.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/CubePruningOptions.cpp + + + parameters/CubePruningOptions.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/CubePruningOptions.h + + + parameters/InputOptions.cpp + 1 + 
PARENT-3-PROJECT_LOC/moses2/parameters/InputOptions.cpp + + + parameters/InputOptions.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/InputOptions.h + + + parameters/LMBR_Options.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/LMBR_Options.cpp + + + parameters/LMBR_Options.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/LMBR_Options.h + + + parameters/LookupOptions.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/LookupOptions.h + + + parameters/MBR_Options.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/MBR_Options.cpp + + + parameters/MBR_Options.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/MBR_Options.h + + + parameters/NBestOptions.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/NBestOptions.cpp + + + parameters/NBestOptions.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/NBestOptions.h + + + parameters/OOVHandlingOptions.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/OOVHandlingOptions.cpp + + + parameters/OOVHandlingOptions.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/OOVHandlingOptions.h + + + parameters/OptionsBaseClass.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/OptionsBaseClass.cpp + + + parameters/OptionsBaseClass.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/OptionsBaseClass.h + + + parameters/ReorderingOptions.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/ReorderingOptions.cpp + + + parameters/ReorderingOptions.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/ReorderingOptions.h + + + parameters/ReportingOptions.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/ReportingOptions.cpp + + + parameters/ReportingOptions.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/ReportingOptions.h + + + parameters/SearchOptions.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/SearchOptions.cpp + + + parameters/SearchOptions.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/SearchOptions.h + + + parameters/ServerOptions.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/ServerOptions.cpp + + + parameters/ServerOptions.h + 1 + 
PARENT-3-PROJECT_LOC/moses2/parameters/ServerOptions.h + + + parameters/SyntaxOptions.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/SyntaxOptions.cpp + + + parameters/SyntaxOptions.h + 1 + PARENT-3-PROJECT_LOC/moses2/parameters/SyntaxOptions.h + + + server/Server.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/server/Server.cpp + + + server/Server.h + 1 + PARENT-3-PROJECT_LOC/moses2/server/Server.h + + + server/TranslationRequest.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/server/TranslationRequest.cpp + + + server/TranslationRequest.h + 1 + PARENT-3-PROJECT_LOC/moses2/server/TranslationRequest.h + + + server/Translator.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/server/Translator.cpp + + + server/Translator.h + 1 + PARENT-3-PROJECT_LOC/moses2/server/Translator.h + + + FF/LexicalReordering/BidirectionalReorderingState.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp + + + FF/LexicalReordering/BidirectionalReorderingState.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/BidirectionalReorderingState.h + + + FF/LexicalReordering/HReorderingBackwardState.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp + + + FF/LexicalReordering/HReorderingBackwardState.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/HReorderingBackwardState.h + + + FF/LexicalReordering/HReorderingForwardState.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/HReorderingForwardState.cpp + + + FF/LexicalReordering/HReorderingForwardState.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/HReorderingForwardState.h + + + FF/LexicalReordering/LRModel.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/LRModel.cpp + + + FF/LexicalReordering/LRModel.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/LRModel.h + + + FF/LexicalReordering/LRState.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/LRState.cpp + + + FF/LexicalReordering/LRState.h + 1 + 
PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/LRState.h + + + FF/LexicalReordering/LexicalReordering.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/LexicalReordering.cpp + + + FF/LexicalReordering/LexicalReordering.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/LexicalReordering.h + + + FF/LexicalReordering/PhraseBasedReorderingState.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp + + + FF/LexicalReordering/PhraseBasedReorderingState.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h + + + FF/LexicalReordering/ReorderingStack.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/ReorderingStack.cpp + + + FF/LexicalReordering/ReorderingStack.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/LexicalReordering/ReorderingStack.h + + + FF/OSM/KenOSM.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/OSM/KenOSM.cpp + + + FF/OSM/KenOSM.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/OSM/KenOSM.h + + + FF/OSM/OpSequenceModel.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/OSM/OpSequenceModel.cpp + + + FF/OSM/OpSequenceModel.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/OSM/OpSequenceModel.h + + + FF/OSM/osmHyp.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/OSM/osmHyp.cpp + + + FF/OSM/osmHyp.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/OSM/osmHyp.h + + + PhraseBased/CubePruningMiniStack/Misc.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp + + + PhraseBased/CubePruningMiniStack/Misc.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/CubePruningMiniStack/Misc.h + + + PhraseBased/CubePruningMiniStack/Search.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/CubePruningMiniStack/Search.cpp + + + PhraseBased/CubePruningMiniStack/Search.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/CubePruningMiniStack/Search.h + + + PhraseBased/CubePruningMiniStack/Stack.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp + + + PhraseBased/CubePruningMiniStack/Stack.h + 1 + 
PARENT-3-PROJECT_LOC/moses2/PhraseBased/CubePruningMiniStack/Stack.h + + + PhraseBased/Normal/Search.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/Normal/Search.cpp + + + PhraseBased/Normal/Search.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/Normal/Search.h + + + PhraseBased/Normal/Stack.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/Normal/Stack.cpp + + + PhraseBased/Normal/Stack.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/Normal/Stack.h + + + PhraseBased/Normal/Stacks.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/Normal/Stacks.cpp + + + PhraseBased/Normal/Stacks.h + 1 + PARENT-3-PROJECT_LOC/moses2/PhraseBased/Normal/Stacks.h + + + SCFG/nbest/KBestExtractor.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/KBestExtractor.cpp + + + SCFG/nbest/KBestExtractor.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/KBestExtractor.h + + + SCFG/nbest/NBest.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/NBest.cpp + + + SCFG/nbest/NBest.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/NBest.h + + + SCFG/nbest/NBestColl.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/NBestColl.cpp + + + SCFG/nbest/NBestColl.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/NBestColl.h + + + SCFG/nbest/NBests.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/NBests.cpp + + + SCFG/nbest/NBests.h + 1 + PARENT-3-PROJECT_LOC/moses2/SCFG/nbest/NBests.h + + + TranslationModel/CompactPT/BlockHashIndex.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp + + + TranslationModel/CompactPT/BlockHashIndex.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/BlockHashIndex.h + + + TranslationModel/CompactPT/CanonicalHuffman.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/CanonicalHuffman.h + + + TranslationModel/CompactPT/CmphStringVectorAdapter.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp + + + TranslationModel/CompactPT/CmphStringVectorAdapter.h + 1 + 
PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h + + + TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp + + + TranslationModel/CompactPT/LexicalReorderingTableCompact.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h + + + TranslationModel/CompactPT/ListCoders.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/ListCoders.h + + + TranslationModel/CompactPT/MmapAllocator.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/MmapAllocator.h + + + TranslationModel/CompactPT/MonotonicVector.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/MonotonicVector.h + + + TranslationModel/CompactPT/MurmurHash3.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/MurmurHash3.cpp + + + TranslationModel/CompactPT/MurmurHash3.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/MurmurHash3.h + + + TranslationModel/CompactPT/PackedArray.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/PackedArray.h + + + TranslationModel/CompactPT/PhraseDecoder.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp + + + TranslationModel/CompactPT/PhraseDecoder.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/PhraseDecoder.h + + + TranslationModel/CompactPT/PhraseTableCompact.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp + + + TranslationModel/CompactPT/PhraseTableCompact.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/PhraseTableCompact.h + + + TranslationModel/CompactPT/StringVector.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/StringVector.h + + + TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp + + + 
TranslationModel/CompactPT/TargetPhraseCollectionCache.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h + + + TranslationModel/CompactPT/ThrowingFwrite.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/ThrowingFwrite.cpp + + + TranslationModel/CompactPT/ThrowingFwrite.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/ThrowingFwrite.h + + + TranslationModel/Memory/Node.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/Memory/Node.h + + + TranslationModel/Memory/PhraseTableMemory.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/Memory/PhraseTableMemory.cpp + + + TranslationModel/Memory/PhraseTableMemory.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/Memory/PhraseTableMemory.h + + + TranslationModel/ProbingPT/ProbingPT.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/ProbingPT.cpp + + + TranslationModel/ProbingPT/ProbingPT.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/ProbingPT.h + + + TranslationModel/ProbingPT/StoreTarget.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/StoreTarget.cpp + + + TranslationModel/ProbingPT/StoreTarget.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/StoreTarget.h + + + TranslationModel/ProbingPT/StoreVocab.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/StoreVocab.cpp + + + TranslationModel/ProbingPT/StoreVocab.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/StoreVocab.h + + + TranslationModel/ProbingPT/hash.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/hash.cpp + + + TranslationModel/ProbingPT/hash.hh + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/hash.hh + + + TranslationModel/ProbingPT/line_splitter.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/line_splitter.cpp + + + TranslationModel/ProbingPT/line_splitter.hh + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/line_splitter.hh + + + 
TranslationModel/ProbingPT/probing_hash_utils.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp + + + TranslationModel/ProbingPT/probing_hash_utils.hh + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh + + + TranslationModel/ProbingPT/querying.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/querying.cpp + + + TranslationModel/ProbingPT/querying.hh + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/querying.hh + + + TranslationModel/ProbingPT/storing.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/storing.cpp + + + TranslationModel/ProbingPT/storing.hh + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/storing.hh + + + TranslationModel/ProbingPT/vocabid.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/vocabid.cpp + + + TranslationModel/ProbingPT/vocabid.hh + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/vocabid.hh + + + defer/CubePruningBitmapStack/Misc.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningBitmapStack/Misc.cpp + + + defer/CubePruningBitmapStack/Misc.h + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningBitmapStack/Misc.h + + + defer/CubePruningBitmapStack/Search.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningBitmapStack/Search.cpp + + + defer/CubePruningBitmapStack/Search.h + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningBitmapStack/Search.h + + + defer/CubePruningBitmapStack/Stack.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningBitmapStack/Stack.cpp + + + defer/CubePruningBitmapStack/Stack.h + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningBitmapStack/Stack.h + + + defer/CubePruningCardinalStack/Misc.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningCardinalStack/Misc.cpp + + + defer/CubePruningCardinalStack/Misc.h + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningCardinalStack/Misc.h + + + defer/CubePruningCardinalStack/Search.cpp + 1 + 
PARENT-3-PROJECT_LOC/moses2/defer/CubePruningCardinalStack/Search.cpp + + + defer/CubePruningCardinalStack/Search.h + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningCardinalStack/Search.h + + + defer/CubePruningCardinalStack/Stack.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningCardinalStack/Stack.cpp + + + defer/CubePruningCardinalStack/Stack.h + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningCardinalStack/Stack.h + + + defer/CubePruningPerBitmap/Misc.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerBitmap/Misc.cpp + + + defer/CubePruningPerBitmap/Misc.h + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerBitmap/Misc.h + + + defer/CubePruningPerBitmap/Search.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerBitmap/Search.cpp + + + defer/CubePruningPerBitmap/Search.h + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerBitmap/Search.h + + + defer/CubePruningPerBitmap/Stacks.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerBitmap/Stacks.cpp + + + defer/CubePruningPerBitmap/Stacks.h + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerBitmap/Stacks.h + + + defer/CubePruningPerMiniStack/Misc.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerMiniStack/Misc.cpp + + + defer/CubePruningPerMiniStack/Misc.h + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerMiniStack/Misc.h + + + defer/CubePruningPerMiniStack/Search.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerMiniStack/Search.cpp + + + defer/CubePruningPerMiniStack/Search.h + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerMiniStack/Search.h + + + defer/CubePruningPerMiniStack/Stacks.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerMiniStack/Stacks.cpp + + + defer/CubePruningPerMiniStack/Stacks.h + 1 + PARENT-3-PROJECT_LOC/moses2/defer/CubePruningPerMiniStack/Stacks.h + + + diff --git a/moses2/.project b/moses2/.project deleted file mode 100644 index b17dc477e..000000000 --- a/moses2/.project +++ /dev/null @@ -1,29 +0,0 @@ - - - moses2 - - - 
moses - util - - - - org.eclipse.cdt.managedbuilder.core.genmakebuilder - clean,full,incremental, - - - - - org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder - full,incremental, - - - - - - org.eclipse.cdt.core.cnature - org.eclipse.cdt.core.ccnature - org.eclipse.cdt.managedbuilder.core.managedBuildNature - org.eclipse.cdt.managedbuilder.core.ScannerConfigNature - - From 5118e219cd1474072be9acf3576a3463250fadb0 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 31 Jan 2017 23:20:40 +0000 Subject: [PATCH 103/176] moses compiles --- contrib/other-builds/moses/.cproject | 3 +- contrib/other-builds/moses/.project | 935 +++++++++++++++++---------- 2 files changed, 581 insertions(+), 357 deletions(-) diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject index 0e654af8b..a4ad4bfd1 100644 --- a/contrib/other-builds/moses/.cproject +++ b/contrib/other-builds/moses/.cproject @@ -36,7 +36,6 @@ diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index adb9ad47e..de065158d 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -2150,11 +2150,6 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionary.h - - TranslationModel/PhraseDictionaryDynSuffixArray.README - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryDynSuffixArray.README - TranslationModel/PhraseDictionaryDynamicCacheBased.cpp 1 @@ -3330,6 +3325,11 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/StringVector.h + + TranslationModel/CompactPT/StringVectorTemp.h + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/StringVectorTemp.h + TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp 1 @@ -3390,6 +3390,11 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/StoreVocab.h + + TranslationModel/ProbingPT/bin + 2 + virtual:/virtual + TranslationModel/ProbingPT/hash.cpp 1 @@ -3645,11 +3650,46 @@ 2 virtual:/virtual + + TranslationModel/UG/bitext-find.cc + 
1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bitext-find.cc + + + TranslationModel/UG/check-coverage.cc + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/check-coverage.cc + + + TranslationModel/UG/check-coverage2.cc + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/check-coverage2.cc + + + TranslationModel/UG/check-coverage3.cc + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/check-coverage3.cc + + + TranslationModel/UG/check-coverage5.cc + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/check-coverage5.cc + TranslationModel/UG/count-ptable-features.cc 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/count-ptable-features.cc + + TranslationModel/UG/filter-pt.cc + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/filter-pt.cc + + + TranslationModel/UG/fuzzy.cc + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/fuzzy.cc + TranslationModel/UG/generic 2 @@ -3705,6 +3745,16 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_coherence.h + + TranslationModel/UG/sapt_pscore_cumulative_bias.h + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_cumulative_bias.h + + + TranslationModel/UG/sapt_pscore_length_ratio.h + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_length_ratio.h + TranslationModel/UG/sapt_pscore_lex1.h 1 @@ -3770,6 +3820,26 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/spe-check-coverage3.cc + + TranslationModel/UG/test-boost-threadpool.cc + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/test-boost-threadpool.cc + + + TranslationModel/UG/test-domspec.cc + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/test-domspec.cc + + + TranslationModel/UG/test-iptr.cc + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/test-iptr.cc + + + TranslationModel/UG/test-ranked-phrase-lookup.cc + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/test-ranked-phrase-lookup.cc + TranslationModel/UG/try-align.cc 1 @@ -3881,12 +3951,17 @@ virtual:/virtual - TranslationModel/CompactPT/bin/gcc-4.8 + 
TranslationModel/CompactPT/bin/darwin-4.2.1 2 virtual:/virtual - TranslationModel/UG/bin/gcc-4.8 + TranslationModel/ProbingPT/bin/darwin-4.2.1 + 2 + virtual:/virtual + + + TranslationModel/UG/bin/darwin-4.2.1 2 virtual:/virtual @@ -3936,9 +4011,9 @@ PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/Jamfile - TranslationModel/UG/mm/Makefile + TranslationModel/UG/mm/Makefile.x 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/Makefile + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/Makefile.x TranslationModel/UG/mm/bin @@ -4436,17 +4511,22 @@ PARENT-3-PROJECT_LOC/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h - TranslationModel/CompactPT/bin/gcc-4.8/release + TranslationModel/CompactPT/bin/darwin-4.2.1/release 2 virtual:/virtual - TranslationModel/UG/bin/gcc-4.8/release + TranslationModel/ProbingPT/bin/darwin-4.2.1/release 2 virtual:/virtual - TranslationModel/UG/generic/bin/gcc-4.8 + TranslationModel/UG/bin/darwin-4.2.1/release + 2 + virtual:/virtual + + + TranslationModel/UG/generic/bin/darwin-4.2.1 2 virtual:/virtual @@ -4531,12 +4611,7 @@ PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.h - TranslationModel/UG/mm/bin/clang-darwin-4.2.1 - 2 - virtual:/virtual - - - TranslationModel/UG/mm/bin/gcc-4.8 + TranslationModel/UG/mm/bin/darwin-4.2.1 2 virtual:/virtual @@ -4561,27 +4636,42 @@ virtual:/virtual - TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on + TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on 2 virtual:/virtual - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on + TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static 2 virtual:/virtual - TranslationModel/UG/generic/bin/gcc-4.8/release + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on 2 virtual:/virtual - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static 2 virtual:/virtual - 
TranslationModel/UG/mm/bin/gcc-4.8/release + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on + 2 + virtual:/virtual + + + TranslationModel/UG/bin/darwin-4.2.1/release/link-static + 2 + virtual:/virtual + + + TranslationModel/UG/generic/bin/darwin-4.2.1/release + 2 + virtual:/virtual + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release 2 virtual:/virtual @@ -4601,27 +4691,52 @@ virtual:/virtual - TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static + TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static 2 virtual:/virtual - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static + TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi 2 virtual:/virtual - TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static 2 virtual:/virtual - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi 2 virtual:/virtual - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static + 2 + virtual:/virtual + + + TranslationModel/UG/bin/darwin-4.2.1/release/link-static/threading-multi + 2 + virtual:/virtual + + + TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on + 2 + virtual:/virtual + + + TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static + 2 + virtual:/virtual + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on + 2 + virtual:/virtual + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static 2 virtual:/virtual @@ -5181,27 +5296,157 @@ PARENT-3-PROJECT_LOC/moses/LM/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Rand.o - TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi + 
TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi 2 virtual:/virtual - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi + TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/BlockHashIndex.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/BlockHashIndex.o + + + TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/CmphStringVectorAdapter.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/CmphStringVectorAdapter.o + + + TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/LexicalReorderingTableCompact.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/LexicalReorderingTableCompact.o + + + TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/LexicalReorderingTableCreator.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/LexicalReorderingTableCreator.o + + + TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/MurmurHash3.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/MurmurHash3.o + + + TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/PhraseDecoder.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/PhraseDecoder.o + + + TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/PhraseDictionaryCompact.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/PhraseDictionaryCompact.o + + + 
TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/PhraseTableCreator.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/PhraseTableCreator.o + + + TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/TargetPhraseCollectionCache.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/TargetPhraseCollectionCache.o + + + TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/ThrowingFwrite.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/ThrowingFwrite.o + + + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi 2 virtual:/virtual - TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/ProbingPT.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/ProbingPT.o + + + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/StoreTarget.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/StoreTarget.o + + + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/StoreVocab.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/StoreVocab.o + + + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/hash.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/hash.o + + + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/huffmanish.o + 1 + 
PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/huffmanish.o + + + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/line_splitter.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/line_splitter.o + + + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/probing_hash_utils.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/probing_hash_utils.o + + + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/quering.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/quering.o + + + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/querying.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/querying.o + + + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/storing.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/storing.o + + + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/vocabid.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/vocabid.o + + + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi 2 virtual:/virtual - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static + TranslationModel/UG/bin/darwin-4.2.1/release/link-static/threading-multi/TargetPhraseCollectionCache.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/link-static/threading-multi/TargetPhraseCollectionCache.o + + + TranslationModel/UG/bin/darwin-4.2.1/release/link-static/threading-multi/mmsapt.o 
+ 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/link-static/threading-multi/mmsapt.o + + + TranslationModel/UG/bin/darwin-4.2.1/release/link-static/threading-multi/mmsapt_align.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/link-static/threading-multi/mmsapt_align.o + + + TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static 2 virtual:/virtual - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static + TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi + 2 + virtual:/virtual + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static + 2 + virtual:/virtual + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi 2 virtual:/virtual @@ -5686,134 +5931,309 @@ PARENT-3-PROJECT_LOC/moses/LM/bin/BackwardTest.test/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/SkeletonLM.o - TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/BlockHashIndex.o + TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/BlockHashIndex.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/BlockHashIndex.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/BlockHashIndex.o - TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/CmphStringVectorAdapter.o + TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/CmphStringVectorAdapter.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/CmphStringVectorAdapter.o + 
PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/CmphStringVectorAdapter.o - TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCompact.o + TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCompact.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCompact.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCompact.o - TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCreator.o + TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCreator.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCreator.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTableCreator.o - TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/MurmurHash3.o + TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/MurmurHash3.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/MurmurHash3.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/MurmurHash3.o - TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PhraseDecoder.o + 
TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDecoder.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PhraseDecoder.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDecoder.o - TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryCompact.o + TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryCompact.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryCompact.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryCompact.o - TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PhraseTableCreator.o + TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseTableCreator.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PhraseTableCreator.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseTableCreator.o - TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ThrowingFwrite.o + TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ThrowingFwrite.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ThrowingFwrite.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ThrowingFwrite.o - 
TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/count-ptable-features + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ProbingPT.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/count-ptable-features + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ProbingPT.o - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/count-ptable-features.o + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/hash.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/count-ptable-features.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/hash.o - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmsapt.o + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/huffmanish.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmsapt.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/huffmanish.o - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmsapt_align.o + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/line_splitter.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmsapt_align.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/line_splitter.o - 
TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/probing_hash_utils.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/probing_hash_utils.o - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features.o + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/quering.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/quering.o - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/storing.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/storing.o - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus + TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/vocabid.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/vocabid.o - 
TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus.o + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/count-ptable-features 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/count-ptable-features - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup.o + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/count-ptable-features.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/count-ptable-features.o - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmsapt.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmsapt.o - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage.o + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmsapt_align.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmsapt_align.o - 
TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align.o + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-describe-features.o - TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-lookup + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-lookup + + + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-lookup.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ptable-lookup.o + + + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage + + + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage.o + 1 + 
PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage.o + + + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/try-align + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/try-align + + + TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/try-align.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/try-align.o + + + TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi 2 virtual:/virtual - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi + TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_get_options.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_get_options.o + + + TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_splice_arglist.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_splice_arglist.o + + + TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_stream.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_stream.o + + + TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_thread_pool.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_thread_pool.o + + + TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_thread_safe_counter.o + 1 + 
PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/link-static/threading-multi/ug_thread_safe_counter.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi 2 virtual:/virtual - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi - 2 - virtual:/virtual + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mmlex-build + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mmlex-build + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mmlex-build.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mmlex-build.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mtt-build + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mtt-build + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mtt-build.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/mtt-build.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/num_read_write.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/num_read_write.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/symal2mam + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/symal2mam + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/symal2mam.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/symal2mam.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/tpt_pickler.o + 1 + 
PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/tpt_pickler.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/tpt_tightindex.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/tpt_tightindex.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/tpt_tokenindex.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/tpt_tokenindex.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_bitext.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_bitext.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_bitext_jstats.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_bitext_jstats.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_bitext_pstats.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_bitext_pstats.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_conll_record.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_conll_record.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_corpus_token.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_corpus_token.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_deptree.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_deptree.o + + + 
TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_http_client.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_http_client.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_im_bitext.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_im_bitext.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_lexical_reordering.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_lexical_reordering.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_load_primer.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_load_primer.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_phrasepair.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_phrasepair.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_sampling_bias.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_sampling_bias.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_tsa_array_entry.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_tsa_array_entry.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_ttrack_base.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_ttrack_base.o + + + TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_ttrack_position.o + 1 + 
PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/link-static/threading-multi/ug_ttrack_position.o bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FF/LexicalReordering/LexicalReordering.o @@ -6046,389 +6466,194 @@ PARENT-3-PROJECT_LOC/moses/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TranslationModel/fuzzy-match/create_xml.o - TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_get_options.o + TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_get_options.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_get_options.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_get_options.o - TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_splice_arglist.o + TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_splice_arglist.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_splice_arglist.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_splice_arglist.o - TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_stream.o + TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_stream.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_stream.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_stream.o - 
TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_thread_safe_counter.o + TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_thread_safe_counter.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_thread_safe_counter.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_thread_safe_counter.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal + 
PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build + 
PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build 
+ PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1 + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1 1 - 
PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1 - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/num_read_write.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/num_read_write.o 1 - 
PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/num_read_write.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/num_read_write.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o 
1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_jstats.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_jstats.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_pstats.o + 
TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_pstats.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_mmbitext.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_mmbitext.o - 
TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_http_client.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_http_client.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_im_bitext.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_im_bitext.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_lexical_reordering.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_lexical_reordering.o + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o + TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o - - - 
TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o - - - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_sampling_bias.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_sampling_bias.o - - - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o - - - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o - - - TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal - 1 - 
PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build - 1 - 
PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-demo1 - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-demo1 - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/num_read_write.o - 1 - 
PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/num_read_write.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o - 1 - 
PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_mmbitext.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_mmbitext.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o - - - TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o + 
PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Syntax/S2T/Parsers/Scope3Parser From aec2d51ce507913c39fb260d266c8353d81351cd Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 31 Jan 2017 23:35:01 +0000 Subject: [PATCH 104/176] moses2 compiles --- contrib/other-builds/moses2-cmd/.cproject | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/other-builds/moses2-cmd/.cproject b/contrib/other-builds/moses2-cmd/.cproject index 9f4548c68..bb8b44994 100644 --- a/contrib/other-builds/moses2-cmd/.cproject +++ b/contrib/other-builds/moses2-cmd/.cproject @@ -76,7 +76,7 @@ - + @@ -163,10 +163,10 @@ - + - + From adbb1c897a9ed019212dd7fc04e9a847e402e2d7 Mon Sep 17 00:00:00 2001 From: MosesAdmin Date: Wed, 1 Feb 2017 00:10:16 +0000 Subject: [PATCH 105/176] daily automatic beautifier --- moses/Util.h | 2 +- moses2/ArcLists.cpp | 128 ++++++++++++++++++------------------- moses2/ArcLists.h | 2 +- moses2/Array.h | 41 +++++------- moses2/EstimatedScores.cpp | 4 +- moses2/EstimatedScores.h | 6 +- moses2/FF/Distortion.cpp | 56 +++++++--------- moses2/FF/Distortion.h | 23 ++++--- 8 files changed, 119 insertions(+), 143 deletions(-) diff --git a/moses/Util.h b/moses/Util.h index 8a21a9b23..59f43c709 100644 --- a/moses/Util.h +++ b/moses/Util.h @@ -428,7 +428,7 @@ inline float CalcTranslationScore(const std::vector &probVector, out << *this; \ return out.str(); \ } \ - + //! 
delete and remove every element of a collection object such as set, list etc template void RemoveAllInColl(COLL &coll) diff --git a/moses2/ArcLists.cpp b/moses2/ArcLists.cpp index edc985465..1143024c0 100644 --- a/moses2/ArcLists.cpp +++ b/moses2/ArcLists.cpp @@ -19,110 +19,108 @@ namespace Moses2 ArcLists::ArcLists() { - // TODO Auto-generated constructor stub + // TODO Auto-generated constructor stub } ArcLists::~ArcLists() { - BOOST_FOREACH(const Coll::value_type &collPair, m_coll){ - const ArcList *arcList = collPair.second; - delete arcList; - } + BOOST_FOREACH(const Coll::value_type &collPair, m_coll) { + const ArcList *arcList = collPair.second; + delete arcList; + } } void ArcLists::AddArc(bool added, const HypothesisBase *currHypo, - const HypothesisBase *otherHypo) + const HypothesisBase *otherHypo) { - //cerr << added << " " << currHypo << " " << otherHypo << endl; - ArcList *arcList; - if (added) { - // we're winners! - if (otherHypo) { - // there was a existing losing hypo - arcList = &GetAndDetachArcList(otherHypo); - } - else { - // there was no existing hypo - arcList = new ArcList; - } - m_coll[currHypo] = arcList; - } - else { - // we're losers! - // there should be a winner, we're not doing beam pruning - UTIL_THROW_IF2(otherHypo == NULL, "There must have been a winning hypo"); - arcList = &GetArcList(otherHypo); - } + //cerr << added << " " << currHypo << " " << otherHypo << endl; + ArcList *arcList; + if (added) { + // we're winners! + if (otherHypo) { + // there was a existing losing hypo + arcList = &GetAndDetachArcList(otherHypo); + } else { + // there was no existing hypo + arcList = new ArcList; + } + m_coll[currHypo] = arcList; + } else { + // we're losers! 
+ // there should be a winner, we're not doing beam pruning + UTIL_THROW_IF2(otherHypo == NULL, "There must have been a winning hypo"); + arcList = &GetArcList(otherHypo); + } - // in any case, add the curr hypo - arcList->push_back(currHypo); + // in any case, add the curr hypo + arcList->push_back(currHypo); } ArcList &ArcLists::GetArcList(const HypothesisBase *hypo) { - Coll::iterator iter = m_coll.find(hypo); - UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list"); - ArcList &arcList = *iter->second; - return arcList; + Coll::iterator iter = m_coll.find(hypo); + UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list"); + ArcList &arcList = *iter->second; + return arcList; } const ArcList &ArcLists::GetArcList(const HypothesisBase *hypo) const { - Coll::const_iterator iter = m_coll.find(hypo); + Coll::const_iterator iter = m_coll.find(hypo); - if (iter == m_coll.end()) { - cerr << "looking for:" << hypo << " have " << m_coll.size() << " :"; - BOOST_FOREACH(const Coll::value_type &collPair, m_coll){ - const HypothesisBase *hypo = collPair.first; - cerr << hypo << " "; - } - } + if (iter == m_coll.end()) { + cerr << "looking for:" << hypo << " have " << m_coll.size() << " :"; + BOOST_FOREACH(const Coll::value_type &collPair, m_coll) { + const HypothesisBase *hypo = collPair.first; + cerr << hypo << " "; + } + } - UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list for " << hypo); - ArcList &arcList = *iter->second; - return arcList; + UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list for " << hypo); + ArcList &arcList = *iter->second; + return arcList; } ArcList &ArcLists::GetAndDetachArcList(const HypothesisBase *hypo) { - Coll::iterator iter = m_coll.find(hypo); - UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list"); - ArcList &arcList = *iter->second; + Coll::iterator iter = m_coll.find(hypo); + UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list"); + ArcList &arcList = *iter->second; - m_coll.erase(iter); + 
m_coll.erase(iter); - return arcList; + return arcList; } void ArcLists::Sort() { - BOOST_FOREACH(Coll::value_type &collPair, m_coll){ - ArcList &list = *collPair.second; - std::sort(list.begin(), list.end(), HypothesisFutureScoreOrderer() ); - } + BOOST_FOREACH(Coll::value_type &collPair, m_coll) { + ArcList &list = *collPair.second; + std::sort(list.begin(), list.end(), HypothesisFutureScoreOrderer() ); + } } void ArcLists::Delete(const HypothesisBase *hypo) { - //cerr << "hypo=" << hypo->Debug() << endl; - //cerr << "m_coll=" << m_coll.size() << endl; - Coll::iterator iter = m_coll.find(hypo); - UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list"); - ArcList *arcList = iter->second; + //cerr << "hypo=" << hypo->Debug() << endl; + //cerr << "m_coll=" << m_coll.size() << endl; + Coll::iterator iter = m_coll.find(hypo); + UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list"); + ArcList *arcList = iter->second; - m_coll.erase(iter); - delete arcList; + m_coll.erase(iter); + delete arcList; } std::string ArcLists::Debug(const System &system) const { - stringstream strm; - BOOST_FOREACH(const Coll::value_type &collPair, m_coll){ - const ArcList *arcList = collPair.second; - strm << arcList << "(" << arcList->size() << ") "; - } - return strm.str(); + stringstream strm; + BOOST_FOREACH(const Coll::value_type &collPair, m_coll) { + const ArcList *arcList = collPair.second; + strm << arcList << "(" << arcList->size() << ") "; + } + return strm.str(); } } diff --git a/moses2/ArcLists.h b/moses2/ArcLists.h index db606401f..742c9d9e2 100644 --- a/moses2/ArcLists.h +++ b/moses2/ArcLists.h @@ -23,7 +23,7 @@ public: virtual ~ArcLists(); void AddArc(bool added, const HypothesisBase *currHypo, - const HypothesisBase *otherHypo); + const HypothesisBase *otherHypo); void Sort(); void Delete(const HypothesisBase *hypo); diff --git a/moses2/Array.h b/moses2/Array.h index 59b003135..d9402a704 100644 --- a/moses2/Array.h +++ b/moses2/Array.h @@ -13,26 +13,21 @@ public: 
typedef T* iterator; typedef const T* const_iterator; //! iterators - const_iterator begin() const - { + const_iterator begin() const { return m_arr; } - const_iterator end() const - { + const_iterator end() const { return m_arr + m_size; } - iterator begin() - { + iterator begin() { return m_arr; } - iterator end() - { + iterator end() { return m_arr + m_size; } - Array(MemPool &pool, size_t size = 0, const T &val = T()) - { + Array(MemPool &pool, size_t size = 0, const T &val = T()) { m_size = size; m_maxSize = size; m_arr = pool.Allocate(size); @@ -41,26 +36,23 @@ public: } } - size_t size() const - { + size_t size() const { return m_size; } - const T& operator[](size_t ind) const - { + const T& operator[](size_t ind) const { return m_arr[ind]; } - T& operator[](size_t ind) - { + T& operator[](size_t ind) { return m_arr[ind]; } - T *GetArray() - { return m_arr; } + T *GetArray() { + return m_arr; + } - size_t hash() const - { + size_t hash() const { size_t seed = 0; for (size_t i = 0; i < m_size; ++i) { boost::hash_combine(seed, m_arr[i]); @@ -68,21 +60,18 @@ public: return seed; } - int Compare(const Array &compare) const - { + int Compare(const Array &compare) const { int cmp = memcmp(m_arr, compare.m_arr, sizeof(T) * m_size); return cmp; } - bool operator==(const Array &compare) const - { + bool operator==(const Array &compare) const { int cmp = Compare(compare); return cmp == 0; } - void resize(size_t newSize) - { + void resize(size_t newSize) { assert(m_size < m_maxSize); m_size = newSize; } diff --git a/moses2/EstimatedScores.cpp b/moses2/EstimatedScores.cpp index dfe52bb2b..e71647ce5 100644 --- a/moses2/EstimatedScores.cpp +++ b/moses2/EstimatedScores.cpp @@ -99,8 +99,8 @@ float EstimatedScores::CalcEstimatedScore(Bitmap const &bitmap, size_t startPos, } // end of a gap? 
else if (startGap != notInGap - && (bitmap.GetValue(currPos) == true - || (startPos <= currPos && currPos <= endPos))) { + && (bitmap.GetValue(currPos) == true + || (startPos <= currPos && currPos <= endPos))) { estimatedScore += GetValue(startGap, currPos - 1); startGap = notInGap; } diff --git a/moses2/EstimatedScores.h b/moses2/EstimatedScores.h index eae2e08ab..f85470783 100644 --- a/moses2/EstimatedScores.h +++ b/moses2/EstimatedScores.h @@ -36,8 +36,7 @@ class EstimatedScores: public Matrix { public: EstimatedScores(MemPool &pool, size_t size) : - Matrix(pool, size, size) - { + Matrix(pool, size, size) { } ~EstimatedScores(); // not implemented @@ -45,8 +44,7 @@ public: float CalcEstimatedScore(Bitmap const&) const; float CalcEstimatedScore(Bitmap const&, size_t startPos, size_t endPos) const; - std::ostream &Debug(std::ostream &out, const System &system) const - { + std::ostream &Debug(std::ostream &out, const System &system) const { for (size_t endPos = 0; endPos < GetSize(); endPos++) { for (size_t startPos = 0; startPos < GetSize(); startPos++) out << GetValue(startPos, endPos) << " "; diff --git a/moses2/FF/Distortion.cpp b/moses2/FF/Distortion.cpp index 1d7b7246d..3c0cd8cee 100644 --- a/moses2/FF/Distortion.cpp +++ b/moses2/FF/Distortion.cpp @@ -16,36 +16,30 @@ using namespace std; namespace Moses2 { -struct DistortionState_traditional: public FFState -{ +struct DistortionState_traditional: public FFState { Range range; int first_gap; DistortionState_traditional() : - range() - { + range() { // uninitialised } - void Set(const Range& wr, int fg) - { + void Set(const Range& wr, int fg) { range = wr; first_gap = fg; } - size_t hash() const - { + size_t hash() const { return range.GetEndPos(); } - virtual bool operator==(const FFState& other) const - { + virtual bool operator==(const FFState& other) const { const DistortionState_traditional& o = - static_cast(other); + static_cast(other); return range.GetEndPos() == o.range.GetEndPos(); } - virtual 
std::string ToString() const - { + virtual std::string ToString() const { stringstream sb; sb << first_gap << " " << range; return sb.str(); @@ -55,7 +49,7 @@ struct DistortionState_traditional: public FFState /////////////////////////////////////////////////////////////////////// Distortion::Distortion(size_t startInd, const std::string &line) : - StatefulFeatureFunction(startInd, line) + StatefulFeatureFunction(startInd, line) { ReadParameters(); } @@ -71,10 +65,10 @@ FFState* Distortion::BlankState(MemPool &pool, const System &sys) const } void Distortion::EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const + const InputType &input, const Hypothesis &hypo) const { DistortionState_traditional &stateCast = - static_cast(state); + static_cast(state); // fake previous translated phrase start and end size_t start = NOT_FOUND; @@ -92,31 +86,31 @@ void Distortion::EmptyHypothesisState(FFState &state, const ManagerBase &mgr, } void Distortion::EvaluateInIsolation(MemPool &pool, const System &system, - const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { } void Distortion::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { } void Distortion::EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const { const DistortionState_traditional &prev = - static_cast(prevState); + static_cast(prevState); SCORE distortionScore = CalculateDistortionScore(prev.range, - hypo.GetInputPath().range, 
prev.first_gap); + hypo.GetInputPath().range, prev.first_gap); //cerr << "distortionScore=" << distortionScore << endl; scores.PlusEquals(mgr.system, *this, distortionScore); DistortionState_traditional &stateCast = - static_cast(state); + static_cast(state); stateCast.Set(hypo.GetInputPath().range, hypo.GetBitmap().GetFirstGapPos()); //cerr << "hypo=" << hypo.Debug(mgr.system) << endl; @@ -128,8 +122,7 @@ SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr, bool useEarlyDistortionCost = false; if (!useEarlyDistortionCost) { return -(SCORE) ComputeDistortionDistance(prev, curr); - } - else { + } else { /* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007 Definitions: S : current source range @@ -162,7 +155,7 @@ SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr, // case4: otherwise => return 2(nbWordBetween(S,S')+length(S)) //IFVERBOSE(4) std::cerr<< "MQ07disto:case4" << std::endl; return (float) -2 - * ((int) curr.GetNumWordsBetween(prev) + (int) curr.GetNumWordsCovered()); + * ((int) curr.GetNumWordsBetween(prev) + (int) curr.GetNumWordsCovered()); } } @@ -173,16 +166,15 @@ int Distortion::ComputeDistortionDistance(const Range& prev, int dist = 0; if (prev.GetNumWordsCovered() == 0) { dist = current.GetStartPos(); - } - else { + } else { dist = (int) prev.GetEndPos() - (int) current.GetStartPos() + 1; } return abs(dist); } void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const + const SCFG::Hypothesis &hypo, int featureID, Scores &scores, + FFState &state) const { UTIL_THROW2("Not implemented"); } diff --git a/moses2/FF/Distortion.h b/moses2/FF/Distortion.h index 45577d1c3..685aa1445 100644 --- a/moses2/FF/Distortion.h +++ b/moses2/FF/Distortion.h @@ -23,33 +23,32 @@ public: virtual FFState* BlankState(MemPool &pool, const System &sys) const; virtual void EmptyHypothesisState(FFState 
&state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const; + const InputType &input, const Hypothesis &hypo) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; - virtual void EvaluateWhenApplied(const std::deque &hypos) const - { + virtual void EvaluateWhenApplied(const std::deque &hypos) const { } virtual void EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const; + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const; virtual void EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const; + const SCFG::Hypothesis &hypo, int featureID, Scores &scores, + FFState &state) const; protected: SCORE CalculateDistortionScore(const Range &prev, const Range &curr, - const int FirstGap) const; + const int FirstGap) const; int ComputeDistortionDistance(const Range& prev, const Range& current) const; From 84b918b389e5a2d5e31cca993e1e53ff1354f1b1 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 1 Feb 2017 00:27:14 +0000 Subject: [PATCH 106/176] beautify --- moses2/FF/FFState.cpp | 1 + moses2/FF/FFState.h | 15 +- moses2/FF/FeatureFunction.cpp | 21 +- moses2/FF/FeatureFunction.h | 44 +- moses2/FF/FeatureFunctions.cpp | 78 +- moses2/FF/FeatureFunctions.h | 33 +- moses2/FF/FeatureRegistry.cpp | 6 +- moses2/FF/FeatureRegistry.h | 8 +- .../BidirectionalReorderingState.cpp | 22 +- 
.../BidirectionalReorderingState.h | 8 +- .../HReorderingBackwardState.cpp | 16 +- .../HReorderingBackwardState.h | 6 +- .../HReorderingForwardState.cpp | 22 +- .../HReorderingForwardState.h | 6 +- moses2/FF/LexicalReordering/LRModel.cpp | 91 +- moses2/FF/LexicalReordering/LRModel.h | 30 +- moses2/FF/LexicalReordering/LRState.cpp | 12 +- moses2/FF/LexicalReordering/LRState.h | 8 +- .../LexicalReordering/LexicalReordering.cpp | 62 +- .../FF/LexicalReordering/LexicalReordering.h | 34 +- .../PhraseBasedReorderingState.cpp | 28 +- .../PhraseBasedReorderingState.h | 11 +- .../FF/LexicalReordering/ReorderingStack.cpp | 17 +- moses2/FF/OSM/OpSequenceModel.cpp | 4 +- moses2/FF/OSM/OpSequenceModel.h | 20 +- moses2/FF/OSM/osmHyp.h | 5 +- moses2/FF/PhrasePenalty.cpp | 10 +- moses2/FF/PhrasePenalty.h | 8 +- moses2/FF/PointerState.cpp | 6 + moses2/FF/PointerState.h | 18 +- moses2/FF/SkeletonStatefulFF.cpp | 14 +- moses2/FF/SkeletonStatefulFF.h | 18 +- moses2/FF/SkeletonStatelessFF.cpp | 2 +- moses2/FF/SkeletonStatelessFF.h | 8 +- moses2/FF/StatefulFeatureFunction.cpp | 8 +- moses2/FF/StatefulFeatureFunction.h | 20 +- moses2/FF/StatelessFeatureFunction.cpp | 2 +- moses2/FF/WordPenalty.cpp | 10 +- moses2/FF/WordPenalty.h | 8 +- moses2/HypothesisBase.cpp | 16 +- moses2/HypothesisBase.h | 34 +- moses2/HypothesisColl.cpp | 195 +- moses2/HypothesisColl.h | 22 +- moses2/InputPathBase.cpp | 4 +- moses2/InputPathBase.h | 2 +- moses2/InputPathsBase.h | 15 +- moses2/InputType.cpp | 42 +- moses2/InputType.h | 48 +- moses2/LM/GPULM.cpp | 55 +- moses2/LM/GPULM.h | 29 +- moses2/LM/KENLM.cpp | 155 +- moses2/LM/KENLM.h | 31 +- moses2/LM/KENLMBatch.cpp | 114 +- moses2/LM/KENLMBatch.h | 27 +- moses2/LM/LanguageModel.cpp | 68 +- moses2/LM/LanguageModel.h | 37 +- moses2/LM/LanguageModelDALM.cpp | 96 +- moses2/LM/LanguageModelDALM.h | 14 +- moses2/Main.cpp | 44 +- moses2/Main.h | 3 +- moses2/ManagerBase.cpp | 20 +- moses2/ManagerBase.h | 27 +- moses2/MemPool.cpp | 10 +- moses2/MemPool.h | 51 +- 
moses2/MemPoolAllocator.h | 27 +- moses2/MorphoTrie/MorphTrie.h | 20 +- moses2/MorphoTrie/Node.h | 22 +- moses2/Phrase.h | 29 +- .../PhraseBased/CubePruningMiniStack/Misc.cpp | 40 +- .../PhraseBased/CubePruningMiniStack/Misc.h | 21 +- .../CubePruningMiniStack/Search.cpp | 294 +- .../CubePruningMiniStack/Stack.cpp | 124 +- .../PhraseBased/CubePruningMiniStack/Stack.h | 12 +- moses2/PhraseBased/Hypothesis.cpp | 38 +- moses2/PhraseBased/Hypothesis.h | 28 +- moses2/PhraseBased/InputPath.cpp | 10 +- moses2/PhraseBased/InputPath.h | 7 +- moses2/PhraseBased/InputPaths.h | 13 +- moses2/PhraseBased/Manager.cpp | 322 +- moses2/PhraseBased/Manager.h | 22 +- moses2/PhraseBased/Normal/Search.cpp | 158 +- moses2/PhraseBased/Normal/Search.h | 4 +- moses2/PhraseBased/Normal/Stack.cpp | 2 +- moses2/PhraseBased/Normal/Stacks.cpp | 7 +- moses2/PhraseBased/Normal/Stacks.h | 14 +- moses2/PhraseBased/PhraseImpl.cpp | 2 +- moses2/PhraseBased/PhraseImpl.h | 5 +- moses2/PhraseBased/ReorderingConstraint.cpp | 4 +- moses2/PhraseBased/Search.cpp | 11 +- moses2/PhraseBased/Search.h | 8 +- moses2/PhraseBased/Sentence.cpp | 141 +- moses2/PhraseBased/Sentence.h | 20 +- moses2/PhraseBased/TargetPhraseImpl.cpp | 8 +- moses2/PhraseBased/TargetPhraseImpl.h | 17 +- moses2/PhraseBased/TargetPhrases.cpp | 10 +- moses2/PhraseBased/TargetPhrases.h | 15 +- moses2/PhraseBased/TrellisPath.cpp | 18 +- moses2/PhraseBased/TrellisPath.h | 17 +- moses2/PhraseImplTemplate.h | 30 +- moses2/Recycler.h | 24 +- moses2/SCFG/ActiveChart.cpp | 32 +- moses2/SCFG/ActiveChart.h | 41 +- moses2/SCFG/Hypothesis.cpp | 60 +- moses2/SCFG/Hypothesis.h | 35 +- moses2/SCFG/InputPath.cpp | 20 +- moses2/SCFG/InputPath.h | 17 +- moses2/SCFG/InputPaths.cpp | 2 +- moses2/SCFG/InputPaths.h | 3 +- moses2/SCFG/Manager.cpp | 59 +- moses2/SCFG/Manager.h | 37 +- moses2/SCFG/Misc.cpp | 75 +- moses2/SCFG/Misc.h | 68 +- moses2/SCFG/PhraseImpl.cpp | 2 +- moses2/SCFG/PhraseImpl.h | 5 +- moses2/SCFG/Sentence.cpp | 119 +- moses2/SCFG/Sentence.h | 20 
+- moses2/SCFG/Stack.cpp | 12 +- moses2/SCFG/Stack.h | 7 +- moses2/SCFG/Stacks.h | 15 +- moses2/SCFG/TargetPhraseImpl.cpp | 28 +- moses2/SCFG/TargetPhraseImpl.h | 19 +- moses2/SCFG/TargetPhrases.cpp | 10 +- moses2/SCFG/TargetPhrases.h | 17 +- moses2/SCFG/Word.cpp | 82 +- moses2/SCFG/Word.h | 22 +- moses2/SCFG/nbest/KBestExtractor.cpp | 54 +- moses2/SCFG/nbest/NBest.cpp | 231 +- moses2/SCFG/nbest/NBest.h | 72 +- moses2/SCFG/nbest/NBestColl.cpp | 31 +- moses2/SCFG/nbest/NBestColl.h | 10 +- moses2/SCFG/nbest/NBests.cpp | 130 +- moses2/SCFG/nbest/NBests.h | 39 +- moses2/Scores.cpp | 38 +- moses2/Scores.h | 27 +- moses2/SubPhrase.h | 22 +- moses2/System.cpp | 63 +- moses2/TargetPhrase.h | 92 +- .../CompactPT/BlockHashIndex.cpp | 32 +- .../CompactPT/BlockHashIndex.h | 19 +- .../CompactPT/CanonicalHuffman.h | 84 +- .../CompactPT/CmphStringVectorAdapter.cpp | 2 +- .../CompactPT/CmphStringVectorAdapter.h | 9 +- .../LexicalReorderingTableCompact.cpp | 39 +- .../CompactPT/LexicalReorderingTableCompact.h | 44 +- .../TranslationModel/CompactPT/ListCoders.h | 37 +- .../CompactPT/MmapAllocator.h | 82 +- .../CompactPT/MonotonicVector.h | 63 +- .../CompactPT/MurmurHash3.cpp | 4 +- .../TranslationModel/CompactPT/PackedArray.h | 48 +- .../CompactPT/PhraseDecoder.cpp | 26 +- .../CompactPT/PhraseDecoder.h | 20 +- .../CompactPT/PhraseTableCompact.cpp | 112 +- .../CompactPT/PhraseTableCompact.h | 38 +- .../TranslationModel/CompactPT/StringVector.h | 110 +- .../CompactPT/TargetPhraseCollectionCache.h | 6 +- moses2/TranslationModel/Memory/Node.h | 43 +- .../Memory/PhraseTableMemory.cpp | 63 +- .../Memory/PhraseTableMemory.h | 44 +- moses2/TranslationModel/PhraseTable.cpp | 95 +- moses2/TranslationModel/PhraseTable.h | 85 +- .../TranslationModel/ProbingPT/ProbingPT.cpp | 276 +- moses2/TranslationModel/ProbingPT/ProbingPT.h | 93 +- .../ProbingPT/StoreTarget.cpp | 40 +- .../TranslationModel/ProbingPT/StoreTarget.h | 2 +- .../TranslationModel/ProbingPT/StoreVocab.h | 14 +- 
moses2/TranslationModel/ProbingPT/hash.cpp | 14 +- .../TranslationModel/ProbingPT/querying.cpp | 63 +- moses2/TranslationModel/ProbingPT/storing.cpp | 33 +- moses2/TranslationModel/ProbingPT/vocabid.cpp | 14 +- moses2/TranslationModel/Transliteration.cpp | 92 +- moses2/TranslationModel/Transliteration.h | 74 +- .../TranslationModel/UnknownWordPenalty.cpp | 118 +- moses2/TranslationModel/UnknownWordPenalty.h | 74 +- moses2/TranslationTask.cpp | 11 +- moses2/TrellisPaths.h | 25 +- moses2/TypeDef.h | 9 +- moses2/Vector.h | 6 +- moses2/Weights.cpp | 4 +- moses2/Weights.h | 3 +- moses2/Word.cpp | 16 +- moses2/Word.h | 14 +- moses2/defer/CubePruningBitmapStack/Misc.cpp | 162 +- moses2/defer/CubePruningBitmapStack/Misc.h | 78 +- .../defer/CubePruningBitmapStack/Search.cpp | 228 +- moses2/defer/CubePruningBitmapStack/Search.h | 32 +- moses2/defer/CubePruningBitmapStack/Stack.cpp | 226 +- moses2/defer/CubePruningBitmapStack/Stack.h | 79 +- .../defer/CubePruningCardinalStack/Misc.cpp | 162 +- moses2/defer/CubePruningCardinalStack/Misc.h | 80 +- .../defer/CubePruningCardinalStack/Search.cpp | 228 +- .../defer/CubePruningCardinalStack/Search.h | 32 +- .../defer/CubePruningCardinalStack/Stack.cpp | 100 +- moses2/defer/CubePruningCardinalStack/Stack.h | 45 +- moses2/defer/CubePruningPerBitmap/Misc.cpp | 164 +- moses2/defer/CubePruningPerBitmap/Misc.h | 80 +- moses2/defer/CubePruningPerBitmap/Search.cpp | 284 +- moses2/defer/CubePruningPerBitmap/Search.h | 36 +- moses2/defer/CubePruningPerBitmap/Stacks.cpp | 32 +- moses2/defer/CubePruningPerBitmap/Stacks.h | 34 +- moses2/defer/CubePruningPerMiniStack/Misc.cpp | 164 +- moses2/defer/CubePruningPerMiniStack/Misc.h | 80 +- .../defer/CubePruningPerMiniStack/Search.cpp | 270 +- moses2/defer/CubePruningPerMiniStack/Search.h | 36 +- .../defer/CubePruningPerMiniStack/Stacks.cpp | 32 +- moses2/defer/CubePruningPerMiniStack/Stacks.h | 34 +- moses2/legacy/Bitmap.cpp | 6 +- moses2/legacy/Bitmap.h | 332 +- moses2/legacy/Bitmaps.cpp | 13 +- 
moses2/legacy/Bitmaps.h | 5 +- moses2/legacy/Factor.h | 21 +- moses2/legacy/FactorCollection.cpp | 11 +- moses2/legacy/FactorCollection.h | 27 +- moses2/legacy/InputFileStream.cpp | 5 +- moses2/legacy/Matrix.h | 27 +- moses2/legacy/OutputCollector.h | 49 +- moses2/legacy/OutputFileStream.cpp | 9 +- moses2/legacy/Parameter.cpp | 260 +- moses2/legacy/Parameter.h | 44 +- moses2/legacy/Range.h | 28 +- moses2/legacy/ThreadPool.cpp | 9 +- moses2/legacy/ThreadPool.h | 23 +- moses2/legacy/Timer.cpp | 5 +- moses2/legacy/Util2.cpp | 7 +- moses2/legacy/Util2.h | 73 +- moses2/legacy/gzfilebuf.h | 31 +- moses2/parameters/AllOptions.cpp | 207 +- moses2/parameters/AllOptions.h | 55 +- moses2/parameters/BeamSearchOptions.h | 11 +- moses2/parameters/BookkeepingOptions.h | 13 +- moses2/parameters/ContextParameters.cpp | 19 +- moses2/parameters/CubePruningOptions.cpp | 118 +- moses2/parameters/CubePruningOptions.h | 25 +- moses2/parameters/InputOptions.cpp | 161 +- moses2/parameters/InputOptions.h | 33 +- moses2/parameters/LMBR_Options.cpp | 52 +- moses2/parameters/LMBR_Options.h | 27 +- moses2/parameters/LookupOptions.h | 11 +- moses2/parameters/MBR_Options.cpp | 30 +- moses2/parameters/MBR_Options.h | 19 +- moses2/parameters/NBestOptions.cpp | 30 +- moses2/parameters/NBestOptions.h | 3 +- moses2/parameters/OOVHandlingOptions.cpp | 64 +- moses2/parameters/OOVHandlingOptions.h | 27 +- moses2/parameters/OptionsBaseClass.cpp | 32 +- moses2/parameters/OptionsBaseClass.h | 15 +- moses2/parameters/ReorderingOptions.cpp | 42 +- moses2/parameters/ReorderingOptions.h | 19 +- moses2/parameters/ReportingOptions.cpp | 260 +- moses2/parameters/ReportingOptions.h | 87 +- moses2/parameters/SearchOptions.cpp | 169 +- moses2/parameters/SearchOptions.h | 69 +- moses2/parameters/ServerOptions.cpp | 32 +- moses2/parameters/ServerOptions.h | 48 +- moses2/parameters/SyntaxOptions.cpp | 8 +- moses2/parameters/SyntaxOptions.h | 3 +- moses2/pugixml.cpp | 21236 ++++++++-------- moses2/server/Server.cpp | 36 
+- moses2/server/TranslationRequest.cpp | 24 +- moses2/server/TranslationRequest.h | 10 +- moses2/server/Translator.cpp | 10 +- moses2/server/Translator.h | 2 +- 261 files changed, 16155 insertions(+), 17694 deletions(-) diff --git a/moses2/FF/FFState.cpp b/moses2/FF/FFState.cpp index e69de29bb..c92b213fa 100644 --- a/moses2/FF/FFState.cpp +++ b/moses2/FF/FFState.cpp @@ -0,0 +1 @@ +#include "FFState.h" diff --git a/moses2/FF/FFState.h b/moses2/FF/FFState.h index 33ef5d1f6..41789b7dc 100644 --- a/moses2/FF/FFState.h +++ b/moses2/FF/FFState.h @@ -10,14 +10,12 @@ namespace Moses2 class FFState { public: - virtual ~FFState() - { + virtual ~FFState() { } virtual size_t hash() const = 0; virtual bool operator==(const FFState& other) const = 0; - virtual bool operator!=(const FFState& other) const - { + virtual bool operator!=(const FFState& other) const { return !(*this == other); } @@ -35,17 +33,14 @@ inline std::ostream& operator<<(std::ostream& out, const FFState& obj) class DummyState: public FFState { public: - DummyState() - { + DummyState() { } - virtual size_t hash() const - { + virtual size_t hash() const { return 0; } - virtual bool operator==(const FFState& other) const - { + virtual bool operator==(const FFState& other) const { return true; } diff --git a/moses2/FF/FeatureFunction.cpp b/moses2/FF/FeatureFunction.cpp index 3326ceaa4..6b4617dc5 100644 --- a/moses2/FF/FeatureFunction.cpp +++ b/moses2/FF/FeatureFunction.cpp @@ -17,10 +17,10 @@ namespace Moses2 { FeatureFunction::FeatureFunction(size_t startInd, const std::string &line) -:m_startInd(startInd) -,m_numScores(1) -,m_PhraseTableInd(NOT_FOUND) -,m_tuneable(true) + :m_startInd(startInd) + ,m_numScores(1) + ,m_PhraseTableInd(NOT_FOUND) + ,m_tuneable(true) { ParseLine(line); //cerr << GetName() << " " << m_startInd << "-" << (m_startInd + m_numScores - 1) << endl; @@ -43,18 +43,16 @@ void FeatureFunction::ParseLine(const std::string &line) for (size_t i = 1; i < toks.size(); ++i) { vector args = 
TokenizeFirstOnly(toks[i], "="); UTIL_THROW_IF2(args.size() != 2, - "Incorrect format for feature function arg: " << toks[i]); + "Incorrect format for feature function arg: " << toks[i]); pair::iterator, bool> ret = keys.insert(args[0]); UTIL_THROW_IF2(!ret.second, "Duplicate key in line " << line); if (args[0] == "num-features") { m_numScores = Scan(args[1]); - } - else if (args[0] == "name") { + } else if (args[0] == "name") { m_name = args[1]; - } - else { + } else { m_args.push_back(args); } } @@ -71,12 +69,11 @@ void FeatureFunction::ReadParameters() } void FeatureFunction::SetParameter(const std::string& key, - const std::string& value) + const std::string& value) { if (key == "tuneable") { m_tuneable = Scan(value); - } - else { + } else { UTIL_THROW2(GetName() << ": Unknown argument " << key << "=" << value); } } diff --git a/moses2/FF/FeatureFunction.h b/moses2/FF/FeatureFunction.h index 1e25fce39..102bda8f1 100644 --- a/moses2/FF/FeatureFunction.h +++ b/moses2/FF/FeatureFunction.h @@ -39,44 +39,35 @@ public: FeatureFunction(size_t startInd, const std::string &line); virtual ~FeatureFunction(); - virtual void Load(System &system) - { + virtual void Load(System &system) { } - size_t GetStartInd() const - { + size_t GetStartInd() const { return m_startInd; } - size_t GetNumScores() const - { + size_t GetNumScores() const { return m_numScores; } - const std::string &GetName() const - { + const std::string &GetName() const { return m_name; } - void SetName(const std::string &val) - { + void SetName(const std::string &val) { m_name = val; } - virtual size_t HasPhraseTableInd() const - { + virtual size_t HasPhraseTableInd() const { return false; } - void SetPhraseTableInd(size_t ind) - { + void SetPhraseTableInd(size_t ind) { m_PhraseTableInd = ind; } - size_t GetPhraseTableInd() const - { + size_t GetPhraseTableInd() const { return m_PhraseTableInd; } //! if false, then this feature is not displayed in the n-best list. 
// use with care - virtual bool IsTuneable() const - { + virtual bool IsTuneable() const { return m_tuneable; } @@ -85,30 +76,27 @@ public: // may have more factors than actually need, but not guaranteed. virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const = 0; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const = 0; // For SCFG decoding, the source can contain non-terminals, NOT the raw // source from the input sentence virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const = 0; + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const = 0; // used by lexicalised reordering model to add scores to tp data structures virtual void EvaluateAfterTablePruning(MemPool &pool, - const TargetPhrases &tps, const Phrase &sourcePhrase) const - { + const TargetPhrases &tps, const Phrase &sourcePhrase) const { } virtual void EvaluateAfterTablePruning(MemPool &pool, - const SCFG::TargetPhrases &tps, const Phrase &sourcePhrase) const - { + const SCFG::TargetPhrases &tps, const Phrase &sourcePhrase) const { } // clean up temporary memory, called after processing each sentence - virtual void CleanUpAfterSentenceProcessing() const - { + virtual void CleanUpAfterSentenceProcessing() const { } protected: diff --git a/moses2/FF/FeatureFunctions.cpp b/moses2/FF/FeatureFunctions.cpp index 4e4f5b54c..0e61fb0e4 100644 --- a/moses2/FF/FeatureFunctions.cpp +++ b/moses2/FF/FeatureFunctions.cpp @@ -25,7 +25,7 @@ using namespace std; namespace Moses2 { FeatureFunctions::FeatureFunctions(System &system) : - m_system(system), m_ffStartInd(0) + m_system(system), m_ffStartInd(0) { } @@ -37,27 +37,26 @@ FeatureFunctions::~FeatureFunctions() void FeatureFunctions::Load() { // load, everything but pts 
- BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){ - FeatureFunction *nonConstFF = const_cast(ff); - PhraseTable *pt = dynamic_cast(nonConstFF); + BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) { + FeatureFunction *nonConstFF = const_cast(ff); + PhraseTable *pt = dynamic_cast(nonConstFF); - if (pt) { - // do nothing. load pt last + if (pt) { + // do nothing. load pt last + } else { + cerr << "Loading " << nonConstFF->GetName() << endl; + nonConstFF->Load(m_system); + cerr << "Finished loading " << nonConstFF->GetName() << endl; + } } - else { - cerr << "Loading " << nonConstFF->GetName() << endl; - nonConstFF->Load(m_system); - cerr << "Finished loading " << nonConstFF->GetName() << endl; - } -} // load pt -BOOST_FOREACH(const PhraseTable *pt, phraseTables) { - PhraseTable *nonConstPT = const_cast(pt); - cerr << "Loading " << nonConstPT->GetName() << endl; - nonConstPT->Load(m_system); - cerr << "Finished loading " << nonConstPT->GetName() << endl; -} + BOOST_FOREACH(const PhraseTable *pt, phraseTables) { + PhraseTable *nonConstPT = const_cast(pt); + cerr << "Loading " << nonConstPT->GetName() << endl; + nonConstPT->Load(m_system); + cerr << "Finished loading " << nonConstPT->GetName() << endl; + } } void FeatureFunctions::Create() @@ -67,7 +66,7 @@ void FeatureFunctions::Create() const PARAM_VEC *ffParams = params.GetParam("feature"); UTIL_THROW_IF2(ffParams == NULL, "Must have [feature] section"); - BOOST_FOREACH(const std::string &line, *ffParams){ + BOOST_FOREACH(const std::string &line, *ffParams) { //cerr << "line=" << line << endl; FeatureFunction *ff = Create(line); @@ -129,34 +128,33 @@ std::string FeatureFunctions::GetDefaultName(const std::string &stub) { size_t ind; boost::unordered_map::iterator iter = - m_defaultNames.find(stub); + m_defaultNames.find(stub); if (iter == m_defaultNames.end()) { m_defaultNames[stub] = 0; ind = 0; - } - else { + } else { ind = ++(iter->second); } return stub + SPrint(ind); } const 
FeatureFunction *FeatureFunctions::FindFeatureFunction( - const std::string &name) const + const std::string &name) const { - BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){ - if (ff->GetName() == name) { - return ff; - } - } - return NULL; + BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) { + if (ff->GetName() == name) { + return ff; + } + } + return NULL; } FeatureFunction *FeatureFunctions::FindFeatureFunction( - const std::string &name) + const std::string &name) { - BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){ + BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) { if (ff->GetName() == name) { - return const_cast(ff); + return const_cast(ff); } } return NULL; @@ -184,7 +182,7 @@ void FeatureFunctions::EvaluateInIsolation(MemPool &pool, const System &system, { SCORE estimatedScore = 0; - BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){ + BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) { Scores& scores = targetPhrase.GetScores(); ff->EvaluateInIsolation(pool, system, source, targetPhrase, scores, estimatedScore); } @@ -193,14 +191,14 @@ void FeatureFunctions::EvaluateInIsolation(MemPool &pool, const System &system, } void FeatureFunctions::EvaluateInIsolation( - MemPool &pool, - const System &system, - const Phrase &source, - SCFG::TargetPhraseImpl &targetPhrase) const + MemPool &pool, + const System &system, + const Phrase &source, + SCFG::TargetPhraseImpl &targetPhrase) const { SCORE estimatedScore = 0; - BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){ + BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) { Scores& scores = targetPhrase.GetScores(); ff->EvaluateInIsolation(pool, system, source, targetPhrase, scores, estimatedScore); } @@ -234,7 +232,7 @@ void FeatureFunctions::EvaluateWhenAppliedBatch(const Batch &batch) const void FeatureFunctions::CleanUpAfterSentenceProcessing() const { BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) { - 
ff->CleanUpAfterSentenceProcessing(); + ff->CleanUpAfterSentenceProcessing(); } } @@ -274,7 +272,7 @@ void FeatureFunctions::OverrideFeatures() UTIL_THROW_IF2(keyVal.size() != 2, "Incorrect format for parameter override: " << keyValStr); cerr << "Override " << ff->GetName() << " " - << keyVal[0] << "=" << keyVal[1] << endl; + << keyVal[0] << "=" << keyVal[1] << endl; ff->SetParameter(keyVal[0], keyVal[1]); diff --git a/moses2/FF/FeatureFunctions.h b/moses2/FF/FeatureFunctions.h index 110ebf736..6a3f9bb78 100644 --- a/moses2/FF/FeatureFunctions.h +++ b/moses2/FF/FeatureFunctions.h @@ -47,17 +47,21 @@ public: FeatureFunctions(System &system); virtual ~FeatureFunctions(); - const std::vector &GetFeatureFunctions() const - { return m_featureFunctions; } + const std::vector &GetFeatureFunctions() const { + return m_featureFunctions; + } - const std::vector &GetStatefulFeatureFunctions() const - { return m_statefulFeatureFunctions; } + const std::vector &GetStatefulFeatureFunctions() const { + return m_statefulFeatureFunctions; + } - const std::vector &GetWithPhraseTableInd() const - { return m_withPhraseTableInd; } + const std::vector &GetWithPhraseTableInd() const { + return m_withPhraseTableInd; + } - size_t GetNumScores() const - { return m_ffStartInd; } + size_t GetNumScores() const { + return m_ffStartInd; + } void Create(); void Load(); @@ -65,19 +69,20 @@ public: const FeatureFunction *FindFeatureFunction(const std::string &name) const; const PhraseTable *GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd); - const UnknownWordPenalty *GetUnknownWordPenalty() const - { return m_unkWP; } + const UnknownWordPenalty *GetUnknownWordPenalty() const { + return m_unkWP; + } // the pool here must be the system pool if the rule was loaded during load, or the mgr pool if it was loaded on demand void EvaluateInIsolation(MemPool &pool, const System &system, - const Phrase &source, TargetPhraseImpl &targetPhrase) const; + const Phrase &source, TargetPhraseImpl &targetPhrase) 
const; void EvaluateInIsolation(MemPool &pool, const System &system, - const Phrase &source, SCFG::TargetPhraseImpl &targetPhrase) const; + const Phrase &source, SCFG::TargetPhraseImpl &targetPhrase) const; void EvaluateAfterTablePruning(MemPool &pool, const TargetPhrases &tps, - const Phrase &sourcePhrase) const; + const Phrase &sourcePhrase) const; void EvaluateAfterTablePruning(MemPool &pool, const SCFG::TargetPhrases &tps, - const Phrase &sourcePhrase) const; + const Phrase &sourcePhrase) const; void EvaluateWhenAppliedBatch(const Batch &batch) const; diff --git a/moses2/FF/FeatureRegistry.cpp b/moses2/FF/FeatureRegistry.cpp index 3ec8706e5..52f6afca5 100644 --- a/moses2/FF/FeatureRegistry.cpp +++ b/moses2/FF/FeatureRegistry.cpp @@ -31,8 +31,7 @@ template class DefaultFeatureFactory: public FeatureFactory { public: - FeatureFunction *Create(size_t startInd, const std::string &line) const - { + FeatureFunction *Create(size_t startInd, const std::string &line) const { return new F(startInd, line); } }; @@ -41,8 +40,7 @@ public: class KenFactory: public FeatureFactory { public: - FeatureFunction *Create(size_t startInd, const std::string &line) const - { + FeatureFunction *Create(size_t startInd, const std::string &line) const { ConstructKenLM(startInd, line); } }; diff --git a/moses2/FF/FeatureRegistry.h b/moses2/FF/FeatureRegistry.h index 63e78aae0..1e6fd399d 100644 --- a/moses2/FF/FeatureRegistry.h +++ b/moses2/FF/FeatureRegistry.h @@ -10,15 +10,13 @@ class FeatureFunction; class FeatureFactory { public: - virtual ~FeatureFactory() - { + virtual ~FeatureFactory() { } virtual FeatureFunction *Create(size_t startInd, const std::string &line) const = 0; protected: - FeatureFactory() - { + FeatureFactory() { } }; @@ -33,7 +31,7 @@ public: ~FeatureRegistry(); FeatureFunction *Construct(size_t startInd, const std::string &name, - const std::string &line) const; + const std::string &line) const; void PrintFF() const; private: diff --git 
a/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp b/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp index 8c1b409c3..36e232f91 100644 --- a/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp +++ b/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp @@ -15,9 +15,9 @@ namespace Moses2 { BidirectionalReorderingState::BidirectionalReorderingState( - const LRModel &config, LRState *bw, LRState *fw, size_t offset) : - LRState(config, LRModel::Bidirectional, offset), m_backward(bw), m_forward( - fw) + const LRModel &config, LRState *bw, LRState *fw, size_t offset) : + LRState(config, LRModel::Bidirectional, offset), m_backward(bw), m_forward( + fw) { } @@ -27,8 +27,8 @@ BidirectionalReorderingState::~BidirectionalReorderingState() } void BidirectionalReorderingState::Init(const LRState *prev, - const TargetPhrase &topt, const InputPathBase &path, bool first, - const Bitmap *coverage) + const TargetPhrase &topt, const InputPathBase &path, bool first, + const Bitmap *coverage) { if (m_backward) { m_backward->Init(prev, topt, path, first, coverage); @@ -41,7 +41,7 @@ void BidirectionalReorderingState::Init(const LRState *prev, std::string BidirectionalReorderingState::ToString() const { return "BidirectionalReorderingState " + SPrint(this) + " " - + SPrint(m_backward) + " " + SPrint(m_forward); + + SPrint(m_backward) + " " + SPrint(m_forward); } size_t BidirectionalReorderingState::hash() const @@ -57,10 +57,10 @@ bool BidirectionalReorderingState::operator==(const FFState& o) const if (&o == this) return true; BidirectionalReorderingState const &other = - static_cast(o); + static_cast(o); bool ret = (*m_backward == *other.m_backward) - && (*m_forward == *other.m_forward); + && (*m_forward == *other.m_forward); return ret; } @@ -69,11 +69,11 @@ void BidirectionalReorderingState::Expand(const ManagerBase &mgr, Scores &scores, FFState &state) const { BidirectionalReorderingState &stateCast = - static_cast(state); + 
static_cast(state); m_backward->Expand(mgr, ff, hypo, phraseTableInd, scores, - *stateCast.m_backward); + *stateCast.m_backward); m_forward->Expand(mgr, ff, hypo, phraseTableInd, scores, - *stateCast.m_forward); + *stateCast.m_forward); } } /* namespace Moses2 */ diff --git a/moses2/FF/LexicalReordering/BidirectionalReorderingState.h b/moses2/FF/LexicalReordering/BidirectionalReorderingState.h index 487e84928..289809798 100644 --- a/moses2/FF/LexicalReordering/BidirectionalReorderingState.h +++ b/moses2/FF/LexicalReordering/BidirectionalReorderingState.h @@ -14,12 +14,12 @@ class BidirectionalReorderingState: public LRState { public: BidirectionalReorderingState(const LRModel &config, LRState *bw, LRState *fw, - size_t offset); + size_t offset); virtual ~BidirectionalReorderingState(); void Init(const LRState *prev, const TargetPhrase &topt, - const InputPathBase &path, bool first, const Bitmap *coverage); + const InputPathBase &path, bool first, const Bitmap *coverage); size_t hash() const; virtual bool operator==(const FFState& other) const; @@ -27,8 +27,8 @@ public: virtual std::string ToString() const; void Expand(const ManagerBase &mgr, const LexicalReordering &ff, - const Hypothesis &hypo, size_t phraseTableInd, Scores &scores, - FFState &state) const; + const Hypothesis &hypo, size_t phraseTableInd, Scores &scores, + FFState &state) const; protected: LRState *m_backward; diff --git a/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp b/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp index 600a208b9..a54cd7fcf 100644 --- a/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp +++ b/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp @@ -14,7 +14,7 @@ namespace Moses2 HReorderingBackwardState::HReorderingBackwardState(MemPool &pool, const LRModel &config, size_t offset) : - LRState(config, LRModel::Backward, offset), reoStack(pool) + LRState(config, LRModel::Backward, offset), reoStack(pool) { // TODO Auto-generated constructor stub @@ 
-26,8 +26,8 @@ HReorderingBackwardState::~HReorderingBackwardState() } void HReorderingBackwardState::Init(const LRState *prev, - const TargetPhrase &topt, const InputPathBase &path, bool first, - const Bitmap *coverage) + const TargetPhrase &topt, const InputPathBase &path, bool first, + const Bitmap *coverage) { prevTP = &topt; reoStack.Init(); @@ -42,7 +42,7 @@ size_t HReorderingBackwardState::hash() const bool HReorderingBackwardState::operator==(const FFState& o) const { const HReorderingBackwardState& other = - static_cast(o); + static_cast(o); bool ret = reoStack == other.reoStack; return ret; } @@ -53,13 +53,13 @@ std::string HReorderingBackwardState::ToString() const } void HReorderingBackwardState::Expand(const ManagerBase &mgr, - const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd, - Scores &scores, FFState &state) const + const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd, + Scores &scores, FFState &state) const { HReorderingBackwardState &nextState = - static_cast(state); + static_cast(state); nextState.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false, - NULL); + NULL); nextState.reoStack = reoStack; const Range &swrange = hypo.GetInputPath().range; diff --git a/moses2/FF/LexicalReordering/HReorderingBackwardState.h b/moses2/FF/LexicalReordering/HReorderingBackwardState.h index 9977724d3..8cdea5a44 100644 --- a/moses2/FF/LexicalReordering/HReorderingBackwardState.h +++ b/moses2/FF/LexicalReordering/HReorderingBackwardState.h @@ -20,7 +20,7 @@ public: HReorderingBackwardState(MemPool &pool, const LRModel &config, size_t offset); virtual void Init(const LRState *prev, const TargetPhrase &topt, - const InputPathBase &path, bool first, const Bitmap *coverage); + const InputPathBase &path, bool first, const Bitmap *coverage); virtual ~HReorderingBackwardState(); @@ -28,8 +28,8 @@ public: virtual bool operator==(const FFState& other) const; virtual std::string ToString() const; void Expand(const 
ManagerBase &mgr, const LexicalReordering &ff, - const Hypothesis &hypo, size_t phraseTableInd, Scores &scores, - FFState &state) const; + const Hypothesis &hypo, size_t phraseTableInd, Scores &scores, + FFState &state) const; }; diff --git a/moses2/FF/LexicalReordering/HReorderingForwardState.cpp b/moses2/FF/LexicalReordering/HReorderingForwardState.cpp index c50626106..1041115f7 100644 --- a/moses2/FF/LexicalReordering/HReorderingForwardState.cpp +++ b/moses2/FF/LexicalReordering/HReorderingForwardState.cpp @@ -15,7 +15,7 @@ namespace Moses2 HReorderingForwardState::HReorderingForwardState(const LRModel &config, size_t offset) : - LRState(config, LRModel::Forward, offset), m_first(true) + LRState(config, LRModel::Forward, offset), m_first(true) { prevPath = NULL; m_coverage = NULL; @@ -27,8 +27,8 @@ HReorderingForwardState::~HReorderingForwardState() } void HReorderingForwardState::Init(const LRState *prev, - const TargetPhrase &topt, const InputPathBase &path, bool first, - const Bitmap *coverage) + const TargetPhrase &topt, const InputPathBase &path, bool first, + const Bitmap *coverage) { prevTP = &topt; prevPath = &path; @@ -48,12 +48,12 @@ bool HReorderingForwardState::operator==(const FFState& o) const if (&o == this) return true; HReorderingForwardState const& other = - static_cast(o); + static_cast(o); int compareScores = ( - (prevPath->range == other.prevPath->range) ? - ComparePrevScores(other.prevTP) : - (prevPath->range < other.prevPath->range) ? -1 : 1); + (prevPath->range == other.prevPath->range) ? + ComparePrevScores(other.prevTP) : + (prevPath->range < other.prevPath->range) ? 
-1 : 1); return compareScores == 0; } @@ -63,8 +63,8 @@ std::string HReorderingForwardState::ToString() const } void HReorderingForwardState::Expand(const ManagerBase &mgr, - const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd, - Scores &scores, FFState &state) const + const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd, + Scores &scores, FFState &state) const { const Range &cur = hypo.GetInputPath().range; // keep track of the current coverage ourselves so we don't need the hypothesis @@ -79,9 +79,9 @@ void HReorderingForwardState::Expand(const ManagerBase &mgr, } HReorderingForwardState &stateCast = - static_cast(state); + static_cast(state); stateCast.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false, - &cov); + &cov); } } /* namespace Moses2 */ diff --git a/moses2/FF/LexicalReordering/HReorderingForwardState.h b/moses2/FF/LexicalReordering/HReorderingForwardState.h index 8f9b8bd23..51358daa3 100644 --- a/moses2/FF/LexicalReordering/HReorderingForwardState.h +++ b/moses2/FF/LexicalReordering/HReorderingForwardState.h @@ -20,14 +20,14 @@ public: virtual ~HReorderingForwardState(); void Init(const LRState *prev, const TargetPhrase &topt, - const InputPathBase &path, bool first, const Bitmap *coverage); + const InputPathBase &path, bool first, const Bitmap *coverage); size_t hash() const; virtual bool operator==(const FFState& other) const; virtual std::string ToString() const; void Expand(const ManagerBase &mgr, const LexicalReordering &ff, - const Hypothesis &hypo, size_t phraseTableInd, Scores &scores, - FFState &state) const; + const Hypothesis &hypo, size_t phraseTableInd, Scores &scores, + FFState &state) const; protected: bool m_first; diff --git a/moses2/FF/LexicalReordering/LRModel.cpp b/moses2/FF/LexicalReordering/LRModel.cpp index 47b711369..c2a914009 100644 --- a/moses2/FF/LexicalReordering/LRModel.cpp +++ b/moses2/FF/LexicalReordering/LRModel.cpp @@ -22,8 +22,8 @@ namespace Moses2 { bool 
IsMonotonicStep(Range const& prev, // words range of last source phrase - Range const& cur, // words range of current source phrase - Bitmap const& cov) // coverage bitmap + Range const& cur, // words range of current source phrase + Bitmap const& cov) // coverage bitmap { size_t e = prev.GetEndPos() + 1; size_t s = cur.GetStartPos(); @@ -38,19 +38,17 @@ bool IsSwap(Range const& prev, Range const& cur, Bitmap const& cov) } LRModel::LRModel(const std::string &modelType, LexicalReordering &ff) : - m_modelType(None), m_phraseBased(true), m_collapseScores(false), m_direction( - Backward), m_scoreProducer(&ff) + m_modelType(None), m_phraseBased(true), m_collapseScores(false), m_direction( + Backward), m_scoreProducer(&ff) { std::vector config = Tokenize(modelType, "-"); for (size_t i = 0; i < config.size(); ++i) { if (config[i] == "hier") { m_phraseBased = false; - } - else if (config[i] == "phrase") { + } else if (config[i] == "phrase") { m_phraseBased = true; - } - else if (config[i] == "wbe") { + } else if (config[i] == "wbe") { m_phraseBased = true; } // no word-based decoding available, fall-back to phrase-based @@ -58,45 +56,36 @@ LRModel::LRModel(const std::string &modelType, LexicalReordering &ff) : else if (config[i] == "msd") { m_modelType = MSD; - } - else if (config[i] == "mslr") { + } else if (config[i] == "mslr") { m_modelType = MSLR; - } - else if (config[i] == "monotonicity") { + } else if (config[i] == "monotonicity") { m_modelType = Monotonic; - } - else if (config[i] == "leftright") { + } else if (config[i] == "leftright") { m_modelType = LeftRight; } // unidirectional is deprecated, use backward instead else if (config[i] == "unidirectional") { m_direction = Backward; - } - else if (config[i] == "backward") { + } else if (config[i] == "backward") { m_direction = Backward; - } - else if (config[i] == "forward") { + } else if (config[i] == "forward") { m_direction = Forward; - } - else if (config[i] == "bidirectional") { + } else if (config[i] == 
"bidirectional") { m_direction = Bidirectional; } else if (config[i] == "f") { m_condition = F; - } - else if (config[i] == "fe") { + } else if (config[i] == "fe") { m_condition = FE; } else if (config[i] == "collapseff") { m_collapseScores = true; - } - else if (config[i] == "allff") { + } else if (config[i] == "allff") { m_collapseScores = false; - } - else { + } else { std::cerr << "Illegal part in the lexical reordering configuration string: " << config[i] << std::endl; @@ -106,7 +95,7 @@ LRModel::LRModel(const std::string &modelType, LexicalReordering &ff) : if (m_modelType == None) { std::cerr << "You need to specify the type of the reordering model " - << "(msd, monotonicity,...)" << std::endl; + << "(msd, monotonicity,...)" << std::endl; exit(1); } @@ -135,19 +124,19 @@ LRModel::ReorderingType LRModel::GetOrientation(Range const& prev, { UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified"); return ( - (m_modelType == LeftRight) ? prev.GetEndPos() <= cur.GetStartPos() ? R : L - : (cur.GetStartPos() == prev.GetEndPos() + 1) ? M : - (m_modelType == Monotonic) ? NM : - (prev.GetStartPos() == cur.GetEndPos() + 1) ? S : - (m_modelType == MSD) ? D : - (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL); + (m_modelType == LeftRight) ? prev.GetEndPos() <= cur.GetStartPos() ? R : L + : (cur.GetStartPos() == prev.GetEndPos() + 1) ? M : + (m_modelType == Monotonic) ? NM : + (prev.GetStartPos() == cur.GetEndPos() + 1) ? S : + (m_modelType == MSD) ? D : + (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL); } LRModel::ReorderingType LRModel::GetOrientation(int const reoDistance) const { // this one is for HierarchicalReorderingBackwardState return ((m_modelType == LeftRight) ? (reoDistance >= 1) ? R : L - : (reoDistance == 1) ? M : (m_modelType == Monotonic) ? NM : + : (reoDistance == 1) ? M : (m_modelType == Monotonic) ? NM : (reoDistance == -1) ? S : (m_modelType == MSD) ? D : (reoDistance > 1) ? 
DR : DL); } @@ -162,28 +151,26 @@ LRState *LRModel::CreateLRState(MemPool &pool) const case Bidirectional: if (m_phraseBased) { bwd = - new (pool.Allocate()) PhraseBasedReorderingState( - *this, Backward, offset); + new (pool.Allocate()) PhraseBasedReorderingState( + *this, Backward, offset); //cerr << "bwd=" << bwd << bwd->ToString() << endl; - } - else { + } else { bwd = - new (pool.Allocate()) HReorderingBackwardState( - pool, *this, offset); + new (pool.Allocate()) HReorderingBackwardState( + pool, *this, offset); } offset += m_collapseScores ? 1 : GetNumberOfTypes(); if (m_direction == Backward) return bwd; // else fall through case Forward: if (m_phraseBased) { fwd = - new (pool.Allocate()) PhraseBasedReorderingState( - *this, Forward, offset); + new (pool.Allocate()) PhraseBasedReorderingState( + *this, Forward, offset); //cerr << "fwd=" << fwd << fwd->ToString() << endl; - } - else { + } else { fwd = - new (pool.Allocate()) HReorderingForwardState( - *this, offset); + new (pool.Allocate()) HReorderingForwardState( + *this, offset); } offset += m_collapseScores ? 1 : GetNumberOfTypes(); if (m_direction == Forward) return fwd; @@ -191,8 +178,8 @@ LRState *LRModel::CreateLRState(MemPool &pool) const //cerr << "LRStates:" << *bwd << endl << *fwd << endl; BidirectionalReorderingState *ret = - new (pool.Allocate()) BidirectionalReorderingState( - *this, bwd, fwd, 0); + new (pool.Allocate()) BidirectionalReorderingState( + *this, bwd, fwd, 0); return ret; } @@ -200,10 +187,10 @@ LRModel::ReorderingType LRModel::GetOrientation(Range const& prev, Range const& cur, Bitmap const& cov) const { return ( - (m_modelType == LeftRight) ? cur.GetStartPos() > prev.GetEndPos() ? R : L - : IsMonotonicStep(prev, cur, cov) ? M : (m_modelType == Monotonic) ? NM : - IsSwap(prev, cur, cov) ? S : (m_modelType == MSD) ? D : - cur.GetStartPos() > prev.GetEndPos() ? DR : DL); + (m_modelType == LeftRight) ? cur.GetStartPos() > prev.GetEndPos() ? 
R : L + : IsMonotonicStep(prev, cur, cov) ? M : (m_modelType == Monotonic) ? NM : + IsSwap(prev, cur, cov) ? S : (m_modelType == MSD) ? D : + cur.GetStartPos() > prev.GetEndPos() ? DR : DL); } } /* namespace Moses2 */ diff --git a/moses2/FF/LexicalReordering/LRModel.h b/moses2/FF/LexicalReordering/LRModel.h index 2713fa46d..0309d5386 100644 --- a/moses2/FF/LexicalReordering/LRModel.h +++ b/moses2/FF/LexicalReordering/LRModel.h @@ -19,21 +19,17 @@ class LexicalReordering; class LRModel { public: - enum ModelType - { + enum ModelType { Monotonic, MSD, MSLR, LeftRight, None }; - enum Direction - { + enum Direction { Forward, Backward, Bidirectional }; - enum Condition - { + enum Condition { F, E, FE }; - enum ReorderingType - { + enum ReorderingType { M = 0, // monotonic NM = 1, // non-monotonic S = 1, // swap @@ -49,34 +45,28 @@ public: LRModel(const std::string &modelType, LexicalReordering &ff); virtual ~LRModel(); - ModelType GetModelType() const - { + ModelType GetModelType() const { return m_modelType; } - Direction GetDirection() const - { + Direction GetDirection() const { return m_direction; } - Condition GetCondition() const - { + Condition GetCondition() const { return m_condition; } - bool IsPhraseBased() const - { + bool IsPhraseBased() const { return m_phraseBased; } - bool CollapseScores() const - { + bool CollapseScores() const { return m_collapseScores; } size_t GetNumberOfTypes() const; LexicalReordering* - GetScoreProducer() const - { + GetScoreProducer() const { return m_scoreProducer; } diff --git a/moses2/FF/LexicalReordering/LRState.cpp b/moses2/FF/LexicalReordering/LRState.cpp index 4e9abd774..a8a3bf6d0 100644 --- a/moses2/FF/LexicalReordering/LRState.cpp +++ b/moses2/FF/LexicalReordering/LRState.cpp @@ -17,7 +17,7 @@ namespace Moses2 class InputType; LRState::LRState(const LRModel &config, LRModel::Direction dir, size_t offset) : - m_configuration(config), m_direction(dir), m_offset(offset) + m_configuration(config), m_direction(dir), 
m_offset(offset) { } @@ -43,15 +43,15 @@ int LRState::ComparePrevScores(const TargetPhrase *other) const } void LRState::CopyScores(const System &system, Scores &accum, - const TargetPhrase &topt, ReorderingType reoType) const + const TargetPhrase &topt, ReorderingType reoType) const { // don't call this on a bidirectional object UTIL_THROW_IF2( - m_direction != LRModel::Backward && m_direction != LRModel::Forward, - "Unknown direction: " << m_direction); + m_direction != LRModel::Backward && m_direction != LRModel::Forward, + "Unknown direction: " << m_direction); TargetPhrase const* relevantOpt = ( - (m_direction == LRModel::Backward) ? &topt : prevTP); + (m_direction == LRModel::Backward) ? &topt : prevTP); LexicalReordering* producer = m_configuration.GetScoreProducer(); size_t phraseTableInd = producer->GetPhraseTableInd(); @@ -65,7 +65,7 @@ void LRState::CopyScores(const System &system, Scores &accum, size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote; UTIL_THROW_IF2(off_local >= producer->GetNumScores(), - "offset out of vector bounds!"); + "offset out of vector bounds!"); // look up applicable score from vector of scores //UTIL_THROW_IF2(off_remote >= cached->size(), "offset out of vector bounds!"); diff --git a/moses2/FF/LexicalReordering/LRState.h b/moses2/FF/LexicalReordering/LRState.h index 0e906d09a..c53b9de78 100644 --- a/moses2/FF/LexicalReordering/LRState.h +++ b/moses2/FF/LexicalReordering/LRState.h @@ -26,14 +26,14 @@ public: LRState(const LRModel &config, LRModel::Direction dir, size_t offset); virtual void Init(const LRState *prev, const TargetPhrase &topt, - const InputPathBase &path, bool first, const Bitmap *coverage) = 0; + const InputPathBase &path, bool first, const Bitmap *coverage) = 0; virtual void Expand(const ManagerBase &mgr, const LexicalReordering &ff, - const Hypothesis &hypo, size_t phraseTableInd, Scores &scores, - FFState &state) const = 0; + const Hypothesis &hypo, size_t phraseTableInd, Scores 
&scores, + FFState &state) const = 0; void CopyScores(const System &system, Scores &accum, const TargetPhrase &topt, - ReorderingType reoType) const; + ReorderingType reoType) const; protected: const LRModel& m_configuration; diff --git a/moses2/FF/LexicalReordering/LexicalReordering.cpp b/moses2/FF/LexicalReordering/LexicalReordering.cpp index 97394ce84..dd5529640 100644 --- a/moses2/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses2/FF/LexicalReordering/LexicalReordering.cpp @@ -30,8 +30,8 @@ namespace Moses2 /////////////////////////////////////////////////////////////////////// LexicalReordering::LexicalReordering(size_t startInd, const std::string &line) : - StatefulFeatureFunction(startInd, line), m_compactModel(NULL), m_blank( - NULL), m_propertyInd(-1), m_coll(NULL), m_configuration(NULL) + StatefulFeatureFunction(startInd, line), m_compactModel(NULL), m_blank( + NULL), m_propertyInd(-1), m_coll(NULL), m_configuration(NULL) { ReadParameters(); assert(m_configuration); @@ -51,13 +51,11 @@ void LexicalReordering::Load(System &system) if (m_propertyInd >= 0) { // Using integrate Lex RO. 
No loading needed - } - else if (FileExists(m_path + ".minlexr")) { + } else if (FileExists(m_path + ".minlexr")) { m_compactModel = new LexicalReorderingTableCompact(m_path + ".minlexr", m_FactorsF, m_FactorsE, m_FactorsC); m_blank = new (pool.Allocate()) PhraseImpl(pool, 0); - } - else { + } else { m_coll = new Coll(); InputFileStream file(m_path); string line; @@ -71,12 +69,12 @@ void LexicalReordering::Load(System &system) std::vector toks = TokenizeMultiCharSeparator(line, "|||"); assert(toks.size() == 3); PhraseImpl *source = PhraseImpl::CreateFromString(pool, system.GetVocab(), - system, toks[0]); + system, toks[0]); PhraseImpl *target = PhraseImpl::CreateFromString(pool, system.GetVocab(), - system, toks[1]); + system, toks[1]); std::vector scores = Tokenize(toks[2]); std::transform(scores.begin(), scores.end(), scores.begin(), - TransformScore); + TransformScore); std::transform(scores.begin(), scores.end(), scores.begin(), FloorScore); Key key(source, target); @@ -86,24 +84,19 @@ void LexicalReordering::Load(System &system) } void LexicalReordering::SetParameter(const std::string& key, - const std::string& value) + const std::string& value) { if (key == "path") { m_path = value; - } - else if (key == "type") { + } else if (key == "type") { m_configuration = new LRModel(value, *this); - } - else if (key == "input-factor") { + } else if (key == "input-factor") { m_FactorsF = Tokenize(value); - } - else if (key == "output-factor") { + } else if (key == "output-factor") { m_FactorsE = Tokenize(value); - } - else if (key == "property-index") { + } else if (key == "property-index") { m_propertyInd = Scan(value); - } - else { + } else { StatefulFeatureFunction::SetParameter(key, value); } } @@ -119,9 +112,9 @@ void LexicalReordering::EmptyHypothesisState(FFState &state, const Hypothesis &hypo) const { BidirectionalReorderingState &stateCast = - static_cast(state); + static_cast(state); stateCast.Init(NULL, hypo.GetTargetPhrase(), hypo.GetInputPath(), true, - 
&hypo.GetBitmap()); + &hypo.GetBitmap()); } void LexicalReordering::EvaluateInIsolation(MemPool &pool, const System &system, @@ -141,9 +134,9 @@ void LexicalReordering::EvaluateInIsolation(MemPool &pool, const System &system, void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool, const TargetPhrases &tps, const Phrase &sourcePhrase) const { - BOOST_FOREACH(const TargetPhraseImpl *tp, tps){ - EvaluateAfterTablePruning(pool, *tp, sourcePhrase); -} + BOOST_FOREACH(const TargetPhraseImpl *tp, tps) { + EvaluateAfterTablePruning(pool, *tp, sourcePhrase); + } } void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool, @@ -152,11 +145,10 @@ void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool, if (m_propertyInd >= 0) { SCORE *scoreArr = targetPhrase.GetScoresProperty(m_propertyInd); targetPhrase.ffData[m_PhraseTableInd] = scoreArr; - } - else if (m_compactModel) { + } else if (m_compactModel) { // using external compact binary model const Values values = m_compactModel->GetScore(sourcePhrase, targetPhrase, - *m_blank); + *m_blank); if (values.size()) { assert(values.size() == m_numScores); @@ -165,12 +157,10 @@ void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool, scoreArr[i] = values[i]; } targetPhrase.ffData[m_PhraseTableInd] = scoreArr; - } - else { + } else { targetPhrase.ffData[m_PhraseTableInd] = NULL; } - } - else if (m_coll) { + } else if (m_coll) { // using external memory model // cache data in target phrase @@ -183,8 +173,7 @@ void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool, scoreArr[i] = (*values)[i]; } targetPhrase.ffData[m_PhraseTableInd] = scoreArr; - } - else { + } else { targetPhrase.ffData[m_PhraseTableInd] = NULL; } } @@ -199,15 +188,14 @@ void LexicalReordering::EvaluateWhenApplied(const ManagerBase &mgr, } const LexicalReordering::Values *LexicalReordering::GetValues( - const Phrase &source, const Phrase &target) const + const Phrase &source, const Phrase &target) const { Key key(&source, &target); 
Coll::const_iterator iter; iter = m_coll->find(key); if (iter == m_coll->end()) { return NULL; - } - else { + } else { return &iter->second; } } diff --git a/moses2/FF/LexicalReordering/LexicalReordering.h b/moses2/FF/LexicalReordering/LexicalReordering.h index b14517db2..da8e7780f 100644 --- a/moses2/FF/LexicalReordering/LexicalReordering.h +++ b/moses2/FF/LexicalReordering/LexicalReordering.h @@ -30,36 +30,35 @@ public: virtual void SetParameter(const std::string& key, const std::string& value); - virtual size_t HasPhraseTableInd() const - { + virtual size_t HasPhraseTableInd() const { return true; } virtual FFState* BlankState(MemPool &pool, const System &sys) const; virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const; + const InputType &input, const Hypothesis &hypo) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateAfterTablePruning(MemPool &pool, const TargetPhrases &tps, - const Phrase &sourcePhrase) const; + const Phrase &sourcePhrase) const; virtual void EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const; + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const; virtual void EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const; + const SCFG::Hypothesis &hypo, int featureID, Scores 
&scores, + FFState &state) const; protected: std::string m_path; @@ -71,7 +70,7 @@ protected: virtual void EvaluateAfterTablePruning(MemPool &pool, const TargetPhraseImpl &targetPhrase, - const Phrase &sourcePhrase) const; + const Phrase &sourcePhrase) const; // PROPERTY IN PT int m_propertyInd; @@ -84,17 +83,14 @@ protected: typedef std::pair*, const Phrase* > Key; typedef std::vector Values; - struct KeyComparer - { - size_t operator()(const Key &obj) const - { + struct KeyComparer { + size_t operator()(const Key &obj) const { size_t seed = obj.first->hash(); boost::hash_combine(seed, obj.second->hash()); return seed; } - bool operator()(const Key& a, const Key& b) const - { + bool operator()(const Key& a, const Key& b) const { if ((*a.first) != (*b.first)) { return false; } diff --git a/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp b/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp index c7d4abf03..6b8060021 100644 --- a/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp +++ b/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp @@ -18,7 +18,7 @@ namespace Moses2 PhraseBasedReorderingState::PhraseBasedReorderingState(const LRModel &config, LRModel::Direction dir, size_t offset) : - LRState(config, dir, offset) + LRState(config, dir, offset) { // uninitialised prevPath = NULL; @@ -26,8 +26,8 @@ PhraseBasedReorderingState::PhraseBasedReorderingState(const LRModel &config, } void PhraseBasedReorderingState::Init(const LRState *prev, - const TargetPhrase &topt, const InputPathBase &path, bool first, - const Bitmap *coverage) + const TargetPhrase &topt, const InputPathBase &path, bool first, + const Bitmap *coverage) { prevTP = &topt; prevPath = &path; @@ -48,39 +48,37 @@ bool PhraseBasedReorderingState::operator==(const FFState& o) const if (&o == this) return true; const PhraseBasedReorderingState &other = - static_cast(o); + static_cast(o); if (&prevPath->range == &other.prevPath->range) { if (m_direction == LRModel::Forward) { 
int compareScore = ComparePrevScores(other.prevTP); return compareScore == 0; - } - else { + } else { return true; } - } - else { + } else { return false; } } void PhraseBasedReorderingState::Expand(const ManagerBase &mgr, - const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd, - Scores &scores, FFState &state) const + const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd, + Scores &scores, FFState &state) const { if ((m_direction != LRModel::Forward) || !m_first) { LRModel const& lrmodel = m_configuration; Range const &cur = hypo.GetInputPath().range; LRModel::ReorderingType reoType = ( - m_first ? - lrmodel.GetOrientation(cur) : - lrmodel.GetOrientation(prevPath->range, cur)); + m_first ? + lrmodel.GetOrientation(cur) : + lrmodel.GetOrientation(prevPath->range, cur)); CopyScores(mgr.system, scores, hypo.GetTargetPhrase(), reoType); } PhraseBasedReorderingState &stateCast = - static_cast(state); + static_cast(state); stateCast.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false, - NULL); + NULL); } } /* namespace Moses2 */ diff --git a/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h b/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h index e26237cf7..77994e477 100644 --- a/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h +++ b/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h @@ -20,22 +20,21 @@ public: bool m_first; PhraseBasedReorderingState(const LRModel &config, LRModel::Direction dir, - size_t offset); + size_t offset); void Init(const LRState *prev, const TargetPhrase &topt, - const InputPathBase &path, bool first, const Bitmap *coverage); + const InputPathBase &path, bool first, const Bitmap *coverage); size_t hash() const; virtual bool operator==(const FFState& other) const; - virtual std::string ToString() const - { + virtual std::string ToString() const { return "PhraseBasedReorderingState"; } void Expand(const ManagerBase &mgr, const LexicalReordering &ff, - const 
Hypothesis &hypo, size_t phraseTableInd, Scores &scores, - FFState &state) const; + const Hypothesis &hypo, size_t phraseTableInd, Scores &scores, + FFState &state) const; protected: diff --git a/moses2/FF/LexicalReordering/ReorderingStack.cpp b/moses2/FF/LexicalReordering/ReorderingStack.cpp index 298257fc4..6a4bf3c33 100644 --- a/moses2/FF/LexicalReordering/ReorderingStack.cpp +++ b/moses2/FF/LexicalReordering/ReorderingStack.cpp @@ -11,7 +11,7 @@ namespace Moses2 { ReorderingStack::ReorderingStack(MemPool &pool) : - m_stack(pool) + m_stack(pool) { } @@ -50,8 +50,7 @@ int ReorderingStack::ShiftReduce(const Range &input_span) //calculate the distance we are returning if (input_span.GetStartPos() > prev_span.GetStartPos()) { distance = input_span.GetStartPos() - prev_span.GetEndPos(); - } - else { + } else { distance = input_span.GetEndPos() - prev_span.GetStartPos(); } @@ -59,13 +58,11 @@ int ReorderingStack::ShiftReduce(const Range &input_span) m_stack.pop_back(); Range new_span(prev_span.GetStartPos(), input_span.GetEndPos()); Reduce(new_span); - } - else if (distance == -1) { //swap + } else if (distance == -1) { //swap m_stack.pop_back(); Range new_span(input_span.GetStartPos(), prev_span.GetEndPos()); Reduce(new_span); - } - else { // discontinuous + } else { // discontinuous m_stack.push_back(input_span); } @@ -85,13 +82,11 @@ void ReorderingStack::Reduce(Range current) m_stack.pop_back(); Range t(previous.GetStartPos(), current.GetEndPos()); current = t; - } - else if (previous.GetStartPos() - current.GetEndPos() == 1) { //swap&merge + } else if (previous.GetStartPos() - current.GetEndPos() == 1) { //swap&merge m_stack.pop_back(); Range t(current.GetStartPos(), previous.GetEndPos()); current = t; - } - else { // discontinuous, no more merging + } else { // discontinuous, no more merging cont_loop = false; } } // finished reducing, exit diff --git a/moses2/FF/OSM/OpSequenceModel.cpp b/moses2/FF/OSM/OpSequenceModel.cpp index 572065813..093e5d819 100644 --- 
a/moses2/FF/OSM/OpSequenceModel.cpp +++ b/moses2/FF/OSM/OpSequenceModel.cpp @@ -17,7 +17,7 @@ namespace Moses2 //////////////////////////////////////////////////////////////////////////////////////// OpSequenceModel::OpSequenceModel(size_t startInd, const std::string &line) : - StatefulFeatureFunction(startInd, line) + StatefulFeatureFunction(startInd, line) { sFactor = 0; tFactor = 0; @@ -96,7 +96,7 @@ void OpSequenceModel::EvaluateInIsolation(MemPool &pool, obj.populateScores(scoresVec,numFeatures); SCORE weightedScore = Scores::CalcWeightedScore(system, *this, - scoresVec.data()); + scoresVec.data()); estimatedScore += weightedScore; } diff --git a/moses2/FF/OSM/OpSequenceModel.h b/moses2/FF/OSM/OpSequenceModel.h index d46cc82fb..f8b99e95c 100644 --- a/moses2/FF/OSM/OpSequenceModel.h +++ b/moses2/FF/OSM/OpSequenceModel.h @@ -16,32 +16,32 @@ public: int tFactor; // Target Factor ... util::LoadMethod load_method; // method to load model - OpSequenceModel(size_t startInd, const std::string &line); + OpSequenceModel(size_t startInd, const std::string &line); virtual ~OpSequenceModel(); virtual void Load(System &system); virtual FFState* BlankState(MemPool &pool, const System &sys) const; virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const; + const InputType &input, const Hypothesis &hypo) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis 
&hypo, const FFState &prevState, Scores &scores, - FFState &state) const; + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const; virtual void EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const; + const SCFG::Hypothesis &hypo, int featureID, Scores &scores, + FFState &state) const; void SetParameter(const std::string& key, const std::string& value); diff --git a/moses2/FF/OSM/osmHyp.h b/moses2/FF/OSM/osmHyp.h index c2893d366..338b73ec2 100644 --- a/moses2/FF/OSM/osmHyp.h +++ b/moses2/FF/OSM/osmHyp.h @@ -22,8 +22,9 @@ public: virtual size_t hash() const; virtual bool operator==(const FFState& other) const; - virtual std::string ToString() const - { return "osmState"; } + virtual std::string ToString() const { + return "osmState"; + } void saveState(int jVal, int eVal, std::map & gapVal); int getJ()const { diff --git a/moses2/FF/PhrasePenalty.cpp b/moses2/FF/PhrasePenalty.cpp index 2a1764a0e..84087740d 100644 --- a/moses2/FF/PhrasePenalty.cpp +++ b/moses2/FF/PhrasePenalty.cpp @@ -12,7 +12,7 @@ namespace Moses2 { PhrasePenalty::PhrasePenalty(size_t startInd, const std::string &line) : - StatelessFeatureFunction(startInd, line) + StatelessFeatureFunction(startInd, line) { ReadParameters(); } @@ -23,15 +23,15 @@ PhrasePenalty::~PhrasePenalty() } void PhrasePenalty::EvaluateInIsolation(MemPool &pool, const System &system, - const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { scores.PlusEquals(system, *this, 1); } void PhrasePenalty::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { 
scores.PlusEquals(system, *this, 1); } diff --git a/moses2/FF/PhrasePenalty.h b/moses2/FF/PhrasePenalty.h index c2066356c..855bdbf09 100644 --- a/moses2/FF/PhrasePenalty.h +++ b/moses2/FF/PhrasePenalty.h @@ -20,13 +20,13 @@ public: virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; }; diff --git a/moses2/FF/PointerState.cpp b/moses2/FF/PointerState.cpp index e69de29bb..facb0a2f9 100644 --- a/moses2/FF/PointerState.cpp +++ b/moses2/FF/PointerState.cpp @@ -0,0 +1,6 @@ +#include "PointerState.h" + +namespace Moses2 +{ + +} diff --git a/moses2/FF/PointerState.h b/moses2/FF/PointerState.h index 41e6edf9f..a73b57650 100644 --- a/moses2/FF/PointerState.h +++ b/moses2/FF/PointerState.h @@ -6,31 +6,25 @@ namespace Moses2 { -struct PointerState: public FFState -{ +struct PointerState: public FFState { const void* lmstate; - explicit PointerState() - { + explicit PointerState() { // uninitialised } - PointerState(const void* lms) - { + PointerState(const void* lms) { lmstate = lms; } - virtual size_t hash() const - { + virtual size_t hash() const { return (size_t) lmstate; } - virtual bool operator==(const FFState& other) const - { + virtual bool operator==(const FFState& other) const { const PointerState& o = static_cast(other); return lmstate == o.lmstate; } - virtual std::string ToString() const - { + virtual std::string ToString() const { std::stringstream sb; sb << lmstate; return sb.str(); diff --git a/moses2/FF/SkeletonStatefulFF.cpp b/moses2/FF/SkeletonStatefulFF.cpp index d159794f0..c4c2b7329 100644 
--- a/moses2/FF/SkeletonStatefulFF.cpp +++ b/moses2/FF/SkeletonStatefulFF.cpp @@ -19,23 +19,19 @@ class SkeletonState: public FFState public: int targetLen; - SkeletonState() - { + SkeletonState() { // uninitialised } - virtual size_t hash() const - { + virtual size_t hash() const { return (size_t) targetLen; } - virtual bool operator==(const FFState& o) const - { + virtual bool operator==(const FFState& o) const { const SkeletonState& other = static_cast(o); return targetLen == other.targetLen; } - virtual std::string ToString() const - { + virtual std::string ToString() const { stringstream sb; sb << targetLen; return sb.str(); @@ -45,7 +41,7 @@ public: //////////////////////////////////////////////////////////////////////////////////////// SkeletonStatefulFF::SkeletonStatefulFF(size_t startInd, const std::string &line) : - StatefulFeatureFunction(startInd, line) + StatefulFeatureFunction(startInd, line) { ReadParameters(); } diff --git a/moses2/FF/SkeletonStatefulFF.h b/moses2/FF/SkeletonStatefulFF.h index bfa3ad870..79256f2b3 100644 --- a/moses2/FF/SkeletonStatefulFF.h +++ b/moses2/FF/SkeletonStatefulFF.h @@ -21,25 +21,25 @@ public: virtual FFState* BlankState(MemPool &pool, const System &sys) const; virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const; + const InputType &input, const Hypothesis &hypo) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateWhenApplied(const ManagerBase &mgr, - 
const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const; + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const; virtual void EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const; + const SCFG::Hypothesis &hypo, int featureID, Scores &scores, + FFState &state) const; }; diff --git a/moses2/FF/SkeletonStatelessFF.cpp b/moses2/FF/SkeletonStatelessFF.cpp index 981f9dd75..4875f155d 100644 --- a/moses2/FF/SkeletonStatelessFF.cpp +++ b/moses2/FF/SkeletonStatelessFF.cpp @@ -13,7 +13,7 @@ namespace Moses2 SkeletonStatelessFF::SkeletonStatelessFF(size_t startInd, const std::string &line) : - StatelessFeatureFunction(startInd, line) + StatelessFeatureFunction(startInd, line) { ReadParameters(); } diff --git a/moses2/FF/SkeletonStatelessFF.h b/moses2/FF/SkeletonStatelessFF.h index 9be14bffe..f7e95005f 100644 --- a/moses2/FF/SkeletonStatelessFF.h +++ b/moses2/FF/SkeletonStatelessFF.h @@ -20,13 +20,13 @@ public: virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; }; diff --git a/moses2/FF/StatefulFeatureFunction.cpp b/moses2/FF/StatefulFeatureFunction.cpp index 060338159..d120a6bc4 100644 --- a/moses2/FF/StatefulFeatureFunction.cpp +++ b/moses2/FF/StatefulFeatureFunction.cpp @@ -20,7 +20,7 @@ namespace Moses2 StatefulFeatureFunction::StatefulFeatureFunction(size_t startInd, const std::string &line) : - FeatureFunction(startInd, line) + FeatureFunction(startInd, 
line) { } @@ -30,10 +30,10 @@ StatefulFeatureFunction::~StatefulFeatureFunction() } void StatefulFeatureFunction::EvaluateWhenAppliedBatch( - const System &system, - const Batch &batch) const + const System &system, + const Batch &batch) const { - //cerr << "EvaluateWhenAppliedBatch:" << m_name << endl; + //cerr << "EvaluateWhenAppliedBatch:" << m_name << endl; #ifdef __linux /* pthread_t handle; diff --git a/moses2/FF/StatefulFeatureFunction.h b/moses2/FF/StatefulFeatureFunction.h index fffb1eea7..7cb3eaae9 100644 --- a/moses2/FF/StatefulFeatureFunction.h +++ b/moses2/FF/StatefulFeatureFunction.h @@ -30,12 +30,10 @@ public: StatefulFeatureFunction(size_t startInd, const std::string &line); virtual ~StatefulFeatureFunction(); - void SetStatefulInd(size_t ind) - { + void SetStatefulInd(size_t ind) { m_statefulInd = ind; } - size_t GetStatefulInd() const - { + size_t GetStatefulInd() const { return m_statefulInd; } @@ -44,19 +42,19 @@ public: //! return the state associated with the empty hypothesis for a given sentence virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const = 0; + const InputType &input, const Hypothesis &hypo) const = 0; virtual void EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const = 0; + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const = 0; virtual void EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const = 0; + const SCFG::Hypothesis &hypo, int featureID, Scores &scores, + FFState &state) const = 0; virtual void EvaluateWhenAppliedBatch( - const System &system, - const Batch &batch) const; + const System &system, + const Batch &batch) const; protected: size_t m_statefulInd; diff --git a/moses2/FF/StatelessFeatureFunction.cpp b/moses2/FF/StatelessFeatureFunction.cpp 
index 62fa35d3f..c73d8907c 100644 --- a/moses2/FF/StatelessFeatureFunction.cpp +++ b/moses2/FF/StatelessFeatureFunction.cpp @@ -12,7 +12,7 @@ namespace Moses2 StatelessFeatureFunction::StatelessFeatureFunction(size_t startInd, const std::string &line) : - FeatureFunction(startInd, line) + FeatureFunction(startInd, line) { // TODO Auto-generated constructor stub diff --git a/moses2/FF/WordPenalty.cpp b/moses2/FF/WordPenalty.cpp index e8af47568..576820539 100644 --- a/moses2/FF/WordPenalty.cpp +++ b/moses2/FF/WordPenalty.cpp @@ -17,7 +17,7 @@ namespace Moses2 { WordPenalty::WordPenalty(size_t startInd, const std::string &line) : - StatelessFeatureFunction(startInd, line) + StatelessFeatureFunction(startInd, line) { ReadParameters(); } @@ -28,16 +28,16 @@ WordPenalty::~WordPenalty() } void WordPenalty::EvaluateInIsolation(MemPool &pool, const System &system, - const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { SCORE score = -(SCORE) targetPhrase.GetSize(); scores.PlusEquals(system, *this, score); } void WordPenalty::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { size_t count = 0; for (size_t i = 0; i < targetPhrase.GetSize(); ++i) { diff --git a/moses2/FF/WordPenalty.h b/moses2/FF/WordPenalty.h index c322a15f7..acd1bb873 100644 --- a/moses2/FF/WordPenalty.h +++ b/moses2/FF/WordPenalty.h @@ -21,13 +21,13 @@ public: virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void 
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; }; diff --git a/moses2/HypothesisBase.cpp b/moses2/HypothesisBase.cpp index 8b65a0cdf..c124866d1 100644 --- a/moses2/HypothesisBase.cpp +++ b/moses2/HypothesisBase.cpp @@ -29,15 +29,15 @@ HypothesisBase::HypothesisBase(MemPool &pool, const System &system) // FF states const std::vector &sfffs = - system.featureFunctions.GetStatefulFeatureFunctions(); + system.featureFunctions.GetStatefulFeatureFunctions(); size_t numStatefulFFs = sfffs.size(); m_ffStates = (FFState **) pool.Allocate(sizeof(FFState*) * numStatefulFFs); - BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs){ - size_t statefulInd = sfff->GetStatefulInd(); - FFState *state = sfff->BlankState(pool, system); - m_ffStates[statefulInd] = state; -} + BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs) { + size_t statefulInd = sfff->GetStatefulInd(); + FFState *state = sfff->BlankState(pool, system); + m_ffStates[statefulInd] = state; + } } size_t HypothesisBase::hash() const @@ -48,7 +48,7 @@ size_t HypothesisBase::hash() const size_t HypothesisBase::hash(size_t seed) const { size_t numStatefulFFs = - GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size(); + GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size(); // states for (size_t i = 0; i < numStatefulFFs; ++i) { @@ -63,7 +63,7 @@ size_t HypothesisBase::hash(size_t seed) const bool HypothesisBase::operator==(const HypothesisBase &other) const { size_t numStatefulFFs = - GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size(); + GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size(); // states for (size_t i = 0; i < numStatefulFFs; ++i) { diff --git a/moses2/HypothesisBase.h b/moses2/HypothesisBase.h index 6ef4d3891..557479906 
100644 --- a/moses2/HypothesisBase.h +++ b/moses2/HypothesisBase.h @@ -20,28 +20,31 @@ class Scores; class HypothesisBase { public: - virtual ~HypothesisBase() - { + virtual ~HypothesisBase() { } - inline ManagerBase &GetManager() const - { + inline ManagerBase &GetManager() const { return *m_mgr; } template - const T &Cast() const - { return static_cast(*this); } + const T &Cast() const { + return static_cast(*this); + } - const Scores &GetScores() const - { return *m_scores; } - Scores &GetScores() - { return *m_scores; } + const Scores &GetScores() const { + return *m_scores; + } + Scores &GetScores() { + return *m_scores; + } - const FFState *GetState(size_t ind) const - { return m_ffStates[ind]; } - FFState *GetState(size_t ind) - { return m_ffStates[ind]; } + const FFState *GetState(size_t ind) const { + return m_ffStates[ind]; + } + FFState *GetState(size_t ind) { + return m_ffStates[ind]; + } virtual size_t hash() const; virtual size_t hash(size_t seed) const; @@ -64,8 +67,7 @@ protected: class HypothesisFutureScoreOrderer { public: - bool operator()(const HypothesisBase* a, const HypothesisBase* b) const - { + bool operator()(const HypothesisBase* a, const HypothesisBase* b) const { return a->GetFutureScore() > b->GetFutureScore(); } }; diff --git a/moses2/HypothesisColl.cpp b/moses2/HypothesisColl.cpp index a75113d58..b33ba7835 100644 --- a/moses2/HypothesisColl.cpp +++ b/moses2/HypothesisColl.cpp @@ -19,8 +19,8 @@ namespace Moses2 { HypothesisColl::HypothesisColl(const ManagerBase &mgr) -:m_coll(MemPoolAllocator(mgr.GetPool())) -,m_sortedHypos(NULL) + :m_coll(MemPoolAllocator(mgr.GetPool())) + ,m_sortedHypos(NULL) { m_bestScore = -std::numeric_limits::infinity(); m_worstScore = std::numeric_limits::infinity(); @@ -28,29 +28,29 @@ HypothesisColl::HypothesisColl(const ManagerBase &mgr) const HypothesisBase *HypothesisColl::GetBestHypo() const { - if (GetSize() == 0) { - return NULL; - } - if (m_sortedHypos) { - return (*m_sortedHypos)[0]; - } + if 
(GetSize() == 0) { + return NULL; + } + if (m_sortedHypos) { + return (*m_sortedHypos)[0]; + } - SCORE bestScore = -std::numeric_limits::infinity(); - const HypothesisBase *bestHypo; - BOOST_FOREACH(const HypothesisBase *hypo, m_coll) { - if (hypo->GetFutureScore() > bestScore) { - bestScore = hypo->GetFutureScore(); - bestHypo = hypo; - } - } - return bestHypo; + SCORE bestScore = -std::numeric_limits::infinity(); + const HypothesisBase *bestHypo; + BOOST_FOREACH(const HypothesisBase *hypo, m_coll) { + if (hypo->GetFutureScore() > bestScore) { + bestScore = hypo->GetFutureScore(); + bestHypo = hypo; + } + } + return bestHypo; } void HypothesisColl::Add( - const ManagerBase &mgr, - HypothesisBase *hypo, - Recycler &hypoRecycle, - ArcLists &arcLists) + const ManagerBase &mgr, + HypothesisBase *hypo, + Recycler &hypoRecycle, + ArcLists &arcLists) { size_t maxStackSize = mgr.system.options.search.stack_size; @@ -76,105 +76,100 @@ void HypothesisColl::Add( return; } - StackAdd added = Add(hypo); + StackAdd added = Add(hypo); - size_t nbestSize = mgr.system.options.nbest.nbest_size; - if (nbestSize) { - arcLists.AddArc(added.added, hypo, added.other); - } - else { - if (added.added) { + size_t nbestSize = mgr.system.options.nbest.nbest_size; + if (nbestSize) { + arcLists.AddArc(added.added, hypo, added.other); + } else { + if (added.added) { if (added.other) { hypoRecycle.Recycle(added.other); } - } - else { + } else { hypoRecycle.Recycle(hypo); - } - } + } + } // update beam variables - if (added.added) { + if (added.added) { if (futureScore > m_bestScore) { m_bestScore = futureScore; float beamWidth = mgr.system.options.search.beam_width; if ( m_bestScore + beamWidth > m_worstScore ) { m_worstScore = m_bestScore + beamWidth; } - } - else if (GetSize() <= maxStackSize && futureScore < m_worstScore) { + } else if (GetSize() <= maxStackSize && futureScore < m_worstScore) { m_worstScore = futureScore; } - } + } } StackAdd HypothesisColl::Add(const HypothesisBase *hypo) { 
- std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo); - //cerr << endl << "new=" << hypo->Debug(hypo->GetManager().system) << endl; + std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo); + //cerr << endl << "new=" << hypo->Debug(hypo->GetManager().system) << endl; - // CHECK RECOMBINATION - if (addRet.second) { - // equiv hypo doesn't exists + // CHECK RECOMBINATION + if (addRet.second) { + // equiv hypo doesn't exists //cerr << "Added " << hypo << endl; - return StackAdd(true, NULL); - } - else { - HypothesisBase *hypoExisting = const_cast(*addRet.first); - //cerr << "hypoExisting=" << hypoExisting->Debug(hypo->GetManager().system) << endl; + return StackAdd(true, NULL); + } else { + HypothesisBase *hypoExisting = const_cast(*addRet.first); + //cerr << "hypoExisting=" << hypoExisting->Debug(hypo->GetManager().system) << endl; - if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) { - // incoming hypo is better than the one we have - const HypothesisBase * const &hypoExisting1 = *addRet.first; - const HypothesisBase *&hypoExisting2 = - const_cast(hypoExisting1); - hypoExisting2 = hypo; + if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) { + // incoming hypo is better than the one we have + const HypothesisBase * const &hypoExisting1 = *addRet.first; + const HypothesisBase *&hypoExisting2 = + const_cast(hypoExisting1); + hypoExisting2 = hypo; //cerr << "Added " << hypo << " dicard existing " << hypoExisting2 << endl; - return StackAdd(true, hypoExisting); - } - else { - // already storing the best hypo. discard incoming hypo + return StackAdd(true, hypoExisting); + } else { + // already storing the best hypo. 
discard incoming hypo //cerr << "Keep existing " << hypoExisting << " dicard new " << hypo << endl; - return StackAdd(false, hypoExisting); - } - } + return StackAdd(false, hypoExisting); + } + } - //assert(false); + //assert(false); } const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos( - const ManagerBase &mgr, - ArcLists &arcLists) const + const ManagerBase &mgr, + ArcLists &arcLists) const { - if (m_sortedHypos == NULL) { - // create sortedHypos first - MemPool &pool = mgr.GetPool(); - m_sortedHypos = new (pool.Allocate()) Hypotheses(pool, - m_coll.size()); + if (m_sortedHypos == NULL) { + // create sortedHypos first + MemPool &pool = mgr.GetPool(); + m_sortedHypos = new (pool.Allocate()) Hypotheses(pool, + m_coll.size()); - SortHypos(mgr, m_sortedHypos->GetArray()); + SortHypos(mgr, m_sortedHypos->GetArray()); - // prune - Recycler &recycler = mgr.GetHypoRecycle(); + // prune + Recycler &recycler = mgr.GetHypoRecycle(); - size_t maxStackSize = mgr.system.options.search.stack_size; - if (maxStackSize && m_sortedHypos->size() > maxStackSize) { - for (size_t i = maxStackSize; i < m_sortedHypos->size(); ++i) { - HypothesisBase *hypo = const_cast((*m_sortedHypos)[i]); - recycler.Recycle(hypo); + size_t maxStackSize = mgr.system.options.search.stack_size; + if (maxStackSize && m_sortedHypos->size() > maxStackSize) { + for (size_t i = maxStackSize; i < m_sortedHypos->size(); ++i) { + HypothesisBase *hypo = const_cast((*m_sortedHypos)[i]); + recycler.Recycle(hypo); - // delete from arclist - if (mgr.system.options.nbest.nbest_size) { - arcLists.Delete(hypo); - } - } - m_sortedHypos->resize(maxStackSize); - } + // delete from arclist + if (mgr.system.options.nbest.nbest_size) { + arcLists.Delete(hypo); + } + } + m_sortedHypos->resize(maxStackSize); + } - } + } - return *m_sortedHypos; + return *m_sortedHypos; } void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) @@ -221,7 +216,7 @@ void HypothesisColl::SortHypos(const ManagerBase &mgr, 
const HypothesisBase **so cerr << endl; */ size_t ind = 0; - BOOST_FOREACH(const HypothesisBase *hypo, m_coll){ + BOOST_FOREACH(const HypothesisBase *hypo, m_coll) { sortedHypos[ind] = hypo; ++ind; } @@ -229,11 +224,9 @@ void HypothesisColl::SortHypos(const ManagerBase &mgr, const HypothesisBase **so size_t indMiddle; if (maxStackSize == 0) { indMiddle = GetSize(); - } - else if (GetSize() > maxStackSize) { + } else if (GetSize() > maxStackSize) { indMiddle = maxStackSize; - } - else { + } else { // GetSize() <= maxStackSize indMiddle = GetSize(); } @@ -241,10 +234,10 @@ void HypothesisColl::SortHypos(const ManagerBase &mgr, const HypothesisBase **so const HypothesisBase **iterMiddle = sortedHypos + indMiddle; std::partial_sort( - sortedHypos, - iterMiddle, - sortedHypos + GetSize(), - HypothesisFutureScoreOrderer()); + sortedHypos, + iterMiddle, + sortedHypos + GetSize(), + HypothesisFutureScoreOrderer()); /* cerr << "sorted hypos: "; @@ -266,8 +259,8 @@ void HypothesisColl::Delete(const HypothesisBase *hypo) void HypothesisColl::Clear() { - m_sortedHypos = NULL; - m_coll.clear(); + m_sortedHypos = NULL; + m_coll.clear(); m_bestScore = -std::numeric_limits::infinity(); m_worstScore = std::numeric_limits::infinity(); @@ -275,13 +268,13 @@ void HypothesisColl::Clear() std::string HypothesisColl::Debug(const System &system) const { - stringstream out; - BOOST_FOREACH (const HypothesisBase *hypo, m_coll) { - out << hypo->Debug(system); - out << std::endl << std::endl; - } + stringstream out; + BOOST_FOREACH (const HypothesisBase *hypo, m_coll) { + out << hypo->Debug(system); + out << std::endl << std::endl; + } - return out.str(); + return out.str(); } } /* namespace Moses2 */ diff --git a/moses2/HypothesisColl.h b/moses2/HypothesisColl.h index 81a3b25c3..63a8551ba 100644 --- a/moses2/HypothesisColl.h +++ b/moses2/HypothesisColl.h @@ -26,24 +26,24 @@ public: HypothesisColl(const ManagerBase &mgr); void Add(const ManagerBase &mgr, - HypothesisBase *hypo, - Recycler 
&hypoRecycle, - ArcLists &arcLists); + HypothesisBase *hypo, + Recycler &hypoRecycle, + ArcLists &arcLists); - size_t GetSize() const - { return m_coll.size(); } + size_t GetSize() const { + return m_coll.size(); + } void Clear(); const Hypotheses &GetSortedAndPrunedHypos( - const ManagerBase &mgr, - ArcLists &arcLists) const; + const ManagerBase &mgr, + ArcLists &arcLists) const; const HypothesisBase *GetBestHypo() const; template - const T *GetBestHypo() const - { + const T *GetBestHypo() const { const HypothesisBase *hypo = GetBestHypo(); return hypo ? &hypo->Cast() : NULL; } @@ -54,8 +54,8 @@ public: protected: typedef boost::unordered_set, UnorderedComparer, - MemPoolAllocator > _HCType; + UnorderedComparer, UnorderedComparer, + MemPoolAllocator > _HCType; _HCType m_coll; mutable Hypotheses *m_sortedHypos; diff --git a/moses2/InputPathBase.cpp b/moses2/InputPathBase.cpp index 034122cc2..c77033548 100644 --- a/moses2/InputPathBase.cpp +++ b/moses2/InputPathBase.cpp @@ -11,8 +11,8 @@ namespace Moses2 { InputPathBase::InputPathBase(MemPool &pool, - const Range &range, size_t numPt, const InputPathBase *prefixPath) : - range(range), prefixPath(prefixPath) + const Range &range, size_t numPt, const InputPathBase *prefixPath) : + range(range), prefixPath(prefixPath) { } diff --git a/moses2/InputPathBase.h b/moses2/InputPathBase.h index d95d29e35..59fb219e3 100644 --- a/moses2/InputPathBase.h +++ b/moses2/InputPathBase.h @@ -24,7 +24,7 @@ public: Range range; InputPathBase(MemPool &pool, const Range &range, - size_t numPt, const InputPathBase *prefixPath); + size_t numPt, const InputPathBase *prefixPath); }; diff --git a/moses2/InputPathsBase.h b/moses2/InputPathsBase.h index 861bbf9f7..88e69ea04 100644 --- a/moses2/InputPathsBase.h +++ b/moses2/InputPathsBase.h @@ -22,8 +22,7 @@ class InputPathsBase { typedef std::vector Coll; public: - InputPathsBase() - { + InputPathsBase() { } virtual ~InputPathsBase(); @@ -31,21 +30,17 @@ public: typedef Coll::iterator iterator; 
typedef Coll::const_iterator const_iterator; - const_iterator begin() const - { + const_iterator begin() const { return m_inputPaths.begin(); } - const_iterator end() const - { + const_iterator end() const { return m_inputPaths.end(); } - iterator begin() - { + iterator begin() { return m_inputPaths.begin(); } - iterator end() - { + iterator end() { return m_inputPaths.end(); } diff --git a/moses2/InputType.cpp b/moses2/InputType.cpp index 01169c162..60664a85b 100644 --- a/moses2/InputType.cpp +++ b/moses2/InputType.cpp @@ -12,35 +12,35 @@ namespace Moses2 { ////////////////////////////////////////////////////////////////////////////// InputType::XMLOption::XMLOption(MemPool &pool, const std::string &nodeName, size_t vStartPos) -:startPos(vStartPos) -,prob(0) -,m_entity(NULL) + :startPos(vStartPos) + ,prob(0) + ,m_entity(NULL) { - m_nodeName = pool.Allocate(nodeName.size() + 1); - strcpy(m_nodeName, nodeName.c_str()); + m_nodeName = pool.Allocate(nodeName.size() + 1); + strcpy(m_nodeName, nodeName.c_str()); } void InputType::XMLOption::SetTranslation(MemPool &pool, const std::string &val) { - m_translation = pool.Allocate(val.size() + 1); - strcpy(m_translation, val.c_str()); + m_translation = pool.Allocate(val.size() + 1); + strcpy(m_translation, val.c_str()); } void InputType::XMLOption::SetEntity(MemPool &pool, const std::string &val) { - m_entity = pool.Allocate(val.size() + 1); - strcpy(m_entity, val.c_str()); + m_entity = pool.Allocate(val.size() + 1); + strcpy(m_entity, val.c_str()); } std::string InputType::XMLOption::Debug(const System &system) const { std::stringstream out; out << "[" << startPos << "," << phraseSize << "]=" - << m_nodeName << "," - << m_translation << "," - << prob; + << m_nodeName << "," + << m_translation << "," + << prob; if (m_entity) { - out << "," << m_entity; + out << "," << m_entity; } return out.str(); } @@ -48,9 +48,9 @@ std::string InputType::XMLOption::Debug(const System &system) const 
////////////////////////////////////////////////////////////////////////////// InputType::InputType(MemPool &pool) -:m_reorderingConstraint(pool) -,m_xmlOptions(pool) -,m_xmlCoverageMap(pool) + :m_reorderingConstraint(pool) + ,m_xmlOptions(pool) + ,m_xmlCoverageMap(pool) { } @@ -64,18 +64,18 @@ void InputType::Init(const System &system, size_t size, int max_distortion) m_reorderingConstraint.InitializeWalls(size, max_distortion); if (system.options.input.xml_policy != XmlPassThrough) { - m_xmlCoverageMap.assign(size, false); + m_xmlCoverageMap.assign(size, false); } } void InputType::AddXMLOption(const System &system, const XMLOption *xmlOption) { - m_xmlOptions.push_back(xmlOption); + m_xmlOptions.push_back(xmlOption); if (system.options.input.xml_policy != XmlPassThrough) { - for(size_t j = xmlOption->startPos; j < xmlOption->startPos + xmlOption->phraseSize; ++j) { - m_xmlCoverageMap[j]=true; - } + for(size_t j = xmlOption->startPos; j < xmlOption->startPos + xmlOption->phraseSize; ++j) { + m_xmlCoverageMap[j]=true; + } } } diff --git a/moses2/InputType.h b/moses2/InputType.h index 0a2aebfa1..8813bc484 100644 --- a/moses2/InputType.h +++ b/moses2/InputType.h @@ -20,29 +20,32 @@ public: class XMLOption { public: - size_t startPos, phraseSize; + size_t startPos, phraseSize; - SCORE prob; + SCORE prob; - XMLOption(MemPool &pool, const std::string &nodeName, size_t vStartPos); + XMLOption(MemPool &pool, const std::string &nodeName, size_t vStartPos); - const char *GetNodeName() const - { return m_nodeName; } + const char *GetNodeName() const { + return m_nodeName; + } - const char *GetTranslation() const - { return m_translation; } + const char *GetTranslation() const { + return m_translation; + } - const char *GetEntity() const - { return m_entity; } + const char *GetEntity() const { + return m_entity; + } - void SetTranslation(MemPool &pool, const std::string &val); - void SetEntity(MemPool &pool, const std::string &val); + void SetTranslation(MemPool &pool, const 
std::string &val); + void SetEntity(MemPool &pool, const std::string &val); - std::string Debug(const System &system) const; + std::string Debug(const System &system) const; public: - char *m_nodeName; - char *m_translation; - char *m_entity; + char *m_nodeName; + char *m_translation; + char *m_entity; }; @@ -53,14 +56,17 @@ public: virtual void Init(const System &system, size_t size, int max_distortion); - ReorderingConstraint &GetReorderingConstraint() - { return m_reorderingConstraint; } + ReorderingConstraint &GetReorderingConstraint() { + return m_reorderingConstraint; + } - const ReorderingConstraint &GetReorderingConstraint() const - { return m_reorderingConstraint; } + const ReorderingConstraint &GetReorderingConstraint() const { + return m_reorderingConstraint; + } - const Vector &GetXMLOptions() const - { return m_xmlOptions; } + const Vector &GetXMLOptions() const { + return m_xmlOptions; + } void AddXMLOption(const System &system, const XMLOption *xmlOption); diff --git a/moses2/LM/GPULM.cpp b/moses2/LM/GPULM.cpp index f2ff7b7e7..98ee22b11 100644 --- a/moses2/LM/GPULM.cpp +++ b/moses2/LM/GPULM.cpp @@ -29,28 +29,23 @@ using namespace std; namespace Moses2 { -struct GPULMState: public FFState -{ - virtual std::string ToString() const - { +struct GPULMState: public FFState { + virtual std::string ToString() const { return "GPULMState"; } - virtual size_t hash() const - { + virtual size_t hash() const { return boost::hash_value(lastWords); } - virtual bool operator==(const FFState& other) const - { + virtual bool operator==(const FFState& other) const { const GPULMState &otherCast = static_cast(other); bool ret = lastWords == otherCast.lastWords; return ret; } - void SetContext(const Context &context) - { + void SetContext(const Context &context) { lastWords = context; if (lastWords.size()) { lastWords.resize(lastWords.size() - 1); @@ -63,7 +58,7 @@ struct GPULMState: public FFState ///////////////////////////////////////////////////////////////// 
GPULM::GPULM(size_t startInd, const std::string &line) -:StatefulFeatureFunction(startInd, line) + :StatefulFeatureFunction(startInd, line) { cerr << "GPULM::GPULM" << endl; ReadParameters(); @@ -93,15 +88,15 @@ FFState* GPULM::BlankState(MemPool &pool, const System &sys) const //! return the state associated with the empty hypothesis for a given sentence void GPULM::EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const + const InputType &input, const Hypothesis &hypo) const { GPULMState &stateCast = static_cast(state); stateCast.lastWords.push_back(m_bos); } void GPULM::EvaluateInIsolation(MemPool &pool, const System &system, - const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { if (targetPhrase.GetSize() == 0) { return; @@ -120,8 +115,7 @@ void GPULM::EvaluateInIsolation(MemPool &pool, const System &system, if (context.size() == m_order) { //std::pair fromScoring = Score(context); //score += fromScoring.first; - } - else { + } else { //std::pair fromScoring = Score(context); //nonFullScore += fromScoring.first; } @@ -130,33 +124,30 @@ void GPULM::EvaluateInIsolation(MemPool &pool, const System &system, } void GPULM::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { UTIL_THROW2("Not implemented"); } void GPULM::EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const { UTIL_THROW2("Not implemented"); } void GPULM::SetParameter(const std::string& key, - const std::string& value) 
+ const std::string& value) { //cerr << "key=" << key << " " << value << endl; if (key == "path") { m_path = value; - } - else if (key == "order") { + } else if (key == "order") { m_order = Scan(value); - } - else if (key == "factor") { + } else if (key == "factor") { m_factorType = Scan(value); - } - else { + } else { StatefulFeatureFunction::SetParameter(key, value); } @@ -164,8 +155,8 @@ void GPULM::SetParameter(const std::string& key, } void GPULM::EvaluateWhenAppliedBatch( - const System &system, - const Batch &batch) const + const System &system, + const Batch &batch) const { // create list of ngrams std::vector > contexts; @@ -219,7 +210,7 @@ void GPULM::CreateNGram(std::vector > &contexts, } void GPULM::ShiftOrPush(std::vector &context, - const Factor *factor) const + const Factor *factor) const { if (context.size() < m_order) { context.resize(context.size() + 1); @@ -239,8 +230,8 @@ SCORE GPULM::Score(const Context &context) const } void GPULM::EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const + const SCFG::Hypothesis &hypo, int featureID, Scores &scores, + FFState &state) const { UTIL_THROW2("Not implemented"); } diff --git a/moses2/LM/GPULM.h b/moses2/LM/GPULM.h index ad236ef95..33f97a313 100644 --- a/moses2/LM/GPULM.h +++ b/moses2/LM/GPULM.h @@ -33,35 +33,35 @@ public: virtual void Load(System &system); void SetParameter(const std::string& key, - const std::string& value); + const std::string& value); virtual FFState* BlankState(MemPool &pool, const System &sys) const; //! 
return the state associated with the empty hypothesis for a given sentence virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const; + const InputType &input, const Hypothesis &hypo) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const; + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const; virtual void EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const; + const SCFG::Hypothesis &hypo, int featureID, Scores &scores, + FFState &state) const; virtual void EvaluateWhenAppliedBatch( - const System &system, - const Batch &batch) const; + const System &system, + const Batch &batch) const; protected: std::string m_path; @@ -71,8 +71,7 @@ protected: const Factor *m_eos; size_t m_order; - inline lm::WordIndex TranslateID(const Word &word) const - { + inline lm::WordIndex TranslateID(const Word &word) const { std::size_t factor = word[m_factorType]->GetId(); return (factor >= m_lmIdLookup.size() ? 
0 : m_lmIdLookup[factor]); } @@ -83,7 +82,7 @@ protected: void CreateNGram(std::vector > &contexts, Hypothesis &hypo) const; void ShiftOrPush(std::vector &context, - const Factor *factor) const; + const Factor *factor) const; SCORE Score(const Context &context) const; }; diff --git a/moses2/LM/KENLM.cpp b/moses2/LM/KENLM.cpp index 3173392cd..689d76b92 100644 --- a/moses2/LM/KENLM.cpp +++ b/moses2/LM/KENLM.cpp @@ -28,23 +28,19 @@ using namespace std; namespace Moses2 { -struct KenLMState: public FFState -{ +struct KenLMState: public FFState { lm::ngram::State state; - virtual size_t hash() const - { + virtual size_t hash() const { size_t ret = hash_value(state); return ret; } - virtual bool operator==(const FFState& o) const - { + virtual bool operator==(const FFState& o) const { const KenLMState &other = static_cast(o); bool ret = state == other.state; return ret; } - virtual std::string ToString() const - { + virtual std::string ToString() const { stringstream ss; for (size_t i = 0; i < state.Length(); ++i) { ss << state.words[i] << " "; @@ -77,9 +73,8 @@ public: return ret; } - virtual std::string ToString() const - { - return "LanguageModelChartStateKenLM"; + virtual std::string ToString() const { + return "LanguageModelChartStateKenLM"; } private: @@ -91,13 +86,11 @@ class MappingBuilder: public lm::EnumerateVocab { public: MappingBuilder(FactorCollection &factorCollection, System &system, - std::vector &mapping) : - m_factorCollection(factorCollection), m_system(system), m_mapping(mapping) - { + std::vector &mapping) : + m_factorCollection(factorCollection), m_system(system), m_mapping(mapping) { } - void Add(lm::WordIndex index, const StringPiece &str) - { + void Add(lm::WordIndex index, const StringPiece &str) { std::size_t factorId = m_factorCollection.AddFactor(str, m_system, false)->GetId(); if (m_mapping.size() <= factorId) { // 0 is :-) @@ -115,10 +108,10 @@ private: ///////////////////////////////////////////////////////////////// template 
KENLM::KENLM(size_t startInd, const std::string &line, - const std::string &file, FactorType factorType, - util::LoadMethod load_method) : - StatefulFeatureFunction(startInd, line), m_path(file), m_factorType( - factorType), m_load_method(load_method) + const std::string &file, FactorType factorType, + util::LoadMethod load_method) : + StatefulFeatureFunction(startInd, line), m_path(file), m_factorType( + factorType), m_load_method(load_method) { ReadParameters(); } @@ -154,8 +147,7 @@ FFState* KENLM::BlankState(MemPool &pool, const System &sys) const FFState *ret; if (sys.isPb) { ret = new (pool.Allocate()) KenLMState(); - } - else { + } else { ret = new (pool.Allocate()) LanguageModelChartStateKenLM(); } return ret; @@ -164,7 +156,7 @@ FFState* KENLM::BlankState(MemPool &pool, const System &sys) const //! return the state associated with the empty hypothesis for a given sentence template void KENLM::EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const + const InputType &input, const Hypothesis &hypo) const { KenLMState &stateCast = static_cast(state); stateCast.state = m_ngram->BeginSentenceState(); @@ -172,8 +164,8 @@ void KENLM::EmptyHypothesisState(FFState &state, const ManagerBase &mgr, template void KENLM::EvaluateInIsolation(MemPool &pool, const System &system, - const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { // contains factors used by this LM float fullScore, nGramScore; @@ -193,22 +185,21 @@ void KENLM::EvaluateInIsolation(MemPool &pool, const System &system, estimateScoresVec[0] = estimateScore; estimateScoresVec[1] = 0; SCORE weightedScore = Scores::CalcWeightedScore(system, *this, - estimateScoresVec); + estimateScoresVec); estimatedScore += weightedScore; - } - else { + } else { scores.PlusEquals(system, *this, 
nGramScore); SCORE weightedScore = Scores::CalcWeightedScore(system, *this, - estimateScore); + estimateScore); estimatedScore += weightedScore; } } template void KENLM::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { // contains factors used by this LM float fullScore, nGramScore; @@ -232,29 +223,28 @@ void KENLM::EvaluateInIsolation(MemPool &pool, const System &system, cons estimateScoresVec[0] = estimateScore; estimateScoresVec[1] = 0; SCORE weightedScore = Scores::CalcWeightedScore(system, *this, - estimateScoresVec); + estimateScoresVec); estimatedScore += weightedScore; - } - else { + } else { scores.PlusEquals(system, *this, nGramScore); SCORE weightedScore = Scores::CalcWeightedScore(system, *this, - estimateScore); + estimateScore); estimatedScore += weightedScore; } } template void KENLM::EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const { KenLMState &stateCast = static_cast(state); const System &system = mgr.system; const lm::ngram::State &in_state = - static_cast(prevState).state; + static_cast(prevState).state; if (!hypo.GetTargetPhrase().GetSize()) { stateCast.state = in_state; @@ -271,11 +261,11 @@ void KENLM::EvaluateWhenApplied(const ManagerBase &mgr, typename Model::State *state0 = &stateCast.state, *state1 = &aux_state; float score = m_ngram->Score(in_state, TranslateID(hypo.GetWord(position)), - *state0); + *state0); ++position; for (; position < adjust_end; ++position) { score += m_ngram->Score(*state0, TranslateID(hypo.GetWord(position)), - *state1); + *state1); std::swap(state0, state1); } @@ -284,15 +274,13 @@ void KENLM::EvaluateWhenApplied(const ManagerBase &mgr, 
std::vector indices(m_ngram->Order() - 1); const lm::WordIndex *last = LastIDs(hypo, &indices.front()); score += m_ngram->FullScoreForgotState(&indices.front(), last, - m_ngram->GetVocabulary().EndSentence(), stateCast.state).prob; - } - else if (adjust_end < end) { + m_ngram->GetVocabulary().EndSentence(), stateCast.state).prob; + } else if (adjust_end < end) { // Get state after adding a long phrase. std::vector indices(m_ngram->Order() - 1); const lm::WordIndex *last = LastIDs(hypo, &indices.front()); m_ngram->GetState(&indices.front(), last, stateCast.state); - } - else if (state0 != &stateCast.state) { + } else if (state0 != &stateCast.state) { // Short enough phrase that we can just reuse the state. stateCast.state = *state0; } @@ -305,15 +293,14 @@ void KENLM::EvaluateWhenApplied(const ManagerBase &mgr, scoresVec[0] = score; scoresVec[1] = 0.0; scores.PlusEquals(system, *this, scoresVec); - } - else { + } else { scores.PlusEquals(system, *this, score); } } template void KENLM::CalcScore(const Phrase &phrase, float &fullScore, - float &ngramScore, std::size_t &oovCount) const + float &ngramScore, std::size_t &oovCount) const { fullScore = 0; ngramScore = 0; @@ -328,8 +315,7 @@ void KENLM::CalcScore(const Phrase &phrase, float &fullScor if (m_bos == phrase[0][m_factorType]) { scorer.BeginSentence(); position = 1; - } - else { + } else { position = 0; } @@ -357,7 +343,7 @@ void KENLM::CalcScore(const Phrase &phrase, float &fullScor template void KENLM::CalcScore(const Phrase &phrase, float &fullScore, - float &ngramScore, std::size_t &oovCount) const + float &ngramScore, std::size_t &oovCount) const { fullScore = 0; ngramScore = 0; @@ -411,7 +397,7 @@ void KENLM::CalcScore(const Phrase &phrase, float &fullScore, // Convert last words of hypothesis into vocab ids, returning an end pointer. 
template lm::WordIndex *KENLM::LastIDs(const Hypothesis &hypo, - lm::WordIndex *indices) const + lm::WordIndex *indices) const { lm::WordIndex *index = indices; lm::WordIndex *end = indices + m_ngram->Order() - 1; @@ -428,8 +414,8 @@ lm::WordIndex *KENLM::LastIDs(const Hypothesis &hypo, template void KENLM::EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const + const SCFG::Hypothesis &hypo, int featureID, Scores &scores, + FFState &state) const { LanguageModelChartStateKenLM &newState = static_cast(state); lm::ngram::RuleScore ruleScore(*m_ngram, newState.GetChartState()); @@ -511,48 +497,38 @@ FeatureFunction *ConstructKenLM(size_t startInd, const std::string &lineOrig) for (; argument; ++argument) { const char *equals = std::find(argument->data(), - argument->data() + argument->size(), '='); + argument->data() + argument->size(), '='); UTIL_THROW_IF2(equals == argument->data() + argument->size(), - "Expected = in KenLM argument " << *argument); + "Expected = in KenLM argument " << *argument); StringPiece name(argument->data(), equals - argument->data()); StringPiece value(equals + 1, - argument->data() + argument->size() - equals - 1); + argument->data() + argument->size() - equals - 1); if (name == "factor") { factorType = boost::lexical_cast(value); - } - else if (name == "order") { + } else if (name == "order") { // Ignored - } - else if (name == "path") { + } else if (name == "path") { filePath.assign(value.data(), value.size()); - } - else if (name == "lazyken") { + } else if (name == "lazyken") { // deprecated: use load instead. load_method = - boost::lexical_cast(value) ? - util::LAZY : util::POPULATE_OR_READ; - } - else if (name == "load") { + boost::lexical_cast(value) ? 
+ util::LAZY : util::POPULATE_OR_READ; + } else if (name == "load") { if (value == "lazy") { load_method = util::LAZY; - } - else if (value == "populate_or_lazy") { + } else if (value == "populate_or_lazy") { load_method = util::POPULATE_OR_LAZY; - } - else if (value == "populate_or_read" || value == "populate") { + } else if (value == "populate_or_read" || value == "populate") { load_method = util::POPULATE_OR_READ; - } - else if (value == "read") { + } else if (value == "read") { load_method = util::READ; - } - else if (value == "parallel_read") { + } else if (value == "parallel_read") { load_method = util::PARALLEL_READ; - } - else { + } else { UTIL_THROW2("Unknown KenLM load method " << value); } - } - else { + } else { // pass to base class to interpret line << " " << name << "=" << value; } @@ -562,38 +538,37 @@ FeatureFunction *ConstructKenLM(size_t startInd, const std::string &lineOrig) } FeatureFunction *ConstructKenLM(size_t startInd, const std::string &line, - const std::string &file, FactorType factorType, - util::LoadMethod load_method) + const std::string &file, FactorType factorType, + util::LoadMethod load_method) { lm::ngram::ModelType model_type; if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) { switch (model_type) { case lm::ngram::PROBING: return new KENLM(startInd, line, file, - factorType, load_method); + factorType, load_method); case lm::ngram::REST_PROBING: return new KENLM(startInd, line, file, - factorType, load_method); + factorType, load_method); case lm::ngram::TRIE: return new KENLM(startInd, line, file, factorType, - load_method); + load_method); case lm::ngram::QUANT_TRIE: return new KENLM(startInd, line, file, - factorType, load_method); + factorType, load_method); case lm::ngram::ARRAY_TRIE: return new KENLM(startInd, line, file, - factorType, load_method); + factorType, load_method); case lm::ngram::QUANT_ARRAY_TRIE: return new KENLM(startInd, line, file, - factorType, load_method); + factorType, load_method); default: 
UTIL_THROW2("Unrecognized kenlm model type " << model_type) ; } - } - else { + } else { return new KENLM(startInd, line, file, factorType, - load_method); + load_method); } } diff --git a/moses2/LM/KENLM.h b/moses2/LM/KENLM.h index 703b398d8..3c7839bea 100644 --- a/moses2/LM/KENLM.h +++ b/moses2/LM/KENLM.h @@ -19,15 +19,15 @@ class Word; FeatureFunction *ConstructKenLM(size_t startInd, const std::string &lineOrig); FeatureFunction *ConstructKenLM(size_t startInd, const std::string &line, - const std::string &file, FactorType factorType, - util::LoadMethod load_method); + const std::string &file, FactorType factorType, + util::LoadMethod load_method); template class KENLM: public StatefulFeatureFunction { public: KENLM(size_t startInd, const std::string &line, const std::string &file, - FactorType factorType, util::LoadMethod load_method); + FactorType factorType, util::LoadMethod load_method); virtual ~KENLM(); @@ -37,25 +37,25 @@ public: //! return the state associated with the empty hypothesis for a given sentence virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const; + const InputType &input, const Hypothesis &hypo) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const; + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const; virtual 
void EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const; + const SCFG::Hypothesis &hypo, int featureID, Scores &scores, + FFState &state) const; protected: std::string m_path; @@ -67,13 +67,12 @@ protected: boost::shared_ptr m_ngram; void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, - std::size_t &oovCount) const; + std::size_t &oovCount) const; void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, - std::size_t &oovCount) const; + std::size_t &oovCount) const; - inline lm::WordIndex TranslateID(const Word &word) const - { + inline lm::WordIndex TranslateID(const Word &word) const { std::size_t factor = word[m_factorType]->GetId(); return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]); } diff --git a/moses2/LM/KENLMBatch.cpp b/moses2/LM/KENLMBatch.cpp index 1ed6e7663..b35004bba 100644 --- a/moses2/LM/KENLMBatch.cpp +++ b/moses2/LM/KENLMBatch.cpp @@ -33,23 +33,19 @@ using namespace std; namespace Moses2 { -struct KenLMState: public FFState -{ +struct KenLMState: public FFState { lm::ngram::State state; - virtual size_t hash() const - { + virtual size_t hash() const { size_t ret = hash_value(state); return ret; } - virtual bool operator==(const FFState& o) const - { + virtual bool operator==(const FFState& o) const { const KenLMState &other = static_cast(o); bool ret = state == other.state; return ret; } - virtual std::string ToString() const - { + virtual std::string ToString() const { stringstream ss; for (size_t i = 0; i < state.Length(); ++i) { ss << state.words[i] << " "; @@ -64,13 +60,11 @@ class MappingBuilder: public lm::EnumerateVocab { public: MappingBuilder(FactorCollection &factorCollection, System &system, - std::vector &mapping) : - m_factorCollection(factorCollection), m_system(system), m_mapping(mapping) - { + std::vector &mapping) : + m_factorCollection(factorCollection), m_system(system), m_mapping(mapping) { } - 
void Add(lm::WordIndex index, const StringPiece &str) - { + void Add(lm::WordIndex index, const StringPiece &str) { std::size_t factorId = m_factorCollection.AddFactor(str, m_system, false)->GetId(); if (m_mapping.size() <= factorId) { // 0 is :-) @@ -87,8 +81,8 @@ private: ///////////////////////////////////////////////////////////////// KENLMBatch::KENLMBatch(size_t startInd, const std::string &line) -:StatefulFeatureFunction(startInd, line) -,m_numHypos(0) + :StatefulFeatureFunction(startInd, line) + ,m_numHypos(0) { cerr << "KENLMBatch::KENLMBatch" << endl; ReadParameters(); @@ -126,15 +120,15 @@ FFState* KENLMBatch::BlankState(MemPool &pool, const System &sys) const //! return the state associated with the empty hypothesis for a given sentence void KENLMBatch::EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const + const InputType &input, const Hypothesis &hypo) const { KenLMState &stateCast = static_cast(state); stateCast.state = m_ngram->BeginSentenceState(); } void KENLMBatch::EvaluateInIsolation(MemPool &pool, const System &system, - const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { // contains factors used by this LM float fullScore, nGramScore; @@ -154,34 +148,33 @@ void KENLMBatch::EvaluateInIsolation(MemPool &pool, const System &system, estimateScoresVec[0] = estimateScore; estimateScoresVec[1] = 0; SCORE weightedScore = Scores::CalcWeightedScore(system, *this, - estimateScoresVec); + estimateScoresVec); estimatedScore += weightedScore; - } - else { + } else { scores.PlusEquals(system, *this, nGramScore); SCORE weightedScore = Scores::CalcWeightedScore(system, *this, - estimateScore); + estimateScore); estimatedScore += weightedScore; } } void KENLMBatch::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase 
&source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { } void KENLMBatch::EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const { KenLMState &stateCast = static_cast(state); const System &system = mgr.system; const lm::ngram::State &in_state = - static_cast(prevState).state; + static_cast(prevState).state; if (!hypo.GetTargetPhrase().GetSize()) { stateCast.state = in_state; @@ -198,11 +191,11 @@ void KENLMBatch::EvaluateWhenApplied(const ManagerBase &mgr, typename Model::State *state0 = &stateCast.state, *state1 = &aux_state; float score = m_ngram->Score(in_state, TranslateID(hypo.GetWord(position)), - *state0); + *state0); ++position; for (; position < adjust_end; ++position) { score += m_ngram->Score(*state0, TranslateID(hypo.GetWord(position)), - *state1); + *state1); std::swap(state0, state1); } @@ -211,15 +204,13 @@ void KENLMBatch::EvaluateWhenApplied(const ManagerBase &mgr, std::vector indices(m_ngram->Order() - 1); const lm::WordIndex *last = LastIDs(hypo, &indices.front()); score += m_ngram->FullScoreForgotState(&indices.front(), last, - m_ngram->GetVocabulary().EndSentence(), stateCast.state).prob; - } - else if (adjust_end < end) { + m_ngram->GetVocabulary().EndSentence(), stateCast.state).prob; + } else if (adjust_end < end) { // Get state after adding a long phrase. std::vector indices(m_ngram->Order() - 1); const lm::WordIndex *last = LastIDs(hypo, &indices.front()); m_ngram->GetState(&indices.front(), last, stateCast.state); - } - else if (state0 != &stateCast.state) { + } else if (state0 != &stateCast.state) { // Short enough phrase that we can just reuse the state. 
stateCast.state = *state0; } @@ -232,14 +223,13 @@ void KENLMBatch::EvaluateWhenApplied(const ManagerBase &mgr, scoresVec[0] = score; scoresVec[1] = 0.0; scores.PlusEquals(system, *this, scoresVec); - } - else { + } else { scores.PlusEquals(system, *this, score); } } void KENLMBatch::CalcScore(const Phrase &phrase, float &fullScore, - float &ngramScore, std::size_t &oovCount) const + float &ngramScore, std::size_t &oovCount) const { fullScore = 0; ngramScore = 0; @@ -254,8 +244,7 @@ void KENLMBatch::CalcScore(const Phrase &phrase, float &fullScore, if (m_bos == phrase[0][m_factorType]) { scorer.BeginSentence(); position = 1; - } - else { + } else { position = 0; } @@ -283,7 +272,7 @@ void KENLMBatch::CalcScore(const Phrase &phrase, float &fullScore, // Convert last words of hypothesis into vocab ids, returning an end pointer. lm::WordIndex *KENLMBatch::LastIDs(const Hypothesis &hypo, - lm::WordIndex *indices) const + lm::WordIndex *indices) const { lm::WordIndex *index = indices; lm::WordIndex *end = indices + m_ngram->Order() - 1; @@ -299,44 +288,34 @@ lm::WordIndex *KENLMBatch::LastIDs(const Hypothesis &hypo, } void KENLMBatch::SetParameter(const std::string& key, - const std::string& value) + const std::string& value) { //cerr << "key=" << key << " " << value << endl; if (key == "path") { m_path = value; - } - else if (key == "order") { + } else if (key == "order") { // ignore - } - else if (key == "factor") { + } else if (key == "factor") { m_factorType = Scan(value); - } - else if (key == "lazyken") { + } else if (key == "lazyken") { m_load_method = - boost::lexical_cast(value) ? - util::LAZY : util::POPULATE_OR_READ; - } - else if (key == "load") { + boost::lexical_cast(value) ? 
+ util::LAZY : util::POPULATE_OR_READ; + } else if (key == "load") { if (value == "lazy") { m_load_method = util::LAZY; - } - else if (value == "populate_or_lazy") { + } else if (value == "populate_or_lazy") { m_load_method = util::POPULATE_OR_LAZY; - } - else if (value == "populate_or_read" || value == "populate") { + } else if (value == "populate_or_read" || value == "populate") { m_load_method = util::POPULATE_OR_READ; - } - else if (value == "read") { + } else if (value == "read") { m_load_method = util::READ; - } - else if (value == "parallel_read") { + } else if (value == "parallel_read") { m_load_method = util::PARALLEL_READ; - } - else { + } else { UTIL_THROW2("Unknown KenLM load method " << value); } - } - else { + } else { StatefulFeatureFunction::SetParameter(key, value); } @@ -344,7 +323,7 @@ void KENLMBatch::SetParameter(const std::string& key, } void KENLMBatch::EvaluateWhenAppliedBatch( - const Batch &batch) const + const Batch &batch) const { { // write lock @@ -362,8 +341,7 @@ void KENLMBatch::EvaluateWhenAppliedBatch( m_numHypos = 0; m_threadNeeded.notify_all(); - } - else { + } else { boost::mutex::scoped_lock lock(m_mutex); m_threadNeeded.wait(lock); } @@ -380,8 +358,8 @@ void KENLMBatch::EvaluateWhenAppliedBatch() const } void KENLMBatch::EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const + const SCFG::Hypothesis &hypo, int featureID, Scores &scores, + FFState &state) const { UTIL_THROW2("Not implemented"); } diff --git a/moses2/LM/KENLMBatch.h b/moses2/LM/KENLMBatch.h index 21dc8637c..3d27bc36b 100644 --- a/moses2/LM/KENLMBatch.h +++ b/moses2/LM/KENLMBatch.h @@ -33,34 +33,34 @@ public: virtual void Load(System &system); void SetParameter(const std::string& key, - const std::string& value); + const std::string& value); virtual FFState* BlankState(MemPool &pool, const System &sys) const; //! 
return the state associated with the empty hypothesis for a given sentence virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const; + const InputType &input, const Hypothesis &hypo) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const; + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const; virtual void EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const; + const SCFG::Hypothesis &hypo, int featureID, Scores &scores, + FFState &state) const; virtual void EvaluateWhenAppliedBatch( - const Batch &batch) const; + const Batch &batch) const; protected: std::string m_path; @@ -73,10 +73,9 @@ protected: boost::shared_ptr m_ngram; void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, - std::size_t &oovCount) const; + std::size_t &oovCount) const; - inline lm::WordIndex TranslateID(const Word &word) const - { + inline lm::WordIndex TranslateID(const Word &word) const { std::size_t factor = word[m_factorType]->GetId(); return (factor >= m_lmIdLookup.size() ? 
0 : m_lmIdLookup[factor]); } diff --git a/moses2/LM/LanguageModel.cpp b/moses2/LM/LanguageModel.cpp index 3e0c39d20..a720851ba 100644 --- a/moses2/LM/LanguageModel.cpp +++ b/moses2/LM/LanguageModel.cpp @@ -22,28 +22,24 @@ using namespace std; namespace Moses2 { -struct LMState: public PointerState -{ +struct LMState: public PointerState { LMState() : - PointerState() - { + PointerState() { // uninitialised } - void Set(MemPool &pool, void *lms, const std::vector &context) - { + void Set(MemPool &pool, void *lms, const std::vector &context) { lmstate = lms; numWords = context.size(); lastWords = (const Factor**) pool.Allocate( - sizeof(const Factor*) * numWords); + sizeof(const Factor*) * numWords); for (size_t i = 0; i < numWords; ++i) { lastWords[i] = context[i]; } } - void Init(MemPool &pool, const Factor *factor) - { + void Init(MemPool &pool, const Factor *factor) { lmstate = NULL; numWords = 1; lastWords = (const Factor**) pool.Allocate(sizeof(const Factor*)); @@ -56,7 +52,7 @@ struct LMState: public PointerState //////////////////////////////////////////////////////////////////////////////////////// LanguageModel::LanguageModel(size_t startInd, const std::string &line) : - StatefulFeatureFunction(startInd, line), m_oov(-100) + StatefulFeatureFunction(startInd, line), m_oov(-100) { ReadParameters(); } @@ -112,18 +108,15 @@ void LanguageModel::Load(System &system) } void LanguageModel::SetParameter(const std::string& key, - const std::string& value) + const std::string& value) { if (key == "path") { m_path = value; - } - else if (key == "factor") { + } else if (key == "factor") { m_factorType = Scan(value); - } - else if (key == "order") { + } else if (key == "order") { m_order = Scan(value); - } - else { + } else { StatefulFeatureFunction::SetParameter(key, value); } } @@ -143,8 +136,8 @@ void LanguageModel::EmptyHypothesisState(FFState &state, const ManagerBase &mgr, } void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, - const Phrase 
&source, const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { if (targetPhrase.GetSize() == 0) { return; @@ -163,8 +156,7 @@ void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, if (context.size() == m_order) { std::pair fromScoring = Score(context); score += fromScoring.first; - } - else { + } else { std::pair fromScoring = Score(context); nonFullScore += fromScoring.first; } @@ -176,14 +168,14 @@ void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, } void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { } void LanguageModel::EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const { const LMState &prevLMState = static_cast(prevState); size_t numWords = prevLMState.numWords; @@ -214,8 +206,7 @@ void LanguageModel::EvaluateWhenApplied(const ManagerBase &mgr, score += fromScoring.first; fromScoring.second = NULL; context.clear(); - } - else { + } else { assert(context.size()); if (context.size() == m_order) { context.resize(context.size() - 1); @@ -233,7 +224,7 @@ void LanguageModel::EvaluateWhenApplied(const ManagerBase &mgr, } void LanguageModel::ShiftOrPush(std::vector &context, - const Factor *factor) const + const Factor *factor) const { if (context.size() < m_order) { context.resize(context.size() + 1); @@ -248,7 +239,7 @@ void LanguageModel::ShiftOrPush(std::vector &context, } std::pair LanguageModel::Score( - const std::vector &context) const + const std::vector &context) const { //cerr << 
"context="; //DebugContext(context); @@ -260,8 +251,7 @@ std::pair LanguageModel::Score( if (node) { ret.first = node->getValue().prob; ret.second = (void*) node; - } - else { + } else { SCORE backoff = 0; std::vector backOffContext(context.begin() + 1, context.end()); @@ -282,7 +272,7 @@ std::pair LanguageModel::Score( } SCORE LanguageModel::BackoffScore( - const std::vector &context) const + const std::vector &context) const { //cerr << "backoff="; //DebugContext(context); @@ -295,19 +285,17 @@ SCORE LanguageModel::BackoffScore( if (stoppedAtInd == context.size()) { // found entire ngram ret = node.getValue().backoff; - } - else { + } else { if (stoppedAtInd == 0) { ret = m_oov; stoppedAtInd = 1; - } - else { + } else { ret = node.getValue().backoff; } // recursive std::vector backoff(context.begin() + stoppedAtInd, - context.end()); + context.end()); ret += BackoffScore(backoff); } @@ -315,7 +303,7 @@ SCORE LanguageModel::BackoffScore( } void LanguageModel::DebugContext( - const std::vector &context) const + const std::vector &context) const { for (size_t i = 0; i < context.size(); ++i) { cerr << context[i]->GetString() << " "; @@ -324,8 +312,8 @@ void LanguageModel::DebugContext( } void LanguageModel::EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const + const SCFG::Hypothesis &hypo, int featureID, Scores &scores, + FFState &state) const { UTIL_THROW2("Not implemented"); } diff --git a/moses2/LM/LanguageModel.h b/moses2/LM/LanguageModel.h index d262a8497..00e4e5051 100644 --- a/moses2/LM/LanguageModel.h +++ b/moses2/LM/LanguageModel.h @@ -17,24 +17,19 @@ namespace Moses2 { //////////////////////////////////////////////////////////////////////////////////////// -struct LMScores -{ - LMScores() - { +struct LMScores { + LMScores() { } LMScores(const LMScores ©) : - prob(copy.prob), backoff(copy.backoff) - { + prob(copy.prob), backoff(copy.backoff) { } LMScores(float inProb, float 
inBackoff) : - prob(inProb), backoff(inBackoff) - { + prob(inProb), backoff(inBackoff) { } - void Debug(std::ostream &out, const System &system) const - { + void Debug(std::ostream &out, const System &system) const { out << "(" << prob << "," << backoff << ")" << std::flush; } @@ -54,25 +49,25 @@ public: virtual FFState* BlankState(MemPool &pool, const System &sys) const; virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const; + const InputType &input, const Hypothesis &hypo) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const; + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const; virtual void EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const; + const SCFG::Hypothesis &hypo, int featureID, Scores &scores, + FFState &state) const; protected: std::string m_path; @@ -85,9 +80,9 @@ protected: const Factor *m_eos; void ShiftOrPush(std::vector &context, - const Factor *factor) const; + const Factor *factor) const; std::pair Score( - const std::vector &context) const; + const std::vector &context) const; SCORE BackoffScore(const std::vector &context) const; void DebugContext(const std::vector &context) const; diff --git a/moses2/LM/LanguageModelDALM.cpp 
b/moses2/LM/LanguageModelDALM.cpp index 7d3e8242b..ed2340995 100644 --- a/moses2/LM/LanguageModelDALM.cpp +++ b/moses2/LM/LanguageModelDALM.cpp @@ -76,8 +76,9 @@ public: state.refresh(); } - virtual std::string ToString() const - { return "DALM state"; } + virtual std::string ToString() const { + return "DALM state"; + } }; @@ -89,29 +90,30 @@ inline void read_ini(const char *inifile, string &model, string &words, string & getline(ifs, line); while(ifs) { - unsigned int pos = line.find("="); - string key = line.substr(0, pos); - string value = line.substr(pos+1, line.size()-pos); - if(key=="MODEL") { - model = value; - } else if(key=="WORDS") { - words = value; - } else if(key=="WORDSTXT") { - wordstxt = value; - } - getline(ifs, line); + unsigned int pos = line.find("="); + string key = line.substr(0, pos); + string value = line.substr(pos+1, line.size()-pos); + if(key=="MODEL") { + model = value; + } else if(key=="WORDS") { + words = value; + } else if(key=="WORDSTXT") { + wordstxt = value; + } + getline(ifs, line); } } ///////////////////////// LanguageModelDALM::LanguageModelDALM(size_t startInd, const std::string &line) -:StatefulFeatureFunction(startInd, line) + :StatefulFeatureFunction(startInd, line) { - ReadParameters(); + ReadParameters(); } -LanguageModelDALM::~LanguageModelDALM() { - // TODO Auto-generated destructor stub +LanguageModelDALM::~LanguageModelDALM() +{ + // TODO Auto-generated destructor stub } void LanguageModelDALM::Load(System &system) @@ -165,72 +167,72 @@ void LanguageModelDALM::CreateVocabMapping(const std::string &wordstxt, const Sy string line; std::size_t max_fid = 0; while(getline(vocabStrm, line)) { - const Factor *factor = system.GetVocab().AddFactor(line, system); - std::size_t fid = factor->GetId(); - DALM::VocabId wid = m_vocab->lookup(line.c_str()); + const Factor *factor = system.GetVocab().AddFactor(line, system); + std::size_t fid = factor->GetId(); + DALM::VocabId wid = m_vocab->lookup(line.c_str()); - 
vlist.push_back(std::pair(fid, wid)); - if(max_fid < fid) max_fid = fid; + vlist.push_back(std::pair(fid, wid)); + if(max_fid < fid) max_fid = fid; } for(std::size_t i = 0; i < m_vocabMap.size(); i++) { - m_vocabMap[i] = m_vocab->unk(); + m_vocabMap[i] = m_vocab->unk(); } m_vocabMap.resize(max_fid+1, m_vocab->unk()); std::vector< std::pair >::iterator it = vlist.begin(); while(it != vlist.end()) { - std::pair &entry = *it; - m_vocabMap[entry.first] = entry.second; + std::pair &entry = *it; + m_vocabMap[entry.first] = entry.second; - ++it; + ++it; } } void LanguageModelDALM::SetParameter(const std::string& key, const std::string& value) { if (key == "factor") { - m_factorType = Scan(value); + m_factorType = Scan(value); } else if (key == "order") { - m_nGramOrder = Scan(value); + m_nGramOrder = Scan(value); } else if (key == "path") { - m_filePath = value; + m_filePath = value; } else { - StatefulFeatureFunction::SetParameter(key, value); + StatefulFeatureFunction::SetParameter(key, value); } m_ContextSize = m_nGramOrder-1; } FFState* LanguageModelDALM::BlankState(MemPool &pool, const System &sys) const { - DALMState *state = new DALMState(); - return state; + DALMState *state = new DALMState(); + return state; } void LanguageModelDALM::EmptyHypothesisState(FFState &state, - const ManagerBase &mgr, - const InputType &input, - const Hypothesis &hypo) const + const ManagerBase &mgr, + const InputType &input, + const Hypothesis &hypo) const { DALMState &dalmState = static_cast(state); m_lm->init_state(dalmState.get_state()); } - void LanguageModelDALM::EvaluateInIsolation(MemPool &pool, - const System &system, - const Phrase &source, - const TargetPhraseImpl &targetPhrase, - Scores &scores, - SCORE &estimatedScore) const - { +void LanguageModelDALM::EvaluateInIsolation(MemPool &pool, + const System &system, + const Phrase &source, + const TargetPhraseImpl &targetPhrase, + Scores &scores, + SCORE &estimatedScore) const +{ - } +} void 
LanguageModelDALM::EvaluateWhenApplied(const ManagerBase &mgr, -const Hypothesis &hypo, -const FFState &prevState, -Scores &scores, -FFState &state) const + const Hypothesis &hypo, + const FFState &prevState, + Scores &scores, + FFState &state) const { } diff --git a/moses2/LM/LanguageModelDALM.h b/moses2/LM/LanguageModelDALM.h index cbbeca97d..a9a010dca 100644 --- a/moses2/LM/LanguageModelDALM.h +++ b/moses2/LM/LanguageModelDALM.h @@ -36,20 +36,20 @@ public: virtual FFState* BlankState(MemPool &pool, const System &sys) const; virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const; + const InputType &input, const Hypothesis &hypo) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const; + const Hypothesis &hypo, const FFState &prevState, Scores &scores, + FFState &state) const; virtual void EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const; + const SCFG::Hypothesis &hypo, int featureID, Scores &scores, + FFState &state) const; protected: FactorType m_factorType; diff --git a/moses2/Main.cpp b/moses2/Main.cpp index 0661d1d0e..cf833760a 100644 --- a/moses2/Main.cpp +++ b/moses2/Main.cpp @@ -20,15 +20,15 @@ using namespace std; int main(int argc, char** argv) { - cerr << "Starting..." << endl; + cerr << "Starting..." 
<< endl; Moses2::Timer timer; timer.start(); - //Temp(); + //Temp(); Moses2::Parameter params; if (!params.LoadParam(argc, argv)) { - return EXIT_FAILURE; + return EXIT_FAILURE; } Moses2::System system(params); timer.check("Loaded"); @@ -45,8 +45,7 @@ int main(int argc, char** argv) if (params.GetParam("server")) { std::cerr << "RUN SERVER" << std::endl; run_as_server(system); - } - else { + } else { std::cerr << "RUN BATCH" << std::endl; batch_run(params, system, pool); } @@ -71,8 +70,7 @@ istream &GetInputStream(Moses2::Parameter ¶ms) if (vec && vec->size()) { Moses2::InputFileStream *stream = new Moses2::InputFileStream(vec->at(0)); return *stream; - } - else { + } else { return cin; } } @@ -86,7 +84,7 @@ void batch_run(Moses2::Parameter ¶ms, Moses2::System &system, Moses2::Thread string line; while (getline(inStream, line)) { //cerr << "line=" << line << endl; - boost::shared_ptr task(new Moses2::TranslationTask(system, line, translationId)); + boost::shared_ptr task(new Moses2::TranslationTask(system, line, translationId)); //cerr << "START pool.Submit()" << endl; pool.Submit(task); @@ -106,23 +104,23 @@ void batch_run(Moses2::Parameter ¶ms, Moses2::System &system, Moses2::Thread //////////////////////////////////////////////////////////////////////////////////////////////// void Temp() { - Moses2::MemPool pool; - Moses2::MemPoolAllocator a(pool); + Moses2::MemPool pool; + Moses2::MemPoolAllocator a(pool); - boost::unordered_set, std::equal_to, Moses2::MemPoolAllocator > s(a); - s.insert(3); - s.insert(4); - s.insert(3); - s.erase(3); + boost::unordered_set, std::equal_to, Moses2::MemPoolAllocator > s(a); + s.insert(3); + s.insert(4); + s.insert(3); + s.erase(3); - boost::pool_allocator alloc; - std::vector > v(alloc); - for (int i = 0; i < 1000; ++i) - v.push_back(i); + boost::pool_allocator alloc; + std::vector > v(alloc); + for (int i = 0; i < 1000; ++i) + v.push_back(i); - v.clear(); - boost::singleton_pool:: - purge_memory(); + v.clear(); + 
boost::singleton_pool:: + purge_memory(); - abort(); + abort(); } diff --git a/moses2/Main.h b/moses2/Main.h index 41e016130..731d6385b 100644 --- a/moses2/Main.h +++ b/moses2/Main.h @@ -7,7 +7,8 @@ #pragma once #include -namespace Moses2 { +namespace Moses2 +{ class Parameter; class System; class ThreadPool; diff --git a/moses2/ManagerBase.cpp b/moses2/ManagerBase.cpp index 1e774cc5b..f40aa7b2f 100644 --- a/moses2/ManagerBase.cpp +++ b/moses2/ManagerBase.cpp @@ -21,14 +21,14 @@ using namespace std; namespace Moses2 { ManagerBase::ManagerBase(System &sys, const TranslationTask &task, - const std::string &inputStr, long translationId) -:system(sys) -,task(task) -,m_inputStr(inputStr) -,m_translationId(translationId) -,m_pool(NULL) -,m_systemPool(NULL) -,m_hypoRecycle(NULL) + const std::string &inputStr, long translationId) + :system(sys) + ,task(task) + ,m_inputStr(inputStr) + ,m_translationId(translationId) + ,m_pool(NULL) + ,m_systemPool(NULL) + ,m_hypoRecycle(NULL) { } @@ -37,10 +37,10 @@ ManagerBase::~ManagerBase() system.featureFunctions.CleanUpAfterSentenceProcessing(); if (m_pool) { - GetPool().Reset(); + GetPool().Reset(); } if (m_hypoRecycle) { - GetHypoRecycle().Clear(); + GetHypoRecycle().Clear(); } } diff --git a/moses2/ManagerBase.h b/moses2/ManagerBase.h index 7b4a02ba8..cb8ee019c 100644 --- a/moses2/ManagerBase.h +++ b/moses2/ManagerBase.h @@ -38,27 +38,32 @@ public: mutable ArcLists arcLists; ManagerBase(System &sys, const TranslationTask &task, - const std::string &inputStr, long translationId); + const std::string &inputStr, long translationId); virtual ~ManagerBase(); virtual void Decode() = 0; virtual std::string OutputBest() const = 0; virtual std::string OutputNBest() = 0; virtual std::string OutputTransOpt() = 0; - MemPool &GetPool() const - { return *m_pool; } + MemPool &GetPool() const { + return *m_pool; + } - MemPool &GetSystemPool() const - { return *m_systemPool; } + MemPool &GetSystemPool() const { + return *m_systemPool; + } - Recycler 
&GetHypoRecycle() const - { return *m_hypoRecycle; } + Recycler &GetHypoRecycle() const { + return *m_hypoRecycle; + } - const InputType &GetInput() const - { return *m_input; } + const InputType &GetInput() const { + return *m_input; + } - long GetTranslationId() const - { return m_translationId; } + long GetTranslationId() const { + return m_translationId; + } protected: std::string m_inputStr; diff --git a/moses2/MemPool.cpp b/moses2/MemPool.cpp index 7e159117b..31d684bfc 100644 --- a/moses2/MemPool.cpp +++ b/moses2/MemPool.cpp @@ -16,7 +16,7 @@ namespace Moses2 { MemPool::Page::Page(std::size_t vSize) : - size(vSize) + size(vSize) { mem = (uint8_t*) util::MallocOrThrow(size); end = mem + size; @@ -28,7 +28,7 @@ MemPool::Page::~Page() } //////////////////////////////////////////////////// MemPool::MemPool(size_t initSize) : - m_currSize(initSize), m_currPage(0) + m_currSize(initSize), m_currPage(0) { Page *page = new Page(m_currSize); m_pages.push_back(page); @@ -57,16 +57,14 @@ uint8_t *MemPool::More(std::size_t size) uint8_t *ret = page->mem; current_ = ret + size; return ret; - } - else { + } else { // use existing page Page &page = *m_pages[m_currPage]; if (size <= page.size) { uint8_t *ret = page.mem; current_ = ret + size; return ret; - } - else { + } else { // recursive call More() return More(size); } diff --git a/moses2/MemPool.h b/moses2/MemPool.h index eaa55915e..2e8fccc34 100644 --- a/moses2/MemPool.h +++ b/moses2/MemPool.h @@ -20,14 +20,12 @@ namespace Moses2 class MemPool { - struct Page - { + struct Page { uint8_t *mem; uint8_t *end; size_t size; - Page() - { + Page() { } Page(std::size_t size); ~Page(); @@ -38,8 +36,7 @@ public: ~MemPool(); - uint8_t *Allocate(std::size_t size) - { + uint8_t *Allocate(std::size_t size) { size = (size + 3) & 0xfffffffc; uint8_t *ret = current_; @@ -48,8 +45,7 @@ public: Page &page = *m_pages[m_currPage]; if (current_ <= page.end) { // return what we got - } - else { + } else { ret = More(size); } return ret; @@ 
-57,15 +53,13 @@ public: } template - T *Allocate() - { + T *Allocate() { uint8_t *ret = Allocate(sizeof(T)); return (T*) ret; } template - T *Allocate(size_t num) - { + T *Allocate(size_t num) { uint8_t *ret = Allocate(sizeof(T) * num); return (T*) ret; } @@ -94,18 +88,15 @@ class ObjectPoolContiguous public: ObjectPoolContiguous(std::size_t initSize = 100000) : - m_size(0), m_actualSize(initSize) - { + m_size(0), m_actualSize(initSize) { m_vec = (T*) malloc(sizeof(T) * initSize); } - ~ObjectPoolContiguous() - { + ~ObjectPoolContiguous() { free(m_vec); } - void Add(T &obj) - { + void Add(T &obj) { if (m_size >= m_actualSize) { //std::cerr << std::endl << "MORE " << m_size << std::endl; m_actualSize *= 2; @@ -116,46 +107,38 @@ public: ++m_size; } - bool IsEmpty() const - { + bool IsEmpty() const { return m_size == 0; } - void Reset() - { + void Reset() { m_size = 0; } // vector op - size_t GetSize() const - { + size_t GetSize() const { return m_size; } - const T& operator[](size_t ind) const - { + const T& operator[](size_t ind) const { return m_vec[ind]; } // stack op - const T &Get() const - { + const T &Get() const { return m_vec[m_size - 1]; } - void Pop() - { + void Pop() { --m_size; } - T *GetData() - { + T *GetData() { return m_vec; } template - void Sort(const ORDERER &orderer) - { + void Sort(const ORDERER &orderer) { std::sort(m_vec, m_vec + m_size, orderer); } diff --git a/moses2/MemPoolAllocator.h b/moses2/MemPoolAllocator.h index 6cc699893..cb0a546c7 100644 --- a/moses2/MemPoolAllocator.h +++ b/moses2/MemPoolAllocator.h @@ -17,51 +17,42 @@ public: typedef std::ptrdiff_t difference_type; template - struct rebind - { + struct rebind { typedef MemPoolAllocator other; }; MemPoolAllocator(Moses2::MemPool &pool) : - m_pool(pool) - { + m_pool(pool) { } MemPoolAllocator(const MemPoolAllocator &other) : - m_pool(other.m_pool) - { + m_pool(other.m_pool) { } template MemPoolAllocator(const MemPoolAllocator& other) : - m_pool(other.m_pool) - { + 
m_pool(other.m_pool) { } - size_type max_size() const - { + size_type max_size() const { return std::numeric_limits::max(); } - void deallocate(pointer p, size_type n) - { + void deallocate(pointer p, size_type n) { //std::cerr << "deallocate " << p << " " << n << std::endl; } - pointer allocate(size_type n, std::allocator::const_pointer hint = 0) - { + pointer allocate(size_type n, std::allocator::const_pointer hint = 0) { //std::cerr << "allocate " << n << " " << hint << std::endl; pointer ret = m_pool.Allocate(n); return ret; } - void construct(pointer p, const_reference val) - { + void construct(pointer p, const_reference val) { //std::cerr << "construct " << p << " " << n << std::endl; new ((void *) p) T(val); } - void destroy(pointer p) - { + void destroy(pointer p) { //std::cerr << "destroy " << p << " " << n << std::endl; } diff --git a/moses2/MorphoTrie/MorphTrie.h b/moses2/MorphoTrie/MorphTrie.h index 0b013b5bb..5ffc2b29b 100644 --- a/moses2/MorphoTrie/MorphTrie.h +++ b/moses2/MorphoTrie/MorphTrie.h @@ -11,24 +11,23 @@ template class MorphTrie { public: - MorphTrie() - { + MorphTrie() { } Node* insert(const std::vector& word, - const ValueClass& value); + const ValueClass& value); const Node* getNode( - const std::vector& words) const; + const std::vector& words) const; const Node &getNode(const std::vector& words, size_t &stoppedAtInd) const; std::vector*> getNodes( - const std::vector& words, size_t &stoppedAtInd) const; + const std::vector& words, size_t &stoppedAtInd) const; private: Node root; }; template Node* MorphTrie::insert( - const std::vector& word, const ValueClass& value) + const std::vector& word, const ValueClass& value) { Node* cNode = &root; for (size_t i = 0; i < word.size(); ++i) { @@ -41,7 +40,7 @@ Node* MorphTrie::insert( template const Node* MorphTrie::getNode( - const std::vector& words) const + const std::vector& words) const { size_t stoppedAtInd; const Node &ret = getNode(words, stoppedAtInd); @@ -53,7 +52,7 @@ const Node* 
MorphTrie::getNode( template const Node &MorphTrie::getNode( - const std::vector& words, size_t &stoppedAtInd) const + const std::vector& words, size_t &stoppedAtInd) const { const Node *prevNode = &root, *newNode; for (size_t i = 0; i < words.size(); ++i) { @@ -72,7 +71,7 @@ const Node &MorphTrie::getNode( template std::vector*> MorphTrie::getNodes( - const std::vector& words, size_t &stoppedAtInd) const + const std::vector& words, size_t &stoppedAtInd) const { std::vector*> ret; const Node *prevNode = &root, *newNode; @@ -84,8 +83,7 @@ std::vector*> MorphTrie:: if (newNode == NULL) { stoppedAtInd = i; return ret; - } - else { + } else { ret.push_back(newNode); } prevNode = newNode; diff --git a/moses2/MorphoTrie/Node.h b/moses2/MorphoTrie/Node.h index ca165ef67..cd91d8922 100644 --- a/moses2/MorphoTrie/Node.h +++ b/moses2/MorphoTrie/Node.h @@ -12,28 +12,23 @@ template class Node { public: - Node() - { + Node() { } Node(const ValueClass& value) : - m_value(value) - { + m_value(value) { } ~Node(); void setKey(const KeyClass& key); - void setValue(const ValueClass& value) - { + void setValue(const ValueClass& value) { m_value = value; } Node* findSub(const KeyClass& key); const Node* findSub(const KeyClass& key) const; - Node *addSubnode(const KeyClass& cKey) - { + Node *addSubnode(const KeyClass& cKey) { Node *node = findSub(cKey); if (node) { return node; - } - else { + } else { node = new Node(); subNodes[cKey] = node; return node; @@ -41,8 +36,7 @@ public: } std::vector getSubnodes(); - const ValueClass &getValue() const - { + const ValueClass &getValue() const { return m_value; } @@ -64,7 +58,7 @@ Node::~Node() template const Node* Node::findSub( - const KeyClass& cKey) const + const KeyClass& cKey) const { typename boost::unordered_map::const_iterator iter; iter = subNodes.find(cKey); @@ -77,7 +71,7 @@ const Node* Node::findSub( template Node* Node::findSub( - const KeyClass& cKey) + const KeyClass& cKey) { typename boost::unordered_map::iterator iter; iter = 
subNodes.find(cKey); diff --git a/moses2/Phrase.h b/moses2/Phrase.h index 714e65d42..100701483 100644 --- a/moses2/Phrase.h +++ b/moses2/Phrase.h @@ -32,17 +32,16 @@ template class Phrase { public: - virtual ~Phrase() - { + virtual ~Phrase() { } virtual const WORD& operator[](size_t pos) const = 0; virtual size_t GetSize() const = 0; - virtual const WORD& Back() const - { return (*this)[GetSize() - 1]; } + virtual const WORD& Back() const { + return (*this)[GetSize() - 1]; + } - virtual size_t hash() const - { + virtual size_t hash() const { size_t seed = 0; for (size_t i = 0; i < GetSize(); ++i) { @@ -54,8 +53,7 @@ public: return seed; } - virtual bool operator==(const Phrase &compare) const - { + virtual bool operator==(const Phrase &compare) const { if (GetSize() != compare.GetSize()) { return false; } @@ -71,13 +69,11 @@ public: return true; } - virtual bool operator!=(const Phrase &compare) const - { + virtual bool operator!=(const Phrase &compare) const { return !((*this) == compare); } - virtual std::string GetString(const FactorList &factorTypes) const - { + virtual std::string GetString(const FactorList &factorTypes) const { if (GetSize() == 0) { return ""; } @@ -95,8 +91,7 @@ public: virtual SubPhrase GetSubPhrase(size_t start, size_t size) const = 0; - virtual std::string Debug(const System &system) const - { + virtual std::string Debug(const System &system) const { std::stringstream out; size_t size = GetSize(); if (size) { @@ -110,8 +105,7 @@ public: return out.str(); } - virtual void OutputToStream(const System &system, std::ostream &out) const - { + virtual void OutputToStream(const System &system, std::ostream &out) const { size_t size = GetSize(); if (size) { (*this)[0].OutputToStream(system, out); @@ -131,8 +125,7 @@ template class PhraseOrdererLexical { public: - bool operator()(const Phrase &a, const Phrase &b) const - { + bool operator()(const Phrase &a, const Phrase &b) const { size_t minSize = std::min(a.GetSize(), b.GetSize()); for (size_t i 
= 0; i < minSize; ++i) { const Word &aWord = a[i]; diff --git a/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp b/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp index 2af2b35f0..7fcd4fa0c 100644 --- a/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp +++ b/moses2/PhraseBased/CubePruningMiniStack/Misc.cpp @@ -22,22 +22,20 @@ namespace NSCubePruningMiniStack //////////////////////////////////////////////////////////////////////// QueueItem *QueueItem::Create(QueueItem *currItem, Manager &mgr, CubeEdge &edge, - size_t hypoIndex, size_t tpIndex, - QueueItemRecycler &queueItemRecycler) + size_t hypoIndex, size_t tpIndex, + QueueItemRecycler &queueItemRecycler) { QueueItem *ret; if (currItem) { // reuse incoming queue item to create new item ret = currItem; ret->Init(mgr, edge, hypoIndex, tpIndex); - } - else if (!queueItemRecycler.empty()) { + } else if (!queueItemRecycler.empty()) { // use item from recycle bin ret = queueItemRecycler.back(); ret->Init(mgr, edge, hypoIndex, tpIndex); queueItemRecycler.pop_back(); - } - else { + } else { // create new item ret = new (mgr.GetPool().Allocate()) QueueItem(mgr, edge, hypoIndex, tpIndex); @@ -47,14 +45,14 @@ QueueItem *QueueItem::Create(QueueItem *currItem, Manager &mgr, CubeEdge &edge, } QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, - size_t tpIndex) : - edge(&edge), hypoIndex(hypoIndex), tpIndex(tpIndex) + size_t tpIndex) : + edge(&edge), hypoIndex(hypoIndex), tpIndex(tpIndex) { CreateHypothesis(mgr); } void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, - size_t tpIndex) + size_t tpIndex) { this->edge = &edge; this->hypoIndex = hypoIndex; @@ -66,7 +64,7 @@ void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, void QueueItem::CreateHypothesis(Manager &mgr) { const Hypothesis *prevHypo = - static_cast(edge->hypos[hypoIndex]); + static_cast(edge->hypos[hypoIndex]); const TargetPhraseImpl &tp = edge->tps[tpIndex]; //cerr << "hypoIndex=" << hypoIndex << endl; @@ -76,7 
+74,7 @@ void QueueItem::CreateHypothesis(Manager &mgr) hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, - edge->estimatedScore); + edge->estimatedScore); if (!mgr.system.options.cube.lazy_scoring) { hypo->EvaluateWhenApplied(); @@ -85,8 +83,8 @@ void QueueItem::CreateHypothesis(Manager &mgr) //////////////////////////////////////////////////////////////////////// CubeEdge::CubeEdge(Manager &mgr, const Hypotheses &hypos, const InputPath &path, - const TargetPhrases &tps, const Bitmap &newBitmap) : - hypos(hypos), path(path), tps(tps), newBitmap(newBitmap) + const TargetPhrases &tps, const Bitmap &newBitmap) : + hypos(hypos), path(path), tps(tps), newBitmap(newBitmap) { estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap); } @@ -99,7 +97,7 @@ std::string CubeEdge::Debug(const System &system) const } bool CubeEdge::SetSeenPosition(const size_t x, const size_t y, - SeenPositions &seenPositions) const + SeenPositions &seenPositions) const { //UTIL_THROW_IF2(x >= (1<<17), "Error"); //UTIL_THROW_IF2(y >= (1<<17), "Error"); @@ -110,22 +108,22 @@ bool CubeEdge::SetSeenPosition(const size_t x, const size_t y, } void CubeEdge::CreateFirst(Manager &mgr, Queue &queue, - SeenPositions &seenPositions, - QueueItemRecycler &queueItemRecycler) + SeenPositions &seenPositions, + QueueItemRecycler &queueItemRecycler) { assert(hypos.size()); assert(tps.GetSize()); QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, - queueItemRecycler); + queueItemRecycler); queue.push(item); bool setSeen = SetSeenPosition(0, 0, seenPositions); assert(setSeen); } void CubeEdge::CreateNext(Manager &mgr, QueueItem *item, Queue &queue, - SeenPositions &seenPositions, - QueueItemRecycler &queueItemRecycler) + SeenPositions &seenPositions, + QueueItemRecycler &queueItemRecycler) { size_t hypoIndex = item->hypoIndex; size_t tpIndex = item->tpIndex; @@ -134,7 +132,7 @@ void CubeEdge::CreateNext(Manager &mgr, QueueItem 
*item, Queue &queue, && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) { // reuse incoming queue item to create new item QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, - tpIndex, queueItemRecycler); + tpIndex, queueItemRecycler); assert(newItem == item); queue.push(newItem); item = NULL; @@ -143,7 +141,7 @@ void CubeEdge::CreateNext(Manager &mgr, QueueItem *item, Queue &queue, if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) { QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, - tpIndex + 1, queueItemRecycler); + tpIndex + 1, queueItemRecycler); queue.push(newItem); item = NULL; } diff --git a/moses2/PhraseBased/CubePruningMiniStack/Misc.h b/moses2/PhraseBased/CubePruningMiniStack/Misc.h index 535ef6ada..4fc576cba 100644 --- a/moses2/PhraseBased/CubePruningMiniStack/Misc.h +++ b/moses2/PhraseBased/CubePruningMiniStack/Misc.h @@ -38,8 +38,8 @@ class QueueItem ~QueueItem(); // NOT IMPLEMENTED. Use MemPool public: static QueueItem *Create(QueueItem *currItem, Manager &mgr, CubeEdge &edge, - size_t hypoIndex, size_t tpIndex, - QueueItemRecycler &queueItemRecycler); + size_t hypoIndex, size_t tpIndex, + QueueItemRecycler &queueItemRecycler); QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); @@ -56,8 +56,7 @@ protected: class QueueItemOrderer { public: - bool operator()(QueueItem* itemA, QueueItem* itemB) const - { + bool operator()(QueueItem* itemA, QueueItem* itemB) const { HypothesisFutureScoreOrderer orderer; return !orderer(itemA->hypo, itemB->hypo); } @@ -68,11 +67,11 @@ class CubeEdge { public: typedef std::priority_queue >, QueueItemOrderer> Queue; + std::vector >, QueueItemOrderer> Queue; typedef std::pair SeenPositionItem; typedef boost::unordered_set, - std::equal_to, MemPoolAllocator > SeenPositions; + std::equal_to, MemPoolAllocator > SeenPositions; const Hypotheses 
&hypos; const InputPath &path; @@ -81,16 +80,16 @@ public: SCORE estimatedScore; CubeEdge(Manager &mgr, const Hypotheses &hypos, const InputPath &path, - const TargetPhrases &tps, const Bitmap &newBitmap); + const TargetPhrases &tps, const Bitmap &newBitmap); bool SetSeenPosition(const size_t x, const size_t y, - SeenPositions &seenPositions) const; + SeenPositions &seenPositions) const; void CreateFirst(Manager &mgr, Queue &queue, SeenPositions &seenPositions, - QueueItemRecycler &queueItemRecycler); + QueueItemRecycler &queueItemRecycler); void CreateNext(Manager &mgr, QueueItem *item, Queue &queue, - SeenPositions &seenPositions, - QueueItemRecycler &queueItemRecycler); + SeenPositions &seenPositions, + QueueItemRecycler &queueItemRecycler); std::string Debug(const System &system) const; diff --git a/moses2/PhraseBased/CubePruningMiniStack/Search.cpp b/moses2/PhraseBased/CubePruningMiniStack/Search.cpp index 94baafeb9..74103d211 100644 --- a/moses2/PhraseBased/CubePruningMiniStack/Search.cpp +++ b/moses2/PhraseBased/CubePruningMiniStack/Search.cpp @@ -29,16 +29,16 @@ namespace NSCubePruningMiniStack //////////////////////////////////////////////////////////////////////// Search::Search(Manager &mgr) : - Moses2::Search(mgr), m_stack(mgr), m_cubeEdgeAlloc(mgr.GetPool()) + Moses2::Search(mgr), m_stack(mgr), m_cubeEdgeAlloc(mgr.GetPool()) -, m_queue(QueueItemOrderer(), - std::vector >( - MemPoolAllocator(mgr.GetPool()))) + , m_queue(QueueItemOrderer(), + std::vector >( + MemPoolAllocator(mgr.GetPool()))) -, m_seenPositions( - MemPoolAllocator(mgr.GetPool())) + , m_seenPositions( + MemPoolAllocator(mgr.GetPool())) -, m_queueItemRecycler(MemPoolAllocator(mgr.GetPool())) + , m_queueItemRecycler(MemPoolAllocator(mgr.GetPool())) { } @@ -49,197 +49,197 @@ Search::~Search() void Search::Decode() { - const Sentence &sentence = static_cast(mgr.GetInput()); + const Sentence &sentence = static_cast(mgr.GetInput()); - // init cue edges - m_cubeEdges.resize(sentence.GetSize() + 
1); - for (size_t i = 0; i < m_cubeEdges.size(); ++i) { - m_cubeEdges[i] = new (mgr.GetPool().Allocate()) CubeEdges( - m_cubeEdgeAlloc); - } + // init cue edges + m_cubeEdges.resize(sentence.GetSize() + 1); + for (size_t i = 0; i < m_cubeEdges.size(); ++i) { + m_cubeEdges[i] = new (mgr.GetPool().Allocate()) CubeEdges( + m_cubeEdgeAlloc); + } - const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); - Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); - initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), - initBitmap); - initHypo->EmptyHypothesisState(mgr.GetInput()); - //cerr << "initHypo=" << *initHypo << endl; + const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); + Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); + initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), + initBitmap); + initHypo->EmptyHypothesisState(mgr.GetInput()); + //cerr << "initHypo=" << *initHypo << endl; - m_stack.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists); - PostDecode(0); + m_stack.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists); + PostDecode(0); - for (size_t stackInd = 1; stackInd < sentence.GetSize() + 1; - ++stackInd) { - //cerr << "stackInd=" << stackInd << endl; - m_stack.Clear(); - Decode(stackInd); + for (size_t stackInd = 1; stackInd < sentence.GetSize() + 1; + ++stackInd) { + //cerr << "stackInd=" << stackInd << endl; + m_stack.Clear(); + Decode(stackInd); PostDecode(stackInd); - //m_stack.DebugCounts(); - } + //m_stack.DebugCounts(); + } } void Search::Decode(size_t stackInd) { - Recycler &hypoRecycler = mgr.GetHypoRecycle(); + Recycler &hypoRecycler = mgr.GetHypoRecycle(); - // reuse queue from previous stack. 
Clear it first - std::vector > &container = Container( - m_queue); - //cerr << "container=" << container.size() << endl; - BOOST_FOREACH(QueueItem *item, container){ - // recycle unused hypos from queue - Hypothesis *hypo = item->hypo; - hypoRecycler.Recycle(hypo); + // reuse queue from previous stack. Clear it first + std::vector > &container = Container( + m_queue); + //cerr << "container=" << container.size() << endl; + BOOST_FOREACH(QueueItem *item, container) { + // recycle unused hypos from queue + Hypothesis *hypo = item->hypo; + hypoRecycler.Recycle(hypo); - // recycle queue item - m_queueItemRecycler.push_back(item); - } - container.clear(); + // recycle queue item + m_queueItemRecycler.push_back(item); + } + container.clear(); - m_seenPositions.clear(); + m_seenPositions.clear(); - // add top hypo from every edge into queue - CubeEdges &edges = *m_cubeEdges[stackInd]; + // add top hypo from every edge into queue + CubeEdges &edges = *m_cubeEdges[stackInd]; - BOOST_FOREACH(CubeEdge *edge, edges){ - //cerr << *edge << " "; - edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler); - } + BOOST_FOREACH(CubeEdge *edge, edges) { + //cerr << *edge << " "; + edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler); + } - /* - cerr << "edges: "; - boost::unordered_set uniqueBM; - BOOST_FOREACH(CubeEdge *edge, edges) { - uniqueBM.insert(&edge->newBitmap); - //cerr << *edge << " "; - } - cerr << edges.size() << " " << uniqueBM.size(); - cerr << endl; - */ + /* + cerr << "edges: "; + boost::unordered_set uniqueBM; + BOOST_FOREACH(CubeEdge *edge, edges) { + uniqueBM.insert(&edge->newBitmap); + //cerr << *edge << " "; + } + cerr << edges.size() << " " << uniqueBM.size(); + cerr << endl; + */ - size_t pops = 0; - while (!m_queue.empty() && pops < mgr.system.options.cube.pop_limit) { - // get best hypo from queue, add to stack - //cerr << "queue=" << queue.size() << endl; - QueueItem *item = m_queue.top(); - m_queue.pop(); + size_t pops = 0; + 
while (!m_queue.empty() && pops < mgr.system.options.cube.pop_limit) { + // get best hypo from queue, add to stack + //cerr << "queue=" << queue.size() << endl; + QueueItem *item = m_queue.top(); + m_queue.pop(); - CubeEdge *edge = item->edge; + CubeEdge *edge = item->edge; - // add hypo to stack - Hypothesis *hypo = item->hypo; + // add hypo to stack + Hypothesis *hypo = item->hypo; - if (mgr.system.options.cube.lazy_scoring) { - hypo->EvaluateWhenApplied(); - } + if (mgr.system.options.cube.lazy_scoring) { + hypo->EvaluateWhenApplied(); + } - //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; - m_stack.Add(hypo, hypoRecycler, mgr.arcLists); + //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; + m_stack.Add(hypo, hypoRecycler, mgr.arcLists); - edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler); + edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler); - ++pops; - } + ++pops; + } - // create hypo from every edge. Increase diversity - if (mgr.system.options.cube.diversity) { - while (!m_queue.empty()) { - QueueItem *item = m_queue.top(); - m_queue.pop(); + // create hypo from every edge. 
Increase diversity + if (mgr.system.options.cube.diversity) { + while (!m_queue.empty()) { + QueueItem *item = m_queue.top(); + m_queue.pop(); - if (item->hypoIndex == 0 && item->tpIndex == 0) { - // add hypo to stack - Hypothesis *hypo = item->hypo; - //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; - m_stack.Add(hypo, hypoRecycler, mgr.arcLists); - } - } - } + if (item->hypoIndex == 0 && item->tpIndex == 0) { + // add hypo to stack + Hypothesis *hypo = item->hypo; + //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; + m_stack.Add(hypo, hypoRecycler, mgr.arcLists); + } + } + } } void Search::PostDecode(size_t stackInd) { - MemPool &pool = mgr.GetPool(); + MemPool &pool = mgr.GetPool(); - const InputPaths &paths = mgr.GetInputPaths(); - const Matrix &pathMatrix = paths.GetMatrix(); - size_t inputSize = pathMatrix.GetRows(); - size_t numPaths = pathMatrix.GetCols(); + const InputPaths &paths = mgr.GetInputPaths(); + const Matrix &pathMatrix = paths.GetMatrix(); + size_t inputSize = pathMatrix.GetRows(); + size_t numPaths = pathMatrix.GetCols(); - BOOST_FOREACH(const Stack::Coll::value_type &val, m_stack.GetColl()){ - const Bitmap &hypoBitmap = *val.first.first; - size_t firstGap = hypoBitmap.GetFirstGapPos(); - size_t hypoEndPos = val.first.second; + BOOST_FOREACH(const Stack::Coll::value_type &val, m_stack.GetColl()) { + const Bitmap &hypoBitmap = *val.first.first; + size_t firstGap = hypoBitmap.GetFirstGapPos(); + size_t hypoEndPos = val.first.second; - Moses2::HypothesisColl &hypos = *val.second; + Moses2::HypothesisColl &hypos = *val.second; - //cerr << "key=" << hypoBitmap << " " << firstGap << " " << inputSize << endl; + //cerr << "key=" << hypoBitmap << " " << firstGap << " " << inputSize << endl; - // create edges to next hypos from existing hypos - for (size_t startPos = firstGap; startPos < inputSize; ++startPos) { - for (size_t pathInd = 0; pathInd < numPaths; ++pathInd) { - const InputPath *path = pathMatrix.GetValue(startPos, 
pathInd); + // create edges to next hypos from existing hypos + for (size_t startPos = firstGap; startPos < inputSize; ++startPos) { + for (size_t pathInd = 0; pathInd < numPaths; ++pathInd) { + const InputPath *path = pathMatrix.GetValue(startPos, pathInd); - if (path == NULL) { - break; - } - if (path->GetNumRules() == 0) { - continue; - } + if (path == NULL) { + break; + } + if (path->GetNumRules() == 0) { + continue; + } - const Range &pathRange = path->range; - //cerr << "pathRange=" << pathRange << endl; - if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) { - continue; - } + const Range &pathRange = path->range; + //cerr << "pathRange=" << pathRange << endl; + if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) { + continue; + } - const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint(); - if (!reorderingConstraint.Check(hypoBitmap, startPos, pathRange.GetEndPos())) { - continue; - } + const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint(); + if (!reorderingConstraint.Check(hypoBitmap, startPos, pathRange.GetEndPos())) { + continue; + } - const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); - size_t numWords = newBitmap.GetNumWordsCovered(); + const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); + size_t numWords = newBitmap.GetNumWordsCovered(); - CubeEdges &edges = *m_cubeEdges[numWords]; + CubeEdges &edges = *m_cubeEdges[numWords]; - // sort hypo for a particular bitmap and hypoEndPos - const Hypotheses &sortedHypos = hypos.GetSortedAndPrunedHypos(mgr, mgr.arcLists); + // sort hypo for a particular bitmap and hypoEndPos + const Hypotheses &sortedHypos = hypos.GetSortedAndPrunedHypos(mgr, mgr.arcLists); - size_t numPt = mgr.system.mappings.size(); - for (size_t i = 0; i < numPt; ++i) { - const TargetPhrases *tps = path->targetPhrases[i]; - if (tps && tps->GetSize()) { - CubeEdge *edge = new (pool.Allocate()) CubeEdge(mgr, sortedHypos, 
*path, *tps, newBitmap); - edges.push_back(edge); - } - } - } - } - } + size_t numPt = mgr.system.mappings.size(); + for (size_t i = 0; i < numPt; ++i) { + const TargetPhrases *tps = path->targetPhrases[i]; + if (tps && tps->GetSize()) { + CubeEdge *edge = new (pool.Allocate()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap); + edges.push_back(edge); + } + } + } + } + } } const Hypothesis *Search::GetBestHypo() const { - const Hypothesis *bestHypo = m_stack.GetBestHypo(); - return bestHypo; + const Hypothesis *bestHypo = m_stack.GetBestHypo(); + return bestHypo; } void Search::AddInitialTrellisPaths(TrellisPaths &paths) const { - const Stack::Coll &coll = m_stack.GetColl(); - BOOST_FOREACH(const Stack::Coll::value_type &val, coll){ - Moses2::HypothesisColl &hypos = *val.second; - const Hypotheses &sortedHypos = hypos.GetSortedAndPrunedHypos(mgr, mgr.arcLists); + const Stack::Coll &coll = m_stack.GetColl(); + BOOST_FOREACH(const Stack::Coll::value_type &val, coll) { + Moses2::HypothesisColl &hypos = *val.second; + const Hypotheses &sortedHypos = hypos.GetSortedAndPrunedHypos(mgr, mgr.arcLists); - BOOST_FOREACH(const HypothesisBase *hypoBase, sortedHypos) { - const Hypothesis *hypo = static_cast(hypoBase); - TrellisPath *path = new TrellisPath(hypo, mgr.arcLists); - paths.Add(path); - } - } + BOOST_FOREACH(const HypothesisBase *hypoBase, sortedHypos) { + const Hypothesis *hypo = static_cast(hypoBase); + TrellisPath *path = new TrellisPath(hypo, mgr.arcLists); + paths.Add(path); + } + } } } diff --git a/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp b/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp index e2b81f0ba..0565aa402 100644 --- a/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp +++ b/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp @@ -20,103 +20,101 @@ namespace Moses2 namespace NSCubePruningMiniStack { Stack::Stack(const Manager &mgr) : - m_mgr(mgr), m_coll( - MemPoolAllocator >( - mgr.GetPool())), m_miniStackRecycler( - 
MemPoolAllocator(mgr.GetPool())) + m_mgr(mgr), m_coll( + MemPoolAllocator >( + mgr.GetPool())), m_miniStackRecycler( + MemPoolAllocator(mgr.GetPool())) { } Stack::~Stack() { - BOOST_FOREACH(const Coll::value_type &val, m_coll){ - const Moses2::HypothesisColl *miniStack = val.second; - delete miniStack; - } + BOOST_FOREACH(const Coll::value_type &val, m_coll) { + const Moses2::HypothesisColl *miniStack = val.second; + delete miniStack; + } - while (!m_miniStackRecycler.empty()) { - Moses2::HypothesisColl *miniStack = m_miniStackRecycler.back(); - m_miniStackRecycler.pop_back(); - delete miniStack; + while (!m_miniStackRecycler.empty()) { + Moses2::HypothesisColl *miniStack = m_miniStackRecycler.back(); + m_miniStackRecycler.pop_back(); + delete miniStack; - } + } } void Stack::Add(Hypothesis *hypo, Recycler &hypoRecycle, - ArcLists &arcLists) + ArcLists &arcLists) { - HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos()); - Moses2::HypothesisColl &coll = GetMiniStack(key); - coll.Add(m_mgr, hypo, hypoRecycle, arcLists); + HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos()); + Moses2::HypothesisColl &coll = GetMiniStack(key); + coll.Add(m_mgr, hypo, hypoRecycle, arcLists); } const Hypothesis *Stack::GetBestHypo() const { - SCORE bestScore = -std::numeric_limits::infinity(); - const HypothesisBase *bestHypo = NULL; - BOOST_FOREACH(const Coll::value_type &val, m_coll){ - const Moses2::HypothesisColl &hypos = *val.second; - const Moses2::HypothesisBase *hypo = hypos.GetBestHypo(); + SCORE bestScore = -std::numeric_limits::infinity(); + const HypothesisBase *bestHypo = NULL; + BOOST_FOREACH(const Coll::value_type &val, m_coll) { + const Moses2::HypothesisColl &hypos = *val.second; + const Moses2::HypothesisBase *hypo = hypos.GetBestHypo(); - if (hypo && hypo->GetFutureScore() > bestScore) { - bestScore = hypo->GetFutureScore(); - bestHypo = hypo; - } - } - return &bestHypo->Cast(); + if (hypo && hypo->GetFutureScore() > 
bestScore) { + bestScore = hypo->GetFutureScore(); + bestHypo = hypo; + } + } + return &bestHypo->Cast(); } size_t Stack::GetHypoSize() const { - size_t ret = 0; - BOOST_FOREACH(const Coll::value_type &val, m_coll){ - const Moses2::HypothesisColl &hypos = *val.second; - ret += hypos.GetSize(); - } - return ret; + size_t ret = 0; + BOOST_FOREACH(const Coll::value_type &val, m_coll) { + const Moses2::HypothesisColl &hypos = *val.second; + ret += hypos.GetSize(); + } + return ret; } Moses2::HypothesisColl &Stack::GetMiniStack(const HypoCoverage &key) { - Moses2::HypothesisColl *ret; - Coll::iterator iter = m_coll.find(key); - if (iter == m_coll.end()) { - if (m_miniStackRecycler.empty()) { - ret = new Moses2::HypothesisColl(m_mgr); - } - else { - ret = m_miniStackRecycler.back(); - ret->Clear(); - m_miniStackRecycler.pop_back(); - } + Moses2::HypothesisColl *ret; + Coll::iterator iter = m_coll.find(key); + if (iter == m_coll.end()) { + if (m_miniStackRecycler.empty()) { + ret = new Moses2::HypothesisColl(m_mgr); + } else { + ret = m_miniStackRecycler.back(); + ret->Clear(); + m_miniStackRecycler.pop_back(); + } - m_coll[key] = ret; - } - else { - ret = iter->second; - } - return *ret; + m_coll[key] = ret; + } else { + ret = iter->second; + } + return *ret; } void Stack::Clear() { - BOOST_FOREACH(const Coll::value_type &val, m_coll){ - Moses2::HypothesisColl *miniStack = val.second; - m_miniStackRecycler.push_back(miniStack); - } + BOOST_FOREACH(const Coll::value_type &val, m_coll) { + Moses2::HypothesisColl *miniStack = val.second; + m_miniStackRecycler.push_back(miniStack); + } - m_coll.clear(); + m_coll.clear(); } void Stack::DebugCounts() { - cerr << "counts="; - BOOST_FOREACH(const Coll::value_type &val, GetColl()){ - const Moses2::HypothesisColl &miniStack = *val.second; - size_t count = miniStack.GetSize(); - cerr << count << " "; - } - cerr << endl; + cerr << "counts="; + BOOST_FOREACH(const Coll::value_type &val, GetColl()) { + const Moses2::HypothesisColl 
&miniStack = *val.second; + size_t count = miniStack.GetSize(); + cerr << count << " "; + } + cerr << endl; } } diff --git a/moses2/PhraseBased/CubePruningMiniStack/Stack.h b/moses2/PhraseBased/CubePruningMiniStack/Stack.h index 7601f90b2..abd564b3f 100644 --- a/moses2/PhraseBased/CubePruningMiniStack/Stack.h +++ b/moses2/PhraseBased/CubePruningMiniStack/Stack.h @@ -36,25 +36,23 @@ public: // bitmap and current endPos of hypos typedef boost::unordered_map, std::equal_to, - MemPoolAllocator > > Coll; + boost::hash, std::equal_to, + MemPoolAllocator > > Coll; Stack(const Manager &mgr); virtual ~Stack(); size_t GetHypoSize() const; - Coll &GetColl() - { + Coll &GetColl() { return m_coll; } - const Coll &GetColl() const - { + const Coll &GetColl() const { return m_coll; } void Add(Hypothesis *hypo, Recycler &hypoRecycle, - ArcLists &arcLists); + ArcLists &arcLists); Moses2::HypothesisColl &GetMiniStack(const HypoCoverage &key); diff --git a/moses2/PhraseBased/Hypothesis.cpp b/moses2/PhraseBased/Hypothesis.cpp index d59efb11a..e907c1a8b 100644 --- a/moses2/PhraseBased/Hypothesis.cpp +++ b/moses2/PhraseBased/Hypothesis.cpp @@ -30,8 +30,7 @@ Hypothesis *Hypothesis::Create(MemPool &pool, Manager &mgr) ret = static_cast(recycler.Get()); if (ret) { // got new hypo from recycler. 
Do nothing - } - else { + } else { ret = new (pool.Allocate()) Hypothesis(pool, mgr.system); //cerr << "Hypothesis=" << sizeof(Hypothesis) << " " << ret << endl; recycler.Keep(ret); @@ -40,7 +39,7 @@ Hypothesis *Hypothesis::Create(MemPool &pool, Manager &mgr) } Hypothesis::Hypothesis(MemPool &pool, const System &system) : - HypothesisBase(pool, system), m_currTargetWordsRange() + HypothesisBase(pool, system), m_currTargetWordsRange() { } @@ -50,7 +49,7 @@ Hypothesis::~Hypothesis() } void Hypothesis::Init(Manager &mgr, const InputPathBase &path, - const TargetPhraseImpl &tp, const Bitmap &bitmap) + const TargetPhraseImpl &tp, const Bitmap &bitmap) { m_mgr = &mgr; m_targetPhrase = &tp; @@ -66,8 +65,8 @@ void Hypothesis::Init(Manager &mgr, const InputPathBase &path, } void Hypothesis::Init(Manager &mgr, const Hypothesis &prevHypo, - const InputPathBase &path, const TargetPhraseImpl &tp, const Bitmap &bitmap, - SCORE estimatedScore) + const InputPathBase &path, const TargetPhraseImpl &tp, const Bitmap &bitmap, + SCORE estimatedScore) { m_mgr = &mgr; m_targetPhrase = &tp; @@ -76,9 +75,9 @@ void Hypothesis::Init(Manager &mgr, const Hypothesis &prevHypo, m_prevHypo = &prevHypo; m_currTargetWordsRange.SetStartPos( - prevHypo.m_currTargetWordsRange.GetEndPos() + 1); + prevHypo.m_currTargetWordsRange.GetEndPos() + 1); m_currTargetWordsRange.SetEndPos( - prevHypo.m_currTargetWordsRange.GetEndPos() + tp.GetSize()); + prevHypo.m_currTargetWordsRange.GetEndPos() + tp.GetSize()); m_estimatedScore = estimatedScore; @@ -116,7 +115,7 @@ std::string Hypothesis::Debug(const System &system) const // states const std::vector &sfffs = - GetManager().system.featureFunctions.GetStatefulFeatureFunctions(); + GetManager().system.featureFunctions.GetStatefulFeatureFunctions(); size_t numStatefulFFs = sfffs.size(); for (size_t i = 0; i < numStatefulFFs; ++i) { const FFState &state = *GetState(i); @@ -152,8 +151,7 @@ void Hypothesis::OutputToStream(std::ostream &out) const if 
(m_mgr->system.options.output.ReportSegmentation == 1) { // just report phrase segmentation out << "|" << m_path->range.GetStartPos() << "-" << m_path->range.GetEndPos() << "| "; - } - else if (m_mgr->system.options.output.ReportSegmentation == 2) { + } else if (m_mgr->system.options.output.ReportSegmentation == 2) { // more detailed info about every segment out << "|"; @@ -171,19 +169,19 @@ void Hypothesis::OutputToStream(std::ostream &out) const void Hypothesis::EmptyHypothesisState(const InputType &input) { const std::vector &sfffs = - GetManager().system.featureFunctions.GetStatefulFeatureFunctions(); - BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs){ - size_t statefulInd = sfff->GetStatefulInd(); - FFState *state = m_ffStates[statefulInd]; - sfff->EmptyHypothesisState(*state, GetManager(), input, *this); -} + GetManager().system.featureFunctions.GetStatefulFeatureFunctions(); + BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs) { + size_t statefulInd = sfff->GetStatefulInd(); + FFState *state = m_ffStates[statefulInd]; + sfff->EmptyHypothesisState(*state, GetManager(), input, *this); + } } void Hypothesis::EvaluateWhenApplied() { const std::vector &sfffs = - GetManager().system.featureFunctions.GetStatefulFeatureFunctions(); - BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs){ + GetManager().system.featureFunctions.GetStatefulFeatureFunctions(); + BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs) { EvaluateWhenApplied(*sfff); } //cerr << *this << endl; @@ -196,7 +194,7 @@ void Hypothesis::EvaluateWhenApplied(const StatefulFeatureFunction &sfff) FFState *thisState = m_ffStates[statefulInd]; assert(prevState); sfff.EvaluateWhenApplied(GetManager(), *this, *prevState, *m_scores, - *thisState); + *thisState); } diff --git a/moses2/PhraseBased/Hypothesis.h b/moses2/PhraseBased/Hypothesis.h index 7859c1d14..71b95a3e3 100644 --- a/moses2/PhraseBased/Hypothesis.h +++ b/moses2/PhraseBased/Hypothesis.h @@ -35,36 +35,31 @@ public: // 
initial, empty hypo void Init(Manager &mgr, const InputPathBase &path, const TargetPhraseImpl &tp, - const Bitmap &bitmap); + const Bitmap &bitmap); void Init(Manager &mgr, const Hypothesis &prevHypo, const InputPathBase &path, - const TargetPhraseImpl &tp, const Bitmap &bitmap, SCORE estimatedScore); + const TargetPhraseImpl &tp, const Bitmap &bitmap, SCORE estimatedScore); size_t hash() const; bool operator==(const Hypothesis &other) const; - inline const Bitmap &GetBitmap() const - { + inline const Bitmap &GetBitmap() const { return *m_sourceCompleted; } - inline const InputPathBase &GetInputPath() const - { + inline const InputPathBase &GetInputPath() const { return *m_path; } - inline const Range &GetCurrTargetWordsRange() const - { + inline const Range &GetCurrTargetWordsRange() const { return m_currTargetWordsRange; } - SCORE GetFutureScore() const - { + SCORE GetFutureScore() const { return GetScores().GetTotalScore() + m_estimatedScore; } - const TargetPhrase &GetTargetPhrase() const - { + const TargetPhrase &GetTargetPhrase() const { return *m_targetPhrase; } @@ -77,16 +72,14 @@ public: void EvaluateWhenApplied(); void EvaluateWhenApplied(const StatefulFeatureFunction &sfff); - const Hypothesis* GetPrevHypo() const - { + const Hypothesis* GetPrevHypo() const { return m_prevHypo; } /** curr - pos is relative from CURRENT hypothesis's starting index * (ie, start of sentence would be some negative number, which is * not allowed- USE WITH CAUTION) */ - inline const Word &GetCurrWord(size_t pos) const - { + inline const Word &GetCurrWord(size_t pos) const { return GetTargetPhrase()[pos]; } @@ -108,8 +101,7 @@ protected: class HypothesisTargetPhraseOrderer { public: - bool operator()(const Hypothesis* a, const Hypothesis* b) const - { + bool operator()(const Hypothesis* a, const Hypothesis* b) const { PhraseOrdererLexical phraseCmp; bool ret = phraseCmp(a->GetTargetPhrase(), b->GetTargetPhrase()); /* diff --git a/moses2/PhraseBased/InputPath.cpp 
b/moses2/PhraseBased/InputPath.cpp index 1a9716380..3761080a4 100644 --- a/moses2/PhraseBased/InputPath.cpp +++ b/moses2/PhraseBased/InputPath.cpp @@ -15,10 +15,10 @@ using namespace std; namespace Moses2 { InputPath::InputPath(MemPool &pool, const SubPhrase &subPhrase, - const Range &range, size_t numPt, const InputPath *prefixPath) -:InputPathBase(pool, range, numPt, prefixPath) -,m_numRules(0) -,subPhrase(subPhrase) + const Range &range, size_t numPt, const InputPath *prefixPath) + :InputPathBase(pool, range, numPt, prefixPath) + ,m_numRules(0) + ,subPhrase(subPhrase) { targetPhrases = pool.Allocate(numPt); Init(targetPhrases, numPt, NULL); @@ -30,7 +30,7 @@ InputPath::~InputPath() } void InputPath::AddTargetPhrases(const PhraseTable &pt, - const TargetPhrases *tps) + const TargetPhrases *tps) { size_t ptInd = pt.GetPtInd(); targetPhrases[ptInd] = tps; diff --git a/moses2/PhraseBased/InputPath.h b/moses2/PhraseBased/InputPath.h index 100649155..b29c7f5ec 100644 --- a/moses2/PhraseBased/InputPath.h +++ b/moses2/PhraseBased/InputPath.h @@ -22,14 +22,15 @@ public: SubPhrase subPhrase; InputPath(MemPool &pool, const SubPhrase &subPhrase, const Range &range, - size_t numPt, const InputPath *prefixPath); + size_t numPt, const InputPath *prefixPath); virtual ~InputPath(); void AddTargetPhrases(const PhraseTable &pt, const TargetPhrases *tps); const TargetPhrases *GetTargetPhrases(const PhraseTable &pt) const; - size_t GetNumRules() const - { return m_numRules; } + size_t GetNumRules() const { + return m_numRules; + } std::string Debug(const System &system) const; diff --git a/moses2/PhraseBased/InputPaths.h b/moses2/PhraseBased/InputPaths.h index dda374515..9089a7c16 100644 --- a/moses2/PhraseBased/InputPaths.h +++ b/moses2/PhraseBased/InputPaths.h @@ -23,16 +23,17 @@ class InputPaths: public InputPathsBase public: void Init(const InputType &input, const ManagerBase &mgr); - const InputPath &GetBlank() const - { + const InputPath &GetBlank() const { return *m_blank; } 
- Matrix &GetMatrix() - { return *m_matrix; } + Matrix &GetMatrix() { + return *m_matrix; + } - const Matrix &GetMatrix() const - { return *m_matrix; } + const Matrix &GetMatrix() const { + return *m_matrix; + } protected: InputPath *m_blank; diff --git a/moses2/PhraseBased/Manager.cpp b/moses2/PhraseBased/Manager.cpp index 158b72592..28073d4f6 100644 --- a/moses2/PhraseBased/Manager.cpp +++ b/moses2/PhraseBased/Manager.cpp @@ -37,71 +37,71 @@ using namespace std; namespace Moses2 { Manager::Manager(System &sys, const TranslationTask &task, - const std::string &inputStr, long translationId) : - ManagerBase(sys, task, inputStr, translationId) -,m_search(NULL) -,m_bitmaps(NULL) + const std::string &inputStr, long translationId) : + ManagerBase(sys, task, inputStr, translationId) + ,m_search(NULL) + ,m_bitmaps(NULL) { - //cerr << translationId << " inputStr=" << inputStr << endl; + //cerr << translationId << " inputStr=" << inputStr << endl; } Manager::~Manager() { - //cerr << "Start ~Manager " << this << endl; - delete m_search; - delete m_bitmaps; - //cerr << "Finish ~Manager " << this << endl; + //cerr << "Start ~Manager " << this << endl; + delete m_search; + delete m_bitmaps; + //cerr << "Finish ~Manager " << this << endl; } void Manager::Init() { - // init pools etc - InitPools(); + // init pools etc + InitPools(); - FactorCollection &vocab = system.GetVocab(); - m_input = Moses2::Sentence::CreateFromString(GetPool(), vocab, system, m_inputStr); + FactorCollection &vocab = system.GetVocab(); + m_input = Moses2::Sentence::CreateFromString(GetPool(), vocab, system, m_inputStr); - m_bitmaps = new Bitmaps(GetPool()); + m_bitmaps = new Bitmaps(GetPool()); - const PhraseTable &firstPt = *system.featureFunctions.phraseTables[0]; - m_initPhrase = new (GetPool().Allocate()) TargetPhraseImpl( - GetPool(), firstPt, system, 0); + const PhraseTable &firstPt = *system.featureFunctions.phraseTables[0]; + m_initPhrase = new (GetPool().Allocate()) TargetPhraseImpl( + GetPool(), 
firstPt, system, 0); - const Sentence &sentence = static_cast(GetInput()); - //cerr << "sentence=" << sentence.GetSize() << " " << sentence.Debug(system) << endl; + const Sentence &sentence = static_cast(GetInput()); + //cerr << "sentence=" << sentence.GetSize() << " " << sentence.Debug(system) << endl; - m_inputPaths.Init(sentence, *this); + m_inputPaths.Init(sentence, *this); - // xml - const UnknownWordPenalty *unkWP = system.featureFunctions.GetUnknownWordPenalty(); - UTIL_THROW_IF2(unkWP == NULL, "There must be a UnknownWordPenalty FF"); - unkWP->ProcessXML(*this, GetPool(), sentence, m_inputPaths); + // xml + const UnknownWordPenalty *unkWP = system.featureFunctions.GetUnknownWordPenalty(); + UTIL_THROW_IF2(unkWP == NULL, "There must be a UnknownWordPenalty FF"); + unkWP->ProcessXML(*this, GetPool(), sentence, m_inputPaths); - // lookup with every pt - const std::vector &pts = system.mappings; - for (size_t i = 0; i < pts.size(); ++i) { - const PhraseTable &pt = *pts[i]; - //cerr << "Looking up from " << pt.GetName() << endl; - pt.Lookup(*this, m_inputPaths); - } - //m_inputPaths.DeleteUnusedPaths(); - CalcFutureScore(); + // lookup with every pt + const std::vector &pts = system.mappings; + for (size_t i = 0; i < pts.size(); ++i) { + const PhraseTable &pt = *pts[i]; + //cerr << "Looking up from " << pt.GetName() << endl; + pt.Lookup(*this, m_inputPaths); + } + //m_inputPaths.DeleteUnusedPaths(); + CalcFutureScore(); - m_bitmaps->Init(sentence.GetSize(), vector(0)); + m_bitmaps->Init(sentence.GetSize(), vector(0)); - switch (system.options.search.algo) { - case Normal: - m_search = new NSNormal::Search(*this); - break; - case NormalBatch: - //m_search = new NSBatch::Search(*this); - UTIL_THROW2("Not implemented"); - break; - case CubePruning: - case CubePruningMiniStack: - m_search = new NSCubePruningMiniStack::Search(*this); - break; - /* + switch (system.options.search.algo) { + case Normal: + m_search = new NSNormal::Search(*this); + break; + case 
NormalBatch: + //m_search = new NSBatch::Search(*this); + UTIL_THROW2("Not implemented"); + break; + case CubePruning: + case CubePruningMiniStack: + m_search = new NSCubePruningMiniStack::Search(*this); + break; + /* case CubePruningPerMiniStack: m_search = new NSCubePruningPerMiniStack::Search(*this); break; @@ -114,166 +114,164 @@ void Manager::Init() case CubePruningBitmapStack: m_search = new NSCubePruningBitmapStack::Search(*this); break; - */ - default: + */ + default: UTIL_THROW2("Unknown search algorithm"); - } + } } void Manager::Decode() { - //cerr << "Start Decode " << this << endl; + //cerr << "Start Decode " << this << endl; - Init(); - m_search->Decode(); + Init(); + m_search->Decode(); - //cerr << "Finished Decode " << this << endl; + //cerr << "Finished Decode " << this << endl; } void Manager::CalcFutureScore() { - const Sentence &sentence = static_cast(GetInput()); - size_t size = sentence.GetSize(); - m_estimatedScores = - new (GetPool().Allocate()) EstimatedScores(GetPool(), - size); - m_estimatedScores->InitTriangle(-numeric_limits::infinity()); + const Sentence &sentence = static_cast(GetInput()); + size_t size = sentence.GetSize(); + m_estimatedScores = + new (GetPool().Allocate()) EstimatedScores(GetPool(), + size); + m_estimatedScores->InitTriangle(-numeric_limits::infinity()); - // walk all the translation options and record the cheapest option for each span - BOOST_FOREACH(const InputPathBase *path, m_inputPaths){ - const Range &range = path->range; - SCORE bestScore = -numeric_limits::infinity(); + // walk all the translation options and record the cheapest option for each span + BOOST_FOREACH(const InputPathBase *path, m_inputPaths) { + const Range &range = path->range; + SCORE bestScore = -numeric_limits::infinity(); - size_t numPt = system.mappings.size(); - for (size_t i = 0; i < numPt; ++i) { - const TargetPhrases *tps = static_cast(path)->targetPhrases[i]; - if (tps) { - BOOST_FOREACH(const TargetPhraseImpl *tp, *tps) { - SCORE 
score = tp->GetFutureScore(); - if (score > bestScore) { - bestScore = score; - } - } - } - } - m_estimatedScores->SetValue(range.GetStartPos(), range.GetEndPos(), bestScore); - } + size_t numPt = system.mappings.size(); + for (size_t i = 0; i < numPt; ++i) { + const TargetPhrases *tps = static_cast(path)->targetPhrases[i]; + if (tps) { + BOOST_FOREACH(const TargetPhraseImpl *tp, *tps) { + SCORE score = tp->GetFutureScore(); + if (score > bestScore) { + bestScore = score; + } + } + } + } + m_estimatedScores->SetValue(range.GetStartPos(), range.GetEndPos(), bestScore); + } - // now fill all the cells in the strictly upper triangle - // there is no way to modify the diagonal now, in the case - // where no translation option covers a single-word span, - // we leave the +inf in the matrix - // like in chart parsing we want each cell to contain the highest score - // of the full-span trOpt or the sum of scores of joining two smaller spans + // now fill all the cells in the strictly upper triangle + // there is no way to modify the diagonal now, in the case + // where no translation option covers a single-word span, + // we leave the +inf in the matrix + // like in chart parsing we want each cell to contain the highest score + // of the full-span trOpt or the sum of scores of joining two smaller spans - for (size_t colstart = 1; colstart < size; colstart++) { - for (size_t diagshift = 0; diagshift < size - colstart; diagshift++) { - size_t sPos = diagshift; - size_t ePos = colstart + diagshift; - for (size_t joinAt = sPos; joinAt < ePos; joinAt++) { - float joinedScore = m_estimatedScores->GetValue(sPos, joinAt) - + m_estimatedScores->GetValue(joinAt + 1, ePos); - // uncomment to see the cell filling scheme - // TRACE_ERR("[" << sPos << "," << ePos << "] <-? 
[" - // << sPos << "," << joinAt << "]+[" - // << joinAt+1 << "," << ePos << "] (colstart: " - // << colstart << ", diagshift: " << diagshift << ")" - // << endl); + for (size_t colstart = 1; colstart < size; colstart++) { + for (size_t diagshift = 0; diagshift < size - colstart; diagshift++) { + size_t sPos = diagshift; + size_t ePos = colstart + diagshift; + for (size_t joinAt = sPos; joinAt < ePos; joinAt++) { + float joinedScore = m_estimatedScores->GetValue(sPos, joinAt) + + m_estimatedScores->GetValue(joinAt + 1, ePos); + // uncomment to see the cell filling scheme + // TRACE_ERR("[" << sPos << "," << ePos << "] <-? [" + // << sPos << "," << joinAt << "]+[" + // << joinAt+1 << "," << ePos << "] (colstart: " + // << colstart << ", diagshift: " << diagshift << ")" + // << endl); - if (joinedScore > m_estimatedScores->GetValue(sPos, ePos)) m_estimatedScores->SetValue( - sPos, ePos, joinedScore); - } - } - } + if (joinedScore > m_estimatedScores->GetValue(sPos, ePos)) m_estimatedScores->SetValue( + sPos, ePos, joinedScore); + } + } + } - //cerr << "Square matrix:" << endl; - //cerr << *m_estimatedScores << endl; + //cerr << "Square matrix:" << endl; + //cerr << *m_estimatedScores << endl; } std::string Manager::OutputBest() const { - stringstream out; + stringstream out; Moses2::FixPrecision(out); - const Hypothesis *bestHypo = m_search->GetBestHypo(); - if (bestHypo) { - if (system.options.output.ReportHypoScore) { - out << bestHypo->GetScores().GetTotalScore() << " "; - } + const Hypothesis *bestHypo = m_search->GetBestHypo(); + if (bestHypo) { + if (system.options.output.ReportHypoScore) { + out << bestHypo->GetScores().GetTotalScore() << " "; + } - bestHypo->OutputToStream(out); - //cerr << "BEST TRANSLATION: " << *bestHypo; - } - else { - if (system.options.output.ReportHypoScore) { - out << "0 "; - } - //cerr << "NO TRANSLATION " << m_input->GetTranslationId() << endl; - } + bestHypo->OutputToStream(out); + //cerr << "BEST TRANSLATION: " << *bestHypo; + } 
else { + if (system.options.output.ReportHypoScore) { + out << "0 "; + } + //cerr << "NO TRANSLATION " << m_input->GetTranslationId() << endl; + } - return out.str(); - //cerr << endl; + return out.str(); + //cerr << endl; } std::string Manager::OutputNBest() { - arcLists.Sort(); + arcLists.Sort(); - boost::unordered_set distinctHypos; + boost::unordered_set distinctHypos; - TrellisPaths contenders; - m_search->AddInitialTrellisPaths(contenders); + TrellisPaths contenders; + m_search->AddInitialTrellisPaths(contenders); - long transId = GetTranslationId(); + long transId = GetTranslationId(); - // MAIN LOOP - stringstream out; - //Moses2::FixPrecision(out); + // MAIN LOOP + stringstream out; + //Moses2::FixPrecision(out); - size_t maxIter = system.options.nbest.nbest_size * system.options.nbest.factor; - size_t bestInd = 0; - for (size_t i = 0; i < maxIter; ++i) { - if (bestInd > system.options.nbest.nbest_size || contenders.empty()) { - break; - } + size_t maxIter = system.options.nbest.nbest_size * system.options.nbest.factor; + size_t bestInd = 0; + for (size_t i = 0; i < maxIter; ++i) { + if (bestInd > system.options.nbest.nbest_size || contenders.empty()) { + break; + } - //cerr << "bestInd=" << bestInd << endl; - TrellisPath *path = contenders.Get(); + //cerr << "bestInd=" << bestInd << endl; + TrellisPath *path = contenders.Get(); - bool ok = false; - if (system.options.nbest.only_distinct) { - string tgtPhrase = path->OutputTargetPhrase(system); - //cerr << "tgtPhrase=" << tgtPhrase << endl; - boost::hash string_hash; - size_t hash = string_hash(tgtPhrase); + bool ok = false; + if (system.options.nbest.only_distinct) { + string tgtPhrase = path->OutputTargetPhrase(system); + //cerr << "tgtPhrase=" << tgtPhrase << endl; + boost::hash string_hash; + size_t hash = string_hash(tgtPhrase); - if (distinctHypos.insert(hash).second) { - ok = true; - } - } - else { - ok = true; - } + if (distinctHypos.insert(hash).second) { + ok = true; + } + } else { + ok = true; + 
} - if (ok) { - ++bestInd; - out << transId << " ||| "; - path->OutputToStream(out, system); - out << "\n"; - } + if (ok) { + ++bestInd; + out << transId << " ||| "; + path->OutputToStream(out, system); + out << "\n"; + } - // create next paths - path->CreateDeviantPaths(contenders, arcLists, GetPool(), system); + // create next paths + path->CreateDeviantPaths(contenders, arcLists, GetPool(), system); - delete path; - } + delete path; + } - return out.str(); + return out.str(); } std::string Manager::OutputTransOpt() { - return ""; + return ""; } } diff --git a/moses2/PhraseBased/Manager.h b/moses2/PhraseBased/Manager.h index 3f42d6b27..1a348f75f 100644 --- a/moses2/PhraseBased/Manager.h +++ b/moses2/PhraseBased/Manager.h @@ -37,21 +37,25 @@ class Manager: public ManagerBase { public: Manager(System &sys, const TranslationTask &task, const std::string &inputStr, - long translationId); + long translationId); virtual ~Manager(); - Bitmaps &GetBitmaps() - { return *m_bitmaps; } + Bitmaps &GetBitmaps() { + return *m_bitmaps; + } - const EstimatedScores &GetEstimatedScores() const - { return *m_estimatedScores; } + const EstimatedScores &GetEstimatedScores() const { + return *m_estimatedScores; + } - const InputPaths &GetInputPaths() const - { return m_inputPaths; } + const InputPaths &GetInputPaths() const { + return m_inputPaths; + } - const TargetPhraseImpl &GetInitPhrase() const - { return *m_initPhrase; } + const TargetPhraseImpl &GetInitPhrase() const { + return *m_initPhrase; + } void Decode(); std::string OutputBest() const; diff --git a/moses2/PhraseBased/Normal/Search.cpp b/moses2/PhraseBased/Normal/Search.cpp index 7c5026a7c..1c158543d 100644 --- a/moses2/PhraseBased/Normal/Search.cpp +++ b/moses2/PhraseBased/Normal/Search.cpp @@ -26,134 +26,134 @@ namespace NSNormal { Search::Search(Manager &mgr) -:Moses2::Search(mgr) -, m_stacks(mgr) + :Moses2::Search(mgr) + , m_stacks(mgr) { - // TODO Auto-generated constructor stub + // TODO Auto-generated constructor 
stub } Search::~Search() { - // TODO Auto-generated destructor stub + // TODO Auto-generated destructor stub } void Search::Decode() { - // init stacks - const Sentence &sentence = static_cast(mgr.GetInput()); - m_stacks.Init(mgr, sentence.GetSize() + 1); + // init stacks + const Sentence &sentence = static_cast(mgr.GetInput()); + m_stacks.Init(mgr, sentence.GetSize() + 1); - const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); - Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); - initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), - initBitmap); - initHypo->EmptyHypothesisState(mgr.GetInput()); + const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); + Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); + initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), + initBitmap); + initHypo->EmptyHypothesisState(mgr.GetInput()); - m_stacks.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists); + m_stacks.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists); - for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) { - Decode(stackInd); - //cerr << m_stacks << endl; + for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) { + Decode(stackInd); + //cerr << m_stacks << endl; - // delete stack to save mem - if (stackInd < m_stacks.GetSize() - 1) { - m_stacks.Delete(stackInd); - } - //cerr << m_stacks.Debug(mgr.system) << endl; - } + // delete stack to save mem + if (stackInd < m_stacks.GetSize() - 1) { + m_stacks.Delete(stackInd); + } + //cerr << m_stacks.Debug(mgr.system) << endl; + } } void Search::Decode(size_t stackInd) { - //cerr << "stackInd=" << stackInd << endl; - Stack &stack = m_stacks[stackInd]; - if (&stack == &m_stacks.Back()) { - // last stack. don't do anythin - return; - } + //cerr << "stackInd=" << stackInd << endl; + Stack &stack = m_stacks[stackInd]; + if (&stack == &m_stacks.Back()) { + // last stack. 
don't do anythin + return; + } - const Hypotheses &hypos = stack.GetSortedAndPrunedHypos(mgr, mgr.arcLists); - //cerr << "hypos=" << hypos.size() << endl; + const Hypotheses &hypos = stack.GetSortedAndPrunedHypos(mgr, mgr.arcLists); + //cerr << "hypos=" << hypos.size() << endl; - const InputPaths &paths = mgr.GetInputPaths(); + const InputPaths &paths = mgr.GetInputPaths(); - BOOST_FOREACH(const InputPathBase *path, paths){ - BOOST_FOREACH(const HypothesisBase *hypo, hypos) { - Extend(*static_cast(hypo), *static_cast(path)); - } - } + BOOST_FOREACH(const InputPathBase *path, paths) { + BOOST_FOREACH(const HypothesisBase *hypo, hypos) { + Extend(*static_cast(hypo), *static_cast(path)); + } + } } void Search::Extend(const Hypothesis &hypo, const InputPath &path) { - const Bitmap &hypoBitmap = hypo.GetBitmap(); - const Range &hypoRange = hypo.GetInputPath().range; - const Range &pathRange = path.range; + const Bitmap &hypoBitmap = hypo.GetBitmap(); + const Range &hypoRange = hypo.GetInputPath().range; + const Range &pathRange = path.range; - if (!CanExtend(hypoBitmap, hypoRange.GetEndPos(), pathRange)) { - return; - } + if (!CanExtend(hypoBitmap, hypoRange.GetEndPos(), pathRange)) { + return; + } - const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint(); - if (!reorderingConstraint.Check(hypoBitmap, pathRange.GetStartPos(), pathRange.GetEndPos())) { - return; - } + const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint(); + if (!reorderingConstraint.Check(hypoBitmap, pathRange.GetStartPos(), pathRange.GetEndPos())) { + return; + } - // extend this hypo - const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); - //SCORE estimatedScore = mgr.GetEstimatedScores().CalcFutureScore2(bitmap, pathRange.GetStartPos(), pathRange.GetEndPos()); - SCORE estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap); + // extend this hypo + const Bitmap &newBitmap = 
mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); + //SCORE estimatedScore = mgr.GetEstimatedScores().CalcFutureScore2(bitmap, pathRange.GetStartPos(), pathRange.GetEndPos()); + SCORE estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap); - size_t numPt = mgr.system.mappings.size(); - const TargetPhrases **tpsAllPt = path.targetPhrases; - for (size_t i = 0; i < numPt; ++i) { - const TargetPhrases *tps = tpsAllPt[i]; - if (tps) { - Extend(hypo, *tps, path, newBitmap, estimatedScore); - } - } + size_t numPt = mgr.system.mappings.size(); + const TargetPhrases **tpsAllPt = path.targetPhrases; + for (size_t i = 0; i < numPt; ++i) { + const TargetPhrases *tps = tpsAllPt[i]; + if (tps) { + Extend(hypo, *tps, path, newBitmap, estimatedScore); + } + } } void Search::Extend(const Hypothesis &hypo, const TargetPhrases &tps, - const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore) + const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore) { - BOOST_FOREACH(const TargetPhraseImpl *tp, tps){ - Extend(hypo, *tp, path, newBitmap, estimatedScore); - } + BOOST_FOREACH(const TargetPhraseImpl *tp, tps) { + Extend(hypo, *tp, path, newBitmap, estimatedScore); + } } void Search::Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp, - const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore) + const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore) { - Hypothesis *newHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); - newHypo->Init(mgr, hypo, path, tp, newBitmap, estimatedScore); - newHypo->EvaluateWhenApplied(); + Hypothesis *newHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); + newHypo->Init(mgr, hypo, path, tp, newBitmap, estimatedScore); + newHypo->EvaluateWhenApplied(); - m_stacks.Add(newHypo, mgr.GetHypoRecycle(), mgr.arcLists); + m_stacks.Add(newHypo, mgr.GetHypoRecycle(), mgr.arcLists); - //m_arcLists.AddArc(stackAdded.added, newHypo, stackAdded.other); - //stack.Prune(mgr.GetHypoRecycle(), 
mgr.system.stackSize, mgr.system.stackSize * 2); + //m_arcLists.AddArc(stackAdded.added, newHypo, stackAdded.other); + //stack.Prune(mgr.GetHypoRecycle(), mgr.system.stackSize, mgr.system.stackSize * 2); } const Hypothesis *Search::GetBestHypo() const { - const Stack &lastStack = m_stacks.Back(); - const Hypothesis *best = lastStack.GetBestHypo(); - return best; + const Stack &lastStack = m_stacks.Back(); + const Hypothesis *best = lastStack.GetBestHypo(); + return best; } void Search::AddInitialTrellisPaths(TrellisPaths &paths) const { - const Stack &lastStack = m_stacks.Back(); - const Hypotheses &hypos = lastStack.GetSortedAndPrunedHypos(mgr, mgr.arcLists); + const Stack &lastStack = m_stacks.Back(); + const Hypotheses &hypos = lastStack.GetSortedAndPrunedHypos(mgr, mgr.arcLists); - BOOST_FOREACH(const HypothesisBase *hypoBase, hypos){ - const Hypothesis *hypo = static_cast(hypoBase); - TrellisPath *path = new TrellisPath(hypo, mgr.arcLists); - paths.Add(path); - } + BOOST_FOREACH(const HypothesisBase *hypoBase, hypos) { + const Hypothesis *hypo = static_cast(hypoBase); + TrellisPath *path = new TrellisPath(hypo, mgr.arcLists); + paths.Add(path); + } } } // namespace diff --git a/moses2/PhraseBased/Normal/Search.h b/moses2/PhraseBased/Normal/Search.h index cefefa924..0d487e32b 100644 --- a/moses2/PhraseBased/Normal/Search.h +++ b/moses2/PhraseBased/Normal/Search.h @@ -41,9 +41,9 @@ protected: void Decode(size_t stackInd); void Extend(const Hypothesis &hypo, const InputPath &path); void Extend(const Hypothesis &hypo, const TargetPhrases &tps, - const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore); + const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore); void Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp, - const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore); + const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore); }; diff --git a/moses2/PhraseBased/Normal/Stack.cpp 
b/moses2/PhraseBased/Normal/Stack.cpp index 782ce5b84..efaa86f2d 100644 --- a/moses2/PhraseBased/Normal/Stack.cpp +++ b/moses2/PhraseBased/Normal/Stack.cpp @@ -20,7 +20,7 @@ namespace NSNormal { Stack::Stack(const Manager &mgr) : - HypothesisColl(mgr) + HypothesisColl(mgr) { // TODO Auto-generated constructor stub diff --git a/moses2/PhraseBased/Normal/Stacks.cpp b/moses2/PhraseBased/Normal/Stacks.cpp index bb7239cf8..a47709676 100644 --- a/moses2/PhraseBased/Normal/Stacks.cpp +++ b/moses2/PhraseBased/Normal/Stacks.cpp @@ -18,7 +18,7 @@ namespace NSNormal { Stacks::Stacks(const Manager &mgr) : - m_mgr(mgr) + m_mgr(mgr) { // TODO Auto-generated constructor stub @@ -46,8 +46,7 @@ std::string Stacks::Debug(const System &system) const const Stack *stack = m_stacks[i]; if (stack) { out << stack->GetSize() << " "; - } - else { + } else { out << "N "; } } @@ -55,7 +54,7 @@ std::string Stacks::Debug(const System &system) const } void Stacks::Add(Hypothesis *hypo, Recycler &hypoRecycle, - ArcLists &arcLists) + ArcLists &arcLists) { size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered(); //cerr << "numWordsCovered=" << numWordsCovered << endl; diff --git a/moses2/PhraseBased/Normal/Stacks.h b/moses2/PhraseBased/Normal/Stacks.h index 58626f234..b6da78a4e 100644 --- a/moses2/PhraseBased/Normal/Stacks.h +++ b/moses2/PhraseBased/Normal/Stacks.h @@ -27,29 +27,25 @@ public: void Init(const Manager &mgr, size_t numStacks); - size_t GetSize() const - { + size_t GetSize() const { return m_stacks.size(); } - const Stack &Back() const - { + const Stack &Back() const { return *m_stacks.back(); } - Stack &operator[](size_t ind) - { + Stack &operator[](size_t ind) { return *m_stacks[ind]; } - void Delete(size_t ind) - { + void Delete(size_t ind) { delete m_stacks[ind]; m_stacks[ind] = NULL; } void Add(Hypothesis *hypo, Recycler &hypoRecycle, - ArcLists &arcLists); + ArcLists &arcLists); std::string Debug(const System &system) const; diff --git a/moses2/PhraseBased/PhraseImpl.cpp 
b/moses2/PhraseBased/PhraseImpl.cpp index 00f55a35b..d72e36083 100644 --- a/moses2/PhraseBased/PhraseImpl.cpp +++ b/moses2/PhraseBased/PhraseImpl.cpp @@ -11,7 +11,7 @@ using namespace std; namespace Moses2 { PhraseImpl *PhraseImpl::CreateFromString(MemPool &pool, FactorCollection &vocab, - const System &system, const std::string &str) + const System &system, const std::string &str) { std::vector toks = Moses2::Tokenize(str); size_t size = toks.size(); diff --git a/moses2/PhraseBased/PhraseImpl.h b/moses2/PhraseBased/PhraseImpl.h index 787cdf58d..f199e62d4 100644 --- a/moses2/PhraseBased/PhraseImpl.h +++ b/moses2/PhraseBased/PhraseImpl.h @@ -9,11 +9,10 @@ class PhraseImpl: public PhraseImplTemplate { public: static PhraseImpl *CreateFromString(MemPool &pool, FactorCollection &vocab, - const System &system, const std::string &str); + const System &system, const std::string &str); PhraseImpl(MemPool &pool, size_t size) : - PhraseImplTemplate(pool, size) - { + PhraseImplTemplate(pool, size) { } }; diff --git a/moses2/PhraseBased/ReorderingConstraint.cpp b/moses2/PhraseBased/ReorderingConstraint.cpp index cff09cc24..0e84b1f3f 100644 --- a/moses2/PhraseBased/ReorderingConstraint.cpp +++ b/moses2/PhraseBased/ReorderingConstraint.cpp @@ -237,12 +237,12 @@ std::ostream &ReorderingConstraint::Debug(std::ostream &out, const System &syste out << "Walls:"; for (size_t i = 0; i < m_size; ++i) { - out << m_wall[i]; + out << m_wall[i]; } out << " Local walls:"; for (size_t i = 0; i < m_size; ++i) { - out << m_localWall[i] << " "; + out << m_localWall[i] << " "; } return out; diff --git a/moses2/PhraseBased/Search.cpp b/moses2/PhraseBased/Search.cpp index 1a85e15f5..48f9995ff 100644 --- a/moses2/PhraseBased/Search.cpp +++ b/moses2/PhraseBased/Search.cpp @@ -15,7 +15,7 @@ namespace Moses2 { Search::Search(Manager &mgr) : - mgr(mgr) + mgr(mgr) { // TODO Auto-generated constructor stub @@ -27,7 +27,7 @@ Search::~Search() } bool Search::CanExtend(const Bitmap &hypoBitmap, size_t 
hypoRangeEndPos, - const Range &pathRange) + const Range &pathRange) { const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos(); @@ -46,7 +46,7 @@ bool Search::CanExtend(const Bitmap &hypoBitmap, size_t hypoRangeEndPos, if (mgr.system.options.reordering.max_distortion >= 0) { // distortion limit int distortion = ComputeDistortionDistance(hypoRangeEndPos, - pathRange.GetStartPos()); + pathRange.GetStartPos()); if (distortion > mgr.system.options.reordering.max_distortion) { //cerr << " NO" << endl; return false; @@ -88,8 +88,7 @@ bool Search::CanExtend(const Bitmap &hypoBitmap, size_t hypoRangeEndPos, if (isLeftMostEdge) { // any length extension is okay if starting at left-most edge - } - else { // starting somewhere other than left-most edge, use caution + } else { // starting somewhere other than left-most edge, use caution // the basic idea is this: we would like to translate a phrase // starting from a position further right than the left-most // open gap. The distortion penalty for the following phrase @@ -101,7 +100,7 @@ bool Search::CanExtend(const Bitmap &hypoBitmap, size_t hypoRangeEndPos, Range bestNextExtension(hypoFirstGapPos, hypoFirstGapPos); if (ComputeDistortionDistance(pathRange.GetEndPos(), - bestNextExtension.GetStartPos()) > mgr.system.options.reordering.max_distortion) { + bestNextExtension.GetStartPos()) > mgr.system.options.reordering.max_distortion) { //cerr << " NO" << endl; return false; } diff --git a/moses2/PhraseBased/Search.h b/moses2/PhraseBased/Search.h index 8e9e9f787..c90856676 100644 --- a/moses2/PhraseBased/Search.h +++ b/moses2/PhraseBased/Search.h @@ -39,16 +39,14 @@ protected: //ArcLists m_arcLists; bool CanExtend(const Bitmap &hypoBitmap, size_t hypoRangeEndPos, - const Range &pathRange); + const Range &pathRange); inline int ComputeDistortionDistance(size_t prevEndPos, - size_t currStartPos) const - { + size_t currStartPos) const { int dist = 0; if (prevEndPos == NOT_FOUND) { dist = currStartPos; - } - else { + } else { 
dist = (int)prevEndPos - (int)currStartPos + 1; } return abs(dist); diff --git a/moses2/PhraseBased/Sentence.cpp b/moses2/PhraseBased/Sentence.cpp index dbedf878e..a11ba79cf 100644 --- a/moses2/PhraseBased/Sentence.cpp +++ b/moses2/PhraseBased/Sentence.cpp @@ -17,15 +17,14 @@ namespace Moses2 { Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab, - const System &system, const std::string &str) + const System &system, const std::string &str) { Sentence *ret; if (system.options.input.xml_policy) { // xml - ret = CreateFromStringXML(pool, vocab, system, str); - } - else { + ret = CreateFromStringXML(pool, vocab, system, str); + } else { // no xml //cerr << "PB Sentence" << endl; std::vector toks = Tokenize(str); @@ -42,82 +41,80 @@ Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab, } Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab, - const System &system, const std::string &str) + const System &system, const std::string &str) { Sentence *ret; - vector xmlOptions; - pugi::xml_document doc; + vector xmlOptions; + pugi::xml_document doc; - string str2 = "" + str + ""; - pugi::xml_parse_result result = doc.load(str2.c_str(), - pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments); - pugi::xml_node topNode = doc.child("xml"); + string str2 = "" + str + ""; + pugi::xml_parse_result result = doc.load(str2.c_str(), + pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments); + pugi::xml_node topNode = doc.child("xml"); - std::vector toks; - XMLParse(pool, system, 0, topNode, toks, xmlOptions); + std::vector toks; + XMLParse(pool, system, 0, topNode, toks, xmlOptions); - // debug - /* - cerr << "xmloptions:" << endl; - for (size_t i = 0; i < xmlOptions.size(); ++i) { - cerr << xmlOptions[i]->Debug(system) << endl; + // debug + /* + cerr << "xmloptions:" << endl; + for (size_t i = 0; i < xmlOptions.size(); ++i) { + cerr << 
xmlOptions[i]->Debug(system) << endl; + } + */ + + // create words + size_t size = toks.size(); + ret = new (pool.Allocate()) Sentence(pool, size); + ret->PhraseImplTemplate::CreateFromString(vocab, system, toks, false); + + // xml + ret->Init(system, size, system.options.reordering.max_distortion); + + ReorderingConstraint &reorderingConstraint = ret->GetReorderingConstraint(); + + // set reordering walls, if "-monotone-at-punction" is set + if (system.options.reordering.monotone_at_punct && ret->GetSize()) { + reorderingConstraint.SetMonotoneAtPunctuation(*ret); + } + + // set walls obtained from xml + for(size_t i=0; iGetNodeName(), "wall") == 0) { + UTIL_THROW_IF2(xmlOption->startPos > ret->GetSize(), "wall is beyond the sentence"); // no buggy walls, please + reorderingConstraint.SetWall(xmlOption->startPos - 1, true); + } else if (strcmp(xmlOption->GetNodeName(), "zone") == 0) { + reorderingConstraint.SetZone( xmlOption->startPos, xmlOption->startPos + xmlOption->phraseSize -1 ); + } else if (strcmp(xmlOption->GetNodeName(), "ne") == 0) { + FactorType placeholderFactor = system.options.input.placeholder_factor; + UTIL_THROW_IF2(placeholderFactor == NOT_FOUND, + "Placeholder XML in input. Must have argument -placeholder-factor [NUM]"); + UTIL_THROW_IF2(xmlOption->phraseSize != 1, + "Placeholder must only cover 1 word"); + + const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false); + (*ret)[xmlOption->startPos][placeholderFactor] = factor; + } else { + // default - forced translation. 
Add to class variable + ret->AddXMLOption(system, xmlOption); } - */ + } + reorderingConstraint.FinalizeWalls(); - // create words - size_t size = toks.size(); - ret = new (pool.Allocate()) Sentence(pool, size); - ret->PhraseImplTemplate::CreateFromString(vocab, system, toks, false); - - // xml - ret->Init(system, size, system.options.reordering.max_distortion); - - ReorderingConstraint &reorderingConstraint = ret->GetReorderingConstraint(); - - // set reordering walls, if "-monotone-at-punction" is set - if (system.options.reordering.monotone_at_punct && ret->GetSize()) { - reorderingConstraint.SetMonotoneAtPunctuation(*ret); - } - - // set walls obtained from xml - for(size_t i=0; iGetNodeName(), "wall") == 0) { - UTIL_THROW_IF2(xmlOption->startPos > ret->GetSize(), "wall is beyond the sentence"); // no buggy walls, please - reorderingConstraint.SetWall(xmlOption->startPos - 1, true); - } - else if (strcmp(xmlOption->GetNodeName(), "zone") == 0) { - reorderingConstraint.SetZone( xmlOption->startPos, xmlOption->startPos + xmlOption->phraseSize -1 ); - } - else if (strcmp(xmlOption->GetNodeName(), "ne") == 0) { - FactorType placeholderFactor = system.options.input.placeholder_factor; - UTIL_THROW_IF2(placeholderFactor == NOT_FOUND, - "Placeholder XML in input. Must have argument -placeholder-factor [NUM]"); - UTIL_THROW_IF2(xmlOption->phraseSize != 1, - "Placeholder must only cover 1 word"); - - const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false); - (*ret)[xmlOption->startPos][placeholderFactor] = factor; - } - else { - // default - forced translation. 
Add to class variable - ret->AddXMLOption(system, xmlOption); - } - } - reorderingConstraint.FinalizeWalls(); - - return ret; + return ret; } void Sentence::XMLParse( - MemPool &pool, - const System &system, - size_t depth, - const pugi::xml_node &parentNode, - std::vector &toks, - vector &xmlOptions) -{ // pugixml + MemPool &pool, + const System &system, + size_t depth, + const pugi::xml_node &parentNode, + std::vector &toks, + vector &xmlOptions) +{ + // pugixml for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) { string nodeName = childNode.name(); //cerr << depth << " nodeName=" << nodeName << endl; @@ -139,17 +136,17 @@ void Sentence::XMLParse( pugi::xml_attribute attr; attr = childNode.attribute("translation"); if (!attr.empty()) { - xmlOption->SetTranslation(pool, attr.as_string()); + xmlOption->SetTranslation(pool, attr.as_string()); } attr = childNode.attribute("entity"); if (!attr.empty()) { - xmlOption->SetEntity(pool, attr.as_string()); + xmlOption->SetEntity(pool, attr.as_string()); } attr = childNode.attribute("prob"); if (!attr.empty()) { - xmlOption->prob = attr.as_float(); + xmlOption->prob = attr.as_float(); } xmlOptions.push_back(xmlOption); diff --git a/moses2/PhraseBased/Sentence.h b/moses2/PhraseBased/Sentence.h index 2e9e834a7..ff7c52138 100644 --- a/moses2/PhraseBased/Sentence.h +++ b/moses2/PhraseBased/Sentence.h @@ -24,11 +24,11 @@ class Sentence: public InputType, public PhraseImpl public: static Sentence *CreateFromString(MemPool &pool, FactorCollection &vocab, - const System &system, const std::string &str); + const System &system, const std::string &str); Sentence(MemPool &pool, size_t size) - :InputType(pool) - ,PhraseImpl(pool, size) + :InputType(pool) + ,PhraseImpl(pool, size) {} virtual ~Sentence() @@ -36,15 +36,15 @@ public: protected: static Sentence *CreateFromStringXML(MemPool &pool, FactorCollection &vocab, - const System &system, const std::string &str); + const System 
&system, const std::string &str); static void XMLParse( - MemPool &pool, - const System &system, - size_t depth, - const pugi::xml_node &parentNode, - std::vector &toks, - std::vector &xmlOptions); + MemPool &pool, + const System &system, + size_t depth, + const pugi::xml_node &parentNode, + std::vector &toks, + std::vector &xmlOptions); }; diff --git a/moses2/PhraseBased/TargetPhraseImpl.cpp b/moses2/PhraseBased/TargetPhraseImpl.cpp index 3768ca278..d9bc766d9 100644 --- a/moses2/PhraseBased/TargetPhraseImpl.cpp +++ b/moses2/PhraseBased/TargetPhraseImpl.cpp @@ -26,16 +26,16 @@ TargetPhraseImpl *TargetPhraseImpl::CreateFromString(MemPool &pool, vector toks = Tokenize(str); size_t size = toks.size(); TargetPhraseImpl *ret = - new (pool.Allocate()) TargetPhraseImpl(pool, pt, system, - size); + new (pool.Allocate()) TargetPhraseImpl(pool, pt, system, + size); ret->PhraseImplTemplate::CreateFromString(vocab, system, toks); return ret; } TargetPhraseImpl::TargetPhraseImpl(MemPool &pool, const PhraseTable &pt, - const System &system, size_t size) -:Moses2::TargetPhrase(pool, pt, system, size) + const System &system, size_t size) + :Moses2::TargetPhrase(pool, pt, system, size) { m_scores = new (pool.Allocate()) Scores(system, pool, system.featureFunctions.GetNumScores()); diff --git a/moses2/PhraseBased/TargetPhraseImpl.h b/moses2/PhraseBased/TargetPhraseImpl.h index a3355ffe6..026414b5d 100644 --- a/moses2/PhraseBased/TargetPhraseImpl.h +++ b/moses2/PhraseBased/TargetPhraseImpl.h @@ -31,19 +31,22 @@ public: static TargetPhraseImpl *CreateFromString(MemPool &pool, const PhraseTable &pt, const System &system, const std::string &str); TargetPhraseImpl(MemPool &pool, const PhraseTable &pt, const System &system, - size_t size); + size_t size); //TargetPhraseImpl(MemPool &pool, const System &system, const TargetPhraseImpl ©); virtual ~TargetPhraseImpl(); - SCORE GetFutureScore() const - { return m_scores->GetTotalScore() + m_estimatedScore; } + SCORE GetFutureScore() const { + 
return m_scores->GetTotalScore() + m_estimatedScore; + } - void SetEstimatedScore(const SCORE &value) - { m_estimatedScore = value; } + void SetEstimatedScore(const SCORE &value) { + m_estimatedScore = value; + } - virtual SCORE GetScoreForPruning() const - { return GetFutureScore(); } + virtual SCORE GetScoreForPruning() const { + return GetFutureScore(); + } protected: SCORE m_estimatedScore; diff --git a/moses2/PhraseBased/TargetPhrases.cpp b/moses2/PhraseBased/TargetPhrases.cpp index a48afefa9..553312753 100644 --- a/moses2/PhraseBased/TargetPhrases.cpp +++ b/moses2/PhraseBased/TargetPhrases.cpp @@ -17,7 +17,7 @@ namespace Moses2 { TargetPhrases::TargetPhrases(MemPool &pool, size_t size) : - m_coll(pool, size), m_currInd(0) + m_coll(pool, size), m_currInd(0) { } @@ -42,7 +42,7 @@ TargetPhrases::~TargetPhrases() std::string TargetPhrases::Debug(const System &system) const { stringstream out; - BOOST_FOREACH(const TargetPhraseImpl *tp, *this){ + BOOST_FOREACH(const TargetPhraseImpl *tp, *this) { out << tp->Debug(system); out << endl; } @@ -53,11 +53,11 @@ void TargetPhrases::SortAndPrune(size_t tableLimit) { iterator iterMiddle; iterMiddle = - (tableLimit == 0 || m_coll.size() < tableLimit) ? - m_coll.end() : m_coll.begin() + tableLimit; + (tableLimit == 0 || m_coll.size() < tableLimit) ? + m_coll.end() : m_coll.begin() + tableLimit; std::partial_sort(m_coll.begin(), iterMiddle, m_coll.end(), - CompareScoreForPruning()); + CompareScoreForPruning()); if (tableLimit && m_coll.size() > tableLimit) { m_coll.resize(tableLimit); diff --git a/moses2/PhraseBased/TargetPhrases.h b/moses2/PhraseBased/TargetPhrases.h index 2582a7386..79595ab40 100644 --- a/moses2/PhraseBased/TargetPhrases.h +++ b/moses2/PhraseBased/TargetPhrases.h @@ -25,12 +25,10 @@ public: typedef Coll::iterator iterator; typedef Coll::const_iterator const_iterator; //! 
iterators - const_iterator begin() const - { + const_iterator begin() const { return m_coll.begin(); } - const_iterator end() const - { + const_iterator end() const { return m_coll.end(); } @@ -38,18 +36,15 @@ public: //TargetPhrases(MemPool &pool, const System &system, const TargetPhrases ©); virtual ~TargetPhrases(); - void AddTargetPhrase(const TP &targetPhrase) - { + void AddTargetPhrase(const TP &targetPhrase) { m_coll[m_currInd++] = &targetPhrase; } - size_t GetSize() const - { + size_t GetSize() const { return m_coll.size(); } - const TP& operator[](size_t ind) const - { + const TP& operator[](size_t ind) const { return *m_coll[ind]; } diff --git a/moses2/PhraseBased/TrellisPath.cpp b/moses2/PhraseBased/TrellisPath.cpp index a7213fe18..5a1132c60 100644 --- a/moses2/PhraseBased/TrellisPath.cpp +++ b/moses2/PhraseBased/TrellisPath.cpp @@ -27,16 +27,16 @@ std::string TrellisNode::Debug(const System &system) const ///////////////////////////////////////////////////////////////////////////////// TrellisPath::TrellisPath(const Hypothesis *hypo, const ArcLists &arcLists) : - prevEdgeChanged(-1) + prevEdgeChanged(-1) { AddNodes(hypo, arcLists); m_scores = &hypo->GetScores(); } TrellisPath::TrellisPath(const TrellisPath &origPath, size_t edgeIndex, - const TrellisNode &newNode, const ArcLists &arcLists, MemPool &pool, - const System &system) : - prevEdgeChanged(edgeIndex) + const TrellisNode &newNode, const ArcLists &arcLists, MemPool &pool, + const System &system) : + prevEdgeChanged(edgeIndex) { nodes.reserve(origPath.nodes.size()); for (size_t currEdge = 0; currEdge < edgeIndex; currEdge++) { @@ -64,7 +64,7 @@ TrellisPath::TrellisPath(const TrellisPath &origPath, size_t edgeIndex, const HypothesisBase *newHypo = newNode.GetHypo(); CalcScores(origPath.GetScores(), origHypo->GetScores(), newHypo->GetScores(), - pool, system); + pool, system); } TrellisPath::~TrellisPath() @@ -107,7 +107,7 @@ std::string TrellisPath::OutputTargetPhrase(const System &system) const { 
std::stringstream out; for (int i = nodes.size() - 2; i >= 0; --i) { - const TrellisNode &node = nodes[i]; + const TrellisNode &node = nodes[i]; const Hypothesis *hypo = static_cast(node.GetHypo()); const TargetPhrase &tp = hypo->GetTargetPhrase(); @@ -121,7 +121,7 @@ std::string TrellisPath::OutputTargetPhrase(const System &system) const } void TrellisPath::CreateDeviantPaths(TrellisPaths &paths, - const ArcLists &arcLists, MemPool &pool, const System &system) const + const ArcLists &arcLists, MemPool &pool, const System &system) const { const size_t sizePath = nodes.size(); @@ -145,8 +145,8 @@ void TrellisPath::CreateDeviantPaths(TrellisPaths &paths, } void TrellisPath::CalcScores(const Scores &origScores, - const Scores &origHypoScores, const Scores &newHypoScores, MemPool &pool, - const System &system) + const Scores &origHypoScores, const Scores &newHypoScores, MemPool &pool, + const System &system) { Scores *scores = new (pool.Allocate()) Scores(system, pool, system.featureFunctions.GetNumScores(), origScores); diff --git a/moses2/PhraseBased/TrellisPath.h b/moses2/PhraseBased/TrellisPath.h index c0b989ad9..6852b43ba 100644 --- a/moses2/PhraseBased/TrellisPath.h +++ b/moses2/PhraseBased/TrellisPath.h @@ -27,12 +27,10 @@ public: size_t ind; TrellisNode(const ArcList &varcList, size_t vind) : - arcList(&varcList), ind(vind) - { + arcList(&varcList), ind(vind) { } - const HypothesisBase *GetHypo() const - { + const HypothesisBase *GetHypo() const { return (*arcList)[ind]; } @@ -55,13 +53,12 @@ public: * which may change other hypo back from there */ TrellisPath(const TrellisPath &origPath, size_t edgeIndex, - const TrellisNode &newNode, const ArcLists &arcLists, MemPool &pool, - const System &system); + const TrellisNode &newNode, const ArcLists &arcLists, MemPool &pool, + const System &system); virtual ~TrellisPath(); - const Scores &GetScores() const - { + const Scores &GetScores() const { return *m_scores; } SCORE GetFutureScore() const; @@ -73,14 +70,14 @@ 
public: //! create a set of next best paths by wiggling 1 of the node at a time. void CreateDeviantPaths(TrellisPaths &paths, const ArcLists &arcLists, - MemPool &pool, const System &system) const; + MemPool &pool, const System &system) const; protected: const Scores *m_scores; void AddNodes(const Hypothesis *hypo, const ArcLists &arcLists); void CalcScores(const Scores &origScores, const Scores &origHypoScores, - const Scores &newHypoScores, MemPool &pool, const System &system); + const Scores &newHypoScores, MemPool &pool, const System &system); }; } /* namespace Moses2 */ diff --git a/moses2/PhraseImplTemplate.h b/moses2/PhraseImplTemplate.h index a9d377bb0..a3ef32a7f 100644 --- a/moses2/PhraseImplTemplate.h +++ b/moses2/PhraseImplTemplate.h @@ -21,15 +21,13 @@ class PhraseImplTemplate : public Phrase { public: PhraseImplTemplate(MemPool &pool, size_t size) : - m_size(size) - { + m_size(size) { m_words = new (pool.Allocate(size)) WORD[size]; } PhraseImplTemplate(MemPool &pool, const PhraseImplTemplate ©) : - m_size(copy.GetSize()) - { + m_size(copy.GetSize()) { m_words = new (pool.Allocate(m_size)) WORD[m_size]; for (size_t i = 0; i < m_size; ++i) { const WORD &word = copy[i]; @@ -37,21 +35,22 @@ public: } } - virtual ~PhraseImplTemplate() - { + virtual ~PhraseImplTemplate() { } - size_t GetSize() const - { return m_size; } + size_t GetSize() const { + return m_size; + } - WORD& operator[](size_t pos) - { return m_words[pos]; } + WORD& operator[](size_t pos) { + return m_words[pos]; + } - const WORD& operator[](size_t pos) const - { return m_words[pos]; } + const WORD& operator[](size_t pos) const { + return m_words[pos]; + } - SubPhrase GetSubPhrase(size_t start, size_t size) const - { + SubPhrase GetSubPhrase(size_t start, size_t size) const { SubPhrase ret(*this, start, size); return ret; } @@ -61,8 +60,7 @@ protected: WORD *m_words; void CreateFromString(FactorCollection &vocab, const System &system, - const std::vector &toks, bool addBOSEOS = false) - { + 
const std::vector &toks, bool addBOSEOS = false) { size_t startPos = 0; if (addBOSEOS) { startPos = 1; diff --git a/moses2/Recycler.h b/moses2/Recycler.h index 3751a2a93..60bdddf31 100644 --- a/moses2/Recycler.h +++ b/moses2/Recycler.h @@ -18,45 +18,37 @@ class Recycler { public: Recycler() : - m_currInd(0) - { + m_currInd(0) { } - virtual ~Recycler() - { + virtual ~Recycler() { } - T Get() - { + T Get() { if (!m_coll.empty()) { T &obj = m_coll.back(); m_coll.pop_back(); return obj; - } - else if (m_currInd) { + } else if (m_currInd) { --m_currInd; T &obj = m_all[m_currInd]; return obj; - } - else { + } else { return NULL; } } - void Clear() - { + void Clear() { m_coll.clear(); m_currInd = m_all.size(); } // call this for new objects when u 1st create it. It is assumed the object will be used right away - void Keep(const T& val) - { + void Keep(const T& val) { m_all.push_back(val); } // call this for existing object to put back into queue for reuse - void Recycle(const T& val) - { + void Recycle(const T& val) { m_coll.push_back(val); } diff --git a/moses2/SCFG/ActiveChart.cpp b/moses2/SCFG/ActiveChart.cpp index 711767b2f..fb4d84bfb 100644 --- a/moses2/SCFG/ActiveChart.cpp +++ b/moses2/SCFG/ActiveChart.cpp @@ -17,12 +17,12 @@ SymbolBindElement::SymbolBindElement() } SymbolBindElement::SymbolBindElement( - const Moses2::Range &range, - const SCFG::Word &word, - const Moses2::Hypotheses *hypos) -:m_range(&range) -,word(&word) -,hypos(hypos) + const Moses2::Range &range, + const SCFG::Word &word, + const Moses2::Hypotheses *hypos) + :m_range(&range) + ,word(&word) + ,hypos(hypos) { assert( (word.isNonTerminal && hypos) || (!word.isNonTerminal && hypos == NULL)); } @@ -37,19 +37,19 @@ size_t hash_value(const SymbolBindElement &obj) std::string SymbolBindElement::Debug(const System &system) const { - stringstream out; - out << "("; - out << *m_range; - out << word->Debug(system); - out << ")"; + stringstream out; + out << "("; + out << *m_range; + out << 
word->Debug(system); + out << ")"; return out.str(); } //////////////////////////////////////////////////////////////////////////// SymbolBind::SymbolBind(MemPool &pool) -:coll(pool) -,numNT(0) + :coll(pool) + ,numNT(0) { } @@ -83,19 +83,19 @@ std::string SymbolBind::Debug(const System &system) const { stringstream out; BOOST_FOREACH(const SymbolBindElement &ele, coll) { - out << ele.Debug(system) << " "; + out << ele.Debug(system) << " "; } return out.str(); } //////////////////////////////////////////////////////////////////////////// ActiveChartEntry::ActiveChartEntry(MemPool &pool) -:m_symbolBind(pool) + :m_symbolBind(pool) { } //////////////////////////////////////////////////////////////////////////// ActiveChart::ActiveChart(MemPool &pool) -:entries(pool) + :entries(pool) { } diff --git a/moses2/SCFG/ActiveChart.h b/moses2/SCFG/ActiveChart.h index ed9f35d92..baf3a09dd 100644 --- a/moses2/SCFG/ActiveChart.h +++ b/moses2/SCFG/ActiveChart.h @@ -28,13 +28,13 @@ public: SymbolBindElement(); SymbolBindElement(const Moses2::Range &range, const SCFG::Word &word, const Moses2::Hypotheses *hypos); - const Range &GetRange() const - { return *m_range; } + const Range &GetRange() const { + return *m_range; + } - bool operator==(const SymbolBindElement &compare) const - { + bool operator==(const SymbolBindElement &compare) const { bool ret = hypos == compare.hypos - && word == compare.word; + && word == compare.word; return ret; } @@ -58,19 +58,21 @@ public: SymbolBind(MemPool &pool); SymbolBind(MemPool &pool, const SymbolBind ©) - :coll(copy.coll) - ,numNT(copy.numNT) + :coll(copy.coll) + ,numNT(copy.numNT) {} - size_t GetSize() const - { return coll.size(); } + size_t GetSize() const { + return coll.size(); + } std::vector GetNTElements() const; void Add(const Range &range, const SCFG::Word &word, const Moses2::Hypotheses *hypos); - bool operator==(const SymbolBind &compare) const - { return coll == compare.coll; } + bool operator==(const SymbolBind &compare) const { + 
return coll == compare.coll; + } std::string Debug(const System &system) const; @@ -88,20 +90,19 @@ public: ActiveChartEntry(MemPool &pool); ActiveChartEntry(MemPool &pool, const ActiveChartEntry &prevEntry) - :m_symbolBind(pool, prevEntry.GetSymbolBind()) - { + :m_symbolBind(pool, prevEntry.GetSymbolBind()) { //symbolBinds = new (pool.Allocate()) SymbolBind(pool, *prevEntry.symbolBinds); } - const SymbolBind &GetSymbolBind() const - { return m_symbolBind; } + const SymbolBind &GetSymbolBind() const { + return m_symbolBind; + } virtual void AddSymbolBindElement( - const Range &range, - const SCFG::Word &word, - const Moses2::Hypotheses *hypos, - const PhraseTable &pt) - { + const Range &range, + const SCFG::Word &word, + const Moses2::Hypotheses *hypos, + const PhraseTable &pt) { m_symbolBind.Add(range, word, hypos); } diff --git a/moses2/SCFG/Hypothesis.cpp b/moses2/SCFG/Hypothesis.cpp index 28411a43e..c7ae8c798 100644 --- a/moses2/SCFG/Hypothesis.cpp +++ b/moses2/SCFG/Hypothesis.cpp @@ -19,35 +19,34 @@ namespace SCFG Hypothesis *Hypothesis::Create(MemPool &pool, Manager &mgr) { // ++g_numHypos; - Hypothesis *ret; - //ret = new (pool.Allocate()) Hypothesis(pool, mgr.system); + Hypothesis *ret; + //ret = new (pool.Allocate()) Hypothesis(pool, mgr.system); - Recycler &recycler = mgr.GetHypoRecycle(); - ret = static_cast(recycler.Get()); - if (ret) { - // got new hypo from recycler. Do nothing - } - else { - ret = new (pool.Allocate()) Hypothesis(pool, mgr.system); - //cerr << "Hypothesis=" << sizeof(Hypothesis) << " " << ret << endl; - recycler.Keep(ret); - } - return ret; + Recycler &recycler = mgr.GetHypoRecycle(); + ret = static_cast(recycler.Get()); + if (ret) { + // got new hypo from recycler. 
Do nothing + } else { + ret = new (pool.Allocate()) Hypothesis(pool, mgr.system); + //cerr << "Hypothesis=" << sizeof(Hypothesis) << " " << ret << endl; + recycler.Keep(ret); + } + return ret; } Hypothesis::Hypothesis(MemPool &pool, - const System &system) -:HypothesisBase(pool, system) -,m_prevHypos(pool) + const System &system) + :HypothesisBase(pool, system) + ,m_prevHypos(pool) { } void Hypothesis::Init(SCFG::Manager &mgr, - const SCFG::InputPath &path, - const SCFG::SymbolBind &symbolBind, - const SCFG::TargetPhraseImpl &tp, - const Vector &prevHyposIndices) + const SCFG::InputPath &path, + const SCFG::SymbolBind &symbolBind, + const SCFG::TargetPhraseImpl &tp, + const Vector &prevHyposIndices) { m_mgr = &mgr; m_targetPhrase = &tp; @@ -91,8 +90,8 @@ SCORE Hypothesis::GetFutureScore() const void Hypothesis::EvaluateWhenApplied() { const std::vector &sfffs = - GetManager().system.featureFunctions.GetStatefulFeatureFunctions(); - BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs){ + GetManager().system.featureFunctions.GetStatefulFeatureFunctions(); + BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs) { EvaluateWhenApplied(*sfff); } //cerr << *this << endl; @@ -105,7 +104,7 @@ void Hypothesis::EvaluateWhenApplied(const StatefulFeatureFunction &sfff) size_t statefulInd = sfff.GetStatefulInd(); FFState *thisState = m_ffStates[statefulInd]; sfff.EvaluateWhenApplied(mgr, *this, statefulInd, GetScores(), - *thisState); + *thisState); } @@ -123,8 +122,7 @@ void Hypothesis::OutputToStream(std::ostream &strm) const size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[targetPos]; const Hypothesis *prevHypo = m_prevHypos[nonTermInd]; prevHypo->OutputToStream(strm); - } - else { + } else { word.OutputToStream(*m_mgr, targetPos, *this, strm); strm << " "; } @@ -166,13 +164,13 @@ std::string Hypothesis::Debug(const System &system) const void Hypothesis::OutputTransOpt(std::ostream &out) const { - out << GetInputPath().range << " " - << "score=" << 
GetScores().GetTotalScore() << " " - << GetTargetPhrase().Debug(m_mgr->system) << endl; + out << GetInputPath().range << " " + << "score=" << GetScores().GetTotalScore() << " " + << GetTargetPhrase().Debug(m_mgr->system) << endl; - BOOST_FOREACH(const Hypothesis *prevHypo, m_prevHypos) { - prevHypo->OutputTransOpt(out); - } + BOOST_FOREACH(const Hypothesis *prevHypo, m_prevHypos) { + prevHypo->OutputTransOpt(out); + } } } // namespaces diff --git a/moses2/SCFG/Hypothesis.h b/moses2/SCFG/Hypothesis.h index 8ece45bb6..fbbd663aa 100644 --- a/moses2/SCFG/Hypothesis.h +++ b/moses2/SCFG/Hypothesis.h @@ -23,29 +23,34 @@ public: static Hypothesis *Create(MemPool &pool, Manager &mgr); void Init(SCFG::Manager &mgr, - const SCFG::InputPath &path, - const SCFG::SymbolBind &symbolBind, - const SCFG::TargetPhraseImpl &tp, - const Vector &prevHyposIndices); + const SCFG::InputPath &path, + const SCFG::SymbolBind &symbolBind, + const SCFG::TargetPhraseImpl &tp, + const Vector &prevHyposIndices); virtual SCORE GetFutureScore() const; virtual void EvaluateWhenApplied(); - const SCFG::TargetPhraseImpl &GetTargetPhrase() const - { return *m_targetPhrase; } + const SCFG::TargetPhraseImpl &GetTargetPhrase() const { + return *m_targetPhrase; + } - const SCFG::InputPath &GetInputPath() const - { return *m_path; } + const SCFG::InputPath &GetInputPath() const { + return *m_path; + } - const SCFG::SymbolBind &GetSymbolBind() const - { return *m_symbolBind; } + const SCFG::SymbolBind &GetSymbolBind() const { + return *m_symbolBind; + } - const Vector &GetPrevHypos() const - { return m_prevHypos; } + const Vector &GetPrevHypos() const { + return m_prevHypos; + } //! 
get a particular previous hypos - const Hypothesis* GetPrevHypo(size_t ind) const - { return m_prevHypos[ind]; } + const Hypothesis* GetPrevHypo(size_t ind) const { + return m_prevHypos[ind]; + } void OutputToStream(std::ostream &strm) const; void OutputTransOpt(std::ostream &strm) const; @@ -60,7 +65,7 @@ protected: Vector m_prevHypos; // always sorted by source position? Hypothesis(MemPool &pool, - const System &system); + const System &system); void EvaluateWhenApplied(const StatefulFeatureFunction &sfff); diff --git a/moses2/SCFG/InputPath.cpp b/moses2/SCFG/InputPath.cpp index 1ebbbf327..4fcbbb2b0 100644 --- a/moses2/SCFG/InputPath.cpp +++ b/moses2/SCFG/InputPath.cpp @@ -19,10 +19,10 @@ namespace SCFG { InputPath::InputPath(MemPool &pool, const SubPhrase &subPhrase, - const Range &range, size_t numPt, const InputPath *prefixPath) -:InputPathBase(pool, range, numPt, prefixPath) -,subPhrase(subPhrase) -,targetPhrases(MemPoolAllocator(pool)) + const Range &range, size_t numPt, const InputPath *prefixPath) + :InputPathBase(pool, range, numPt, prefixPath) + ,subPhrase(subPhrase) + ,targetPhrases(MemPoolAllocator(pool)) { m_activeChart = pool.Allocate(numPt); for (size_t i = 0; i < numPt; ++i) { @@ -68,14 +68,14 @@ std::string InputPath::Debug(const System &system) const } void InputPath::AddTargetPhrasesToPath( - MemPool &pool, - const System &system, - const PhraseTable &pt, - const SCFG::TargetPhrases &tps, - const SCFG::SymbolBind &symbolBind) + MemPool &pool, + const System &system, + const PhraseTable &pt, + const SCFG::TargetPhrases &tps, + const SCFG::SymbolBind &symbolBind) { targetPhrases.push_back(Element(symbolBind, &tps)); - /* + /* Coll::iterator iterColl; iterColl = targetPhrases.find(symbolBind); assert(iterColl == targetPhrases.end()); diff --git a/moses2/SCFG/InputPath.h b/moses2/SCFG/InputPath.h index c8a7253c2..bef9e0a79 100644 --- a/moses2/SCFG/InputPath.h +++ b/moses2/SCFG/InputPath.h @@ -35,20 +35,21 @@ public: SubPhrase subPhrase; 
InputPath(MemPool &pool, const SubPhrase &subPhrase, const Range &range, - size_t numPt, const InputPath *prefixPath); + size_t numPt, const InputPath *prefixPath); virtual ~InputPath(); - const ActiveChart &GetActiveChart(size_t ptInd) const - { return m_activeChart[ptInd]; } + const ActiveChart &GetActiveChart(size_t ptInd) const { + return m_activeChart[ptInd]; + } void AddActiveChartEntry(size_t ptInd, ActiveChartEntry *chartEntry); void AddTargetPhrasesToPath( - MemPool &pool, - const System &system, - const PhraseTable &pt, - const SCFG::TargetPhrases &tps, - const SCFG::SymbolBind &symbolBind); + MemPool &pool, + const System &system, + const PhraseTable &pt, + const SCFG::TargetPhrases &tps, + const SCFG::SymbolBind &symbolBind); size_t GetNumRules() const; diff --git a/moses2/SCFG/InputPaths.cpp b/moses2/SCFG/InputPaths.cpp index e1c3f9d21..77478cd98 100644 --- a/moses2/SCFG/InputPaths.cpp +++ b/moses2/SCFG/InputPaths.cpp @@ -56,7 +56,7 @@ void InputPaths::Init(const InputType &input, const ManagerBase &mgr) Range range(startPos, endPos); SCFG::InputPath *path = new (pool.Allocate()) - SCFG::InputPath(pool, subPhrase, range, numPt, prefixPath); + SCFG::InputPath(pool, subPhrase, range, numPt, prefixPath); //cerr << "path=" << *path << endl; m_inputPaths.push_back(path); diff --git a/moses2/SCFG/InputPaths.h b/moses2/SCFG/InputPaths.h index 37e2404cf..57c45414f 100644 --- a/moses2/SCFG/InputPaths.h +++ b/moses2/SCFG/InputPaths.h @@ -26,8 +26,7 @@ class InputPaths: public InputPathsBase public: void Init(const InputType &input, const ManagerBase &mgr); - const Matrix &GetMatrix() const - { + const Matrix &GetMatrix() const { return *m_matrix; } diff --git a/moses2/SCFG/Manager.cpp b/moses2/SCFG/Manager.cpp index 5db4e2a89..6e10b32ed 100644 --- a/moses2/SCFG/Manager.cpp +++ b/moses2/SCFG/Manager.cpp @@ -28,8 +28,8 @@ namespace SCFG { Manager::Manager(System &sys, const TranslationTask &task, - const std::string &inputStr, long translationId) 
-:ManagerBase(sys, task, inputStr, translationId) + const std::string &inputStr, long translationId) + :ManagerBase(sys, task, inputStr, translationId) { } @@ -48,7 +48,7 @@ void Manager::Decode() FactorCollection &vocab = system.GetVocab(); m_input = Sentence::CreateFromString(GetPool(), vocab, system, m_inputStr, - m_translationId); + m_translationId); const SCFG::Sentence &sentence = static_cast(GetInput()); @@ -100,15 +100,15 @@ void Manager::Decode() void Manager::InitActiveChart(SCFG::InputPath &path) { - size_t numPt = system.mappings.size(); - //cerr << "numPt=" << numPt << endl; + size_t numPt = system.mappings.size(); + //cerr << "numPt=" << numPt << endl; - for (size_t i = 0; i < numPt; ++i) { - const PhraseTable &pt = *system.mappings[i]; - //cerr << "START InitActiveChart" << endl; - pt.InitActiveChart(GetPool(), *this, path); - //cerr << "FINISHED InitActiveChart" << endl; - } + for (size_t i = 0; i < numPt; ++i) { + const PhraseTable &pt = *system.mappings[i]; + //cerr << "START InitActiveChart" << endl; + pt.InitActiveChart(GetPool(), *this, path); + //cerr << "FINISHED InitActiveChart" << endl; + } } void Manager::Lookup(SCFG::InputPath &path) @@ -202,9 +202,9 @@ void Manager::Decode(SCFG::InputPath &path, Stack &stack) } void Manager::CreateQueue( - const SCFG::InputPath &path, - const SymbolBind &symbolBind, - const SCFG::TargetPhrases &tps) + const SCFG::InputPath &path, + const SymbolBind &symbolBind, + const SCFG::TargetPhrases &tps) { MemPool &pool = GetPool(); @@ -255,10 +255,10 @@ void Manager::Decode(SCFG::InputPath &path, Stack &stack) */ void Manager::ExpandHypo( - const SCFG::InputPath &path, - const SCFG::SymbolBind &symbolBind, - const SCFG::TargetPhraseImpl &tp, - Stack &stack) + const SCFG::InputPath &path, + const SCFG::SymbolBind &symbolBind, + const SCFG::TargetPhraseImpl &tp, + Stack &stack) { Recycler &hypoRecycler = GetHypoRecycle(); @@ -280,9 +280,9 @@ void Manager::ExpandHypo( } bool Manager::IncrPrevHypoIndices( - Vector 
&prevHyposIndices, - size_t ind, - const std::vector ntEles) + Vector &prevHyposIndices, + size_t ind, + const std::vector ntEles) { if (ntEles.size() == 0) { // no nt. Do the 1st @@ -319,8 +319,7 @@ bool Manager::IncrPrevHypoIndices( if (ind >= numHypos) { return false; - } - else { + } else { return true; } } @@ -345,8 +344,7 @@ std::string Manager::OutputBest() const if (system.options.output.ReportHypoScore) { out = SPrint(bestHypo->GetScores().GetTotalScore()) + " " + out; } - } - else { + } else { if (system.options.output.ReportHypoScore) { out = "0 "; } @@ -377,12 +375,11 @@ std::string Manager::OutputTransOpt() const SCFG::Hypothesis *bestHypo = lastStack.GetBestHypo(); if (bestHypo) { - stringstream outStrm; - bestHypo->OutputTransOpt(outStrm); - return outStrm.str(); - } - else { - return ""; + stringstream outStrm; + bestHypo->OutputTransOpt(outStrm); + return outStrm.str(); + } else { + return ""; } } diff --git a/moses2/SCFG/Manager.h b/moses2/SCFG/Manager.h index 6bd53cc89..a9a575896 100644 --- a/moses2/SCFG/Manager.h +++ b/moses2/SCFG/Manager.h @@ -29,7 +29,7 @@ class Manager: public Moses2::ManagerBase { public: Manager(System &sys, const TranslationTask &task, const std::string &inputStr, - long translationId); + long translationId); virtual ~Manager(); void Decode(); @@ -37,14 +37,17 @@ public: std::string OutputNBest(); std::string OutputTransOpt(); - const InputPaths &GetInputPaths() const - { return m_inputPaths; } + const InputPaths &GetInputPaths() const { + return m_inputPaths; + } - QueueItemRecycler &GetQueueItemRecycler() - { return m_queueItemRecycler; } + QueueItemRecycler &GetQueueItemRecycler() { + return m_queueItemRecycler; + } - const Stacks &GetStacks() const - { return m_stacks; } + const Stacks &GetStacks() const { + return m_stacks; + } protected: Stacks m_stacks; @@ -56,15 +59,15 @@ protected: void Decode(SCFG::InputPath &path, Stack &stack); void ExpandHypo( - const SCFG::InputPath &path, - const SCFG::SymbolBind 
&symbolBind, - const SCFG::TargetPhraseImpl &tp, - Stack &stack); + const SCFG::InputPath &path, + const SCFG::SymbolBind &symbolBind, + const SCFG::TargetPhraseImpl &tp, + Stack &stack); bool IncrPrevHypoIndices( - Vector &prevHyposIndices, - size_t ind, - const std::vector ntEles); + Vector &prevHyposIndices, + size_t ind, + const std::vector ntEles); // cube pruning Queue m_queue; @@ -73,9 +76,9 @@ protected: QueueItemRecycler m_queueItemRecycler; void CreateQueue( - const SCFG::InputPath &path, - const SymbolBind &symbolBind, - const SCFG::TargetPhrases &tps); + const SCFG::InputPath &path, + const SymbolBind &symbolBind, + const SCFG::TargetPhrases &tps); }; } diff --git a/moses2/SCFG/Misc.cpp b/moses2/SCFG/Misc.cpp index 1ab053b60..9a340928a 100644 --- a/moses2/SCFG/Misc.cpp +++ b/moses2/SCFG/Misc.cpp @@ -20,25 +20,25 @@ namespace SCFG //////////////////////////////////////////////////////// SeenPosition::SeenPosition(MemPool &pool, - const SymbolBind &vSymbolBind, - const SCFG::TargetPhrases &vtps, - size_t numNT) -:symbolBind(vSymbolBind) -,tps(vtps) -,tpInd(0) -,hypoIndColl(pool, numNT, 0) + const SymbolBind &vSymbolBind, + const SCFG::TargetPhrases &vtps, + size_t numNT) + :symbolBind(vSymbolBind) + ,tps(vtps) + ,tpInd(0) + ,hypoIndColl(pool, numNT, 0) { } SeenPosition::SeenPosition(MemPool &pool, - const SymbolBind &vSymbolBind, - const SCFG::TargetPhrases &vtps, - size_t vtpInd, - const Vector &vhypoIndColl) -:symbolBind(vSymbolBind) -,tps(vtps) -,tpInd(vtpInd) -,hypoIndColl(pool, vhypoIndColl.size()) + const SymbolBind &vSymbolBind, + const SCFG::TargetPhrases &vtps, + size_t vtpInd, + const Vector &vhypoIndColl) + :symbolBind(vSymbolBind) + ,tps(vtps) + ,tpInd(vtpInd) + ,hypoIndColl(pool, vhypoIndColl.size()) { for (size_t i = 0; i < hypoIndColl.size(); ++i) { hypoIndColl[i] = vhypoIndColl[i]; @@ -60,7 +60,7 @@ std::string SeenPosition::Debug(const System &system) const bool SeenPosition::operator==(const SeenPosition &compare) const { if (&symbolBind 
!= &compare.symbolBind) { - return false; + return false; } if (&tps != &compare.tps) { @@ -106,8 +106,7 @@ QueueItem *QueueItem::Create(MemPool &pool, SCFG::Manager &mgr) // use item from recycle bin ret = queueItemRecycler.back(); queueItemRecycler.pop_back(); - } - else { + } else { // create new item ret = new (pool.Allocate()) QueueItem(pool); } @@ -117,16 +116,16 @@ QueueItem *QueueItem::Create(MemPool &pool, SCFG::Manager &mgr) } QueueItem::QueueItem(MemPool &pool) -:m_hypoIndColl(NULL) + :m_hypoIndColl(NULL) { } void QueueItem::Init( - MemPool &pool, - const SymbolBind &vSymbolBind, - const SCFG::TargetPhrases &vTPS, - const Vector &hypoIndColl) + MemPool &pool, + const SymbolBind &vSymbolBind, + const SCFG::TargetPhrases &vTPS, + const Vector &hypoIndColl) { symbolBind = &vSymbolBind; tps = &vTPS; @@ -136,11 +135,11 @@ void QueueItem::Init( } void QueueItem::Init( - MemPool &pool, - const SymbolBind &vSymbolBind, - const SCFG::TargetPhrases &vTPS, - size_t vTPInd, - const Vector &hypoIndColl) + MemPool &pool, + const SymbolBind &vSymbolBind, + const SCFG::TargetPhrases &vTPS, + size_t vTPInd, + const Vector &hypoIndColl) { symbolBind = &vSymbolBind; tps = &vTPS; @@ -155,10 +154,10 @@ void QueueItem::AddHypos(const Moses2::Hypotheses &hypos) } void QueueItem::CreateHypo( - MemPool &systemPool, - SCFG::Manager &mgr, - const SCFG::InputPath &path, - const SCFG::SymbolBind &symbolBind) + MemPool &systemPool, + SCFG::Manager &mgr, + const SCFG::InputPath &path, + const SCFG::SymbolBind &symbolBind) { const SCFG::TargetPhraseImpl &tp = (*tps)[tpInd]; @@ -168,12 +167,12 @@ void QueueItem::CreateHypo( } void QueueItem::CreateNext( - MemPool &systemPool, - MemPool &mgrPool, - SCFG::Manager &mgr, - SCFG::Queue &queue, - SeenPositions &seenPositions, - const SCFG::InputPath &path) + MemPool &systemPool, + MemPool &mgrPool, + SCFG::Manager &mgr, + SCFG::Queue &queue, + SeenPositions &seenPositions, + const SCFG::InputPath &path) { //cerr << "tpInd=" << tpInd << " " << 
tps->GetSize() << endl; if (tpInd + 1 < tps->GetSize()) { diff --git a/moses2/SCFG/Misc.h b/moses2/SCFG/Misc.h index 0e1c2a015..27b9df79a 100644 --- a/moses2/SCFG/Misc.h +++ b/moses2/SCFG/Misc.h @@ -31,14 +31,14 @@ public: Vector hypoIndColl; SeenPosition(MemPool &pool, - const SymbolBind &vSymbolBind, - const SCFG::TargetPhrases &vtps, - size_t numNT); + const SymbolBind &vSymbolBind, + const SCFG::TargetPhrases &vtps, + size_t numNT); SeenPosition(MemPool &pool, - const SymbolBind &vSymbolBind, - const SCFG::TargetPhrases &vtps, - size_t vtpInd, - const Vector &vhypoIndColl); + const SymbolBind &vSymbolBind, + const SCFG::TargetPhrases &vtps, + size_t vtpInd, + const Vector &vhypoIndColl); bool operator==(const SeenPosition &compare) const; size_t hash() const; @@ -54,13 +54,14 @@ class SeenPositions public: bool Add(const SeenPosition *item); - void clear() - { m_coll.clear(); } + void clear() { + m_coll.clear(); + } protected: typedef boost::unordered_set, UnorderedComparer > Coll; + UnorderedComparer, UnorderedComparer > Coll; Coll m_coll; }; @@ -73,30 +74,30 @@ public: static QueueItem *Create(MemPool &pool, SCFG::Manager &mgr); void Init( - MemPool &pool, - const SymbolBind &symbolBind, - const SCFG::TargetPhrases &tps, - const Vector &hypoIndColl); + MemPool &pool, + const SymbolBind &symbolBind, + const SCFG::TargetPhrases &tps, + const Vector &hypoIndColl); void Init( - MemPool &pool, - const SymbolBind &symbolBind, - const SCFG::TargetPhrases &tps, - size_t vTPInd, - const Vector &hypoIndColl); + MemPool &pool, + const SymbolBind &symbolBind, + const SCFG::TargetPhrases &tps, + size_t vTPInd, + const Vector &hypoIndColl); void AddHypos(const Moses2::Hypotheses &hypos); void CreateHypo( - MemPool &systemPool, - SCFG::Manager &mgr, - const SCFG::InputPath &path, - const SCFG::SymbolBind &symbolBind); + MemPool &systemPool, + SCFG::Manager &mgr, + const SCFG::InputPath &path, + const SCFG::SymbolBind &symbolBind); void CreateNext( - MemPool &systemPool, - 
MemPool &mgrPool, - SCFG::Manager &mgr, - SCFG::Queue &queue, - SeenPositions &seenPositions, - const SCFG::InputPath &path); + MemPool &systemPool, + MemPool &mgrPool, + SCFG::Manager &mgr, + SCFG::Queue &queue, + SeenPositions &seenPositions, + const SCFG::InputPath &path); std::string Debug(const System &system) const; @@ -109,7 +110,7 @@ protected: size_t tpInd; const Vector *m_hypoIndColl; // pointer to variable in seen position - // hypos and ind to the 1 we're using + // hypos and ind to the 1 we're using QueueItem(MemPool &pool); @@ -123,8 +124,7 @@ typedef std::deque QueueItemRecycler; class QueueItemOrderer { public: - bool operator()(QueueItem* itemA, QueueItem* itemB) const - { + bool operator()(QueueItem* itemA, QueueItem* itemB) const { HypothesisFutureScoreOrderer orderer; return !orderer(itemA->hypo, itemB->hypo); } @@ -132,8 +132,8 @@ public: /////////////////////////////////////////// class Queue : public std::priority_queue, - QueueItemOrderer> + std::vector, + QueueItemOrderer> { }; diff --git a/moses2/SCFG/PhraseImpl.cpp b/moses2/SCFG/PhraseImpl.cpp index 028ede8b0..398e8e217 100644 --- a/moses2/SCFG/PhraseImpl.cpp +++ b/moses2/SCFG/PhraseImpl.cpp @@ -18,7 +18,7 @@ PhraseImpl *PhraseImpl::CreateFromString(MemPool &pool, FactorCollection &vocab, std::vector toks = Moses2::Tokenize(str); size_t size = toks.size(); if (skipLastWord) { - --size; + --size; } PhraseImpl *ret; diff --git a/moses2/SCFG/PhraseImpl.h b/moses2/SCFG/PhraseImpl.h index f26de313d..f61bf2915 100644 --- a/moses2/SCFG/PhraseImpl.h +++ b/moses2/SCFG/PhraseImpl.h @@ -12,11 +12,10 @@ class PhraseImpl: public PhraseImplTemplate { public: static PhraseImpl *CreateFromString(MemPool &pool, FactorCollection &vocab, - const System &system, const std::string &str, bool skipLastWord = true); + const System &system, const std::string &str, bool skipLastWord = true); PhraseImpl(MemPool &pool, size_t size) : - PhraseImplTemplate(pool, size) - { + PhraseImplTemplate(pool, size) { } }; diff 
--git a/moses2/SCFG/Sentence.cpp b/moses2/SCFG/Sentence.cpp index 5e69a7e23..de82e3ee9 100644 --- a/moses2/SCFG/Sentence.cpp +++ b/moses2/SCFG/Sentence.cpp @@ -15,7 +15,7 @@ namespace Moses2 namespace SCFG { Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab, - const System &system, const std::string &str, long translationId) + const System &system, const std::string &str, long translationId) { //cerr << "SCFG Sentence" << endl; @@ -23,15 +23,14 @@ Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab, if (system.options.input.xml_policy) { // xml - ret = CreateFromStringXML(pool, vocab, system, str); - //cerr << "ret=" << ret->Debug(system) << endl; - } - else { - std::vector toks = Tokenize(str); - size_t size = toks.size() + 2; + ret = CreateFromStringXML(pool, vocab, system, str); + //cerr << "ret=" << ret->Debug(system) << endl; + } else { + std::vector toks = Tokenize(str); + size_t size = toks.size() + 2; - ret = new (pool.Allocate()) Sentence(pool, size); - ret->PhraseImplTemplate::CreateFromString(vocab, system, toks, true); + ret = new (pool.Allocate()) Sentence(pool, size); + ret->PhraseImplTemplate::CreateFromString(vocab, system, toks, true); } @@ -39,65 +38,65 @@ Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab, } Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab, - const System &system, const std::string &str) + const System &system, const std::string &str) { Sentence *ret; - vector xmlOptions; - pugi::xml_document doc; + vector xmlOptions; + pugi::xml_document doc; - string str2 = "" + str + ""; - pugi::xml_parse_result result = doc.load(str2.c_str(), - pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments); - pugi::xml_node topNode = doc.child("xml"); + string str2 = "" + str + ""; + pugi::xml_parse_result result = doc.load(str2.c_str(), + pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | 
pugi::parse_comments); + pugi::xml_node topNode = doc.child("xml"); - std::vector toks; - XMLParse(pool, system, 0, topNode, toks, xmlOptions); + std::vector toks; + XMLParse(pool, system, 0, topNode, toks, xmlOptions); - // debug - /* - cerr << "xmloptions:" << endl; - for (size_t i = 0; i < xmlOptions.size(); ++i) { - cerr << xmlOptions[i]->Debug(system) << endl; + // debug + /* + cerr << "xmloptions:" << endl; + for (size_t i = 0; i < xmlOptions.size(); ++i) { + cerr << xmlOptions[i]->Debug(system) << endl; + } + */ + + // create words + size_t size = toks.size() + 2; + ret = new (pool.Allocate()) Sentence(pool, size); + ret->PhraseImplTemplate::CreateFromString(vocab, system, toks, true); + + // xml + for(size_t i=0; iGetNodeName(), "ne") == 0) { + FactorType placeholderFactor = system.options.input.placeholder_factor; + UTIL_THROW_IF2(placeholderFactor == NOT_FOUND, + "Placeholder XML in input. Must have argument -placeholder-factor [NUM]"); + UTIL_THROW_IF2(xmlOption->phraseSize != 1, + "Placeholder must only cover 1 word"); + + const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false); + (*ret)[xmlOption->startPos + 1][placeholderFactor] = factor; + } else { + // default - forced translation. Add to class variable + ret->AddXMLOption(system, xmlOption); } - */ + } - // create words - size_t size = toks.size() + 2; - ret = new (pool.Allocate()) Sentence(pool, size); - ret->PhraseImplTemplate::CreateFromString(vocab, system, toks, true); - - // xml - for(size_t i=0; iGetNodeName(), "ne") == 0) { - FactorType placeholderFactor = system.options.input.placeholder_factor; - UTIL_THROW_IF2(placeholderFactor == NOT_FOUND, - "Placeholder XML in input. 
Must have argument -placeholder-factor [NUM]"); - UTIL_THROW_IF2(xmlOption->phraseSize != 1, - "Placeholder must only cover 1 word"); - - const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false); - (*ret)[xmlOption->startPos + 1][placeholderFactor] = factor; - } - else { - // default - forced translation. Add to class variable - ret->AddXMLOption(system, xmlOption); - } - } - - //cerr << "ret=" << ret->Debug(system) << endl; - return ret; + //cerr << "ret=" << ret->Debug(system) << endl; + return ret; } void Sentence::XMLParse( - MemPool &pool, - const System &system, - size_t depth, - const pugi::xml_node &parentNode, - std::vector &toks, - vector &xmlOptions) -{ // pugixml + MemPool &pool, + const System &system, + size_t depth, + const pugi::xml_node &parentNode, + std::vector &toks, + vector &xmlOptions) +{ + // pugixml for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) { string nodeName = childNode.name(); //cerr << depth << " nodeName=" << nodeName << endl; @@ -119,17 +118,17 @@ void Sentence::XMLParse( pugi::xml_attribute attr; attr = childNode.attribute("translation"); if (!attr.empty()) { - xmlOption->SetTranslation(pool, attr.as_string()); + xmlOption->SetTranslation(pool, attr.as_string()); } attr = childNode.attribute("entity"); if (!attr.empty()) { - xmlOption->SetEntity(pool, attr.as_string()); + xmlOption->SetEntity(pool, attr.as_string()); } attr = childNode.attribute("prob"); if (!attr.empty()) { - xmlOption->prob = attr.as_float(); + xmlOption->prob = attr.as_float(); } xmlOptions.push_back(xmlOption); diff --git a/moses2/SCFG/Sentence.h b/moses2/SCFG/Sentence.h index 7652a677e..1f4378caf 100644 --- a/moses2/SCFG/Sentence.h +++ b/moses2/SCFG/Sentence.h @@ -25,11 +25,11 @@ class Sentence: public InputType, public PhraseImpl { public: static Sentence *CreateFromString(MemPool &pool, FactorCollection &vocab, - const System &system, const std::string &str, long 
translationId); + const System &system, const std::string &str, long translationId); Sentence(MemPool &pool, size_t size) - :InputType(pool) - ,PhraseImpl(pool, size) + :InputType(pool) + ,PhraseImpl(pool, size) {} virtual ~Sentence() @@ -37,15 +37,15 @@ public: protected: static Sentence *CreateFromStringXML(MemPool &pool, FactorCollection &vocab, - const System &system, const std::string &str); + const System &system, const std::string &str); static void XMLParse( - MemPool &pool, - const System &system, - size_t depth, - const pugi::xml_node &parentNode, - std::vector &toks, - std::vector &xmlOptions); + MemPool &pool, + const System &system, + size_t depth, + const pugi::xml_node &parentNode, + std::vector &toks, + std::vector &xmlOptions); }; diff --git a/moses2/SCFG/Stack.cpp b/moses2/SCFG/Stack.cpp index 163761a49..25517d006 100644 --- a/moses2/SCFG/Stack.cpp +++ b/moses2/SCFG/Stack.cpp @@ -13,7 +13,7 @@ namespace SCFG { Stack::Stack(const Manager &mgr) -:m_mgr(mgr) + :m_mgr(mgr) { } @@ -26,7 +26,7 @@ Stack::~Stack() } void Stack::Add(SCFG::Hypothesis *hypo, Recycler &hypoRecycle, - ArcLists &arcLists) + ArcLists &arcLists) { const SCFG::TargetPhraseImpl &tp = hypo->GetTargetPhrase(); const SCFG::Word &lhs = tp.lhs; @@ -52,8 +52,7 @@ const Moses2::HypothesisColl *Stack::GetColl(const SCFG::Word &nt) const Coll::const_iterator iter = m_coll.find(nt); if (iter != m_coll.end()) { return NULL; - } - else { + } else { return iter->second; } } @@ -66,8 +65,7 @@ Moses2::HypothesisColl &Stack::GetColl(const SCFG::Word &nt) if (iter == m_coll.end()) { ret = new Moses2::HypothesisColl(m_mgr); m_coll[nt] = ret; - } - else { + } else { ret = iter->second; } return *ret; @@ -77,7 +75,7 @@ const Hypothesis *Stack::GetBestHypo() const { SCORE bestScore = -std::numeric_limits::infinity(); const HypothesisBase *bestHypo = NULL; - BOOST_FOREACH(const Coll::value_type &val, m_coll){ + BOOST_FOREACH(const Coll::value_type &val, m_coll) { const Moses2::HypothesisColl &hypos = 
*val.second; const Moses2::HypothesisBase *hypo = hypos.GetBestHypo(); diff --git a/moses2/SCFG/Stack.h b/moses2/SCFG/Stack.h index eb7ce2706..413f0749b 100644 --- a/moses2/SCFG/Stack.h +++ b/moses2/SCFG/Stack.h @@ -22,15 +22,16 @@ public: Stack(const Manager &mgr); virtual ~Stack(); - const Coll &GetColl() const - { return m_coll; } + const Coll &GetColl() const { + return m_coll; + } const Moses2::HypothesisColl *GetColl(const SCFG::Word &nt) const; size_t GetSize() const; void Add(SCFG::Hypothesis *hypo, Recycler &hypoRecycle, - ArcLists &arcLists); + ArcLists &arcLists); const Hypothesis *GetBestHypo() const; diff --git a/moses2/SCFG/Stacks.h b/moses2/SCFG/Stacks.h index 6594d5763..09aedb01e 100644 --- a/moses2/SCFG/Stacks.h +++ b/moses2/SCFG/Stacks.h @@ -17,16 +17,19 @@ public: void Init(SCFG::Manager &mgr, size_t size); - const Stack &GetStack(size_t startPos, size_t size) const - { return *m_cells[startPos][size - 1]; } + const Stack &GetStack(size_t startPos, size_t size) const { + return *m_cells[startPos][size - 1]; + } - Stack &GetStack(size_t startPos, size_t size) - { return *m_cells[startPos][size - 1]; } + Stack &GetStack(size_t startPos, size_t size) { + return *m_cells[startPos][size - 1]; + } void OutputStacks() const; - const Stack &GetLastStack() const - { return GetStack(0, m_cells.size()); } + const Stack &GetLastStack() const { + return GetStack(0, m_cells.size()); + } protected: std::vector > m_cells; diff --git a/moses2/SCFG/TargetPhraseImpl.cpp b/moses2/SCFG/TargetPhraseImpl.cpp index ebea6cef7..e58e057e4 100644 --- a/moses2/SCFG/TargetPhraseImpl.cpp +++ b/moses2/SCFG/TargetPhraseImpl.cpp @@ -30,8 +30,8 @@ TargetPhraseImpl *TargetPhraseImpl::CreateFromString(MemPool &pool, vector toks = Tokenize(str); size_t size = toks.size() - 1; TargetPhraseImpl *ret = - new (pool.Allocate()) TargetPhraseImpl(pool, pt, system, - size); + new (pool.Allocate()) TargetPhraseImpl(pool, pt, system, + size); for (size_t i = 0; i < size; ++i) { SCFG::Word 
&word = (*ret)[i]; @@ -45,11 +45,11 @@ TargetPhraseImpl *TargetPhraseImpl::CreateFromString(MemPool &pool, } TargetPhraseImpl::TargetPhraseImpl(MemPool &pool, - const PhraseTable &pt, - const System &system, - size_t size) -:Moses2::TargetPhrase(pool, pt, system, size) -,m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo()) + const PhraseTable &pt, + const System &system, + size_t size) + :Moses2::TargetPhrase(pool, pt, system, size) + ,m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo()) { m_scores = new (pool.Allocate()) Scores(system, pool, @@ -111,13 +111,13 @@ void TargetPhraseImpl::SetAlignmentInfo(const std::string &alignString) size_t TargetPhraseImpl::GetNumNonTerms() const { - size_t ret = 0; - for (size_t i = 0; i < GetSize(); ++i) { - if ((*this)[i].isNonTerminal) { - ++ret; - } - } - return ret; + size_t ret = 0; + for (size_t i = 0; i < GetSize(); ++i) { + if ((*this)[i].isNonTerminal) { + ++ret; + } + } + return ret; } diff --git a/moses2/SCFG/TargetPhraseImpl.h b/moses2/SCFG/TargetPhraseImpl.h index f526d02e7..286ce2157 100644 --- a/moses2/SCFG/TargetPhraseImpl.h +++ b/moses2/SCFG/TargetPhraseImpl.h @@ -38,7 +38,7 @@ public: const PhraseTable &pt, const System &system, const std::string &str); TargetPhraseImpl(MemPool &pool, const PhraseTable &pt, const System &system, - size_t size); + size_t size); //TargetPhraseImpl(MemPool &pool, const System &system, const TargetPhraseImpl ©); virtual ~TargetPhraseImpl(); @@ -48,19 +48,22 @@ public: } void SetAlignNonTerm(const AlignmentInfo &alignInfo) { - m_alignNonTerm = &alignInfo; + m_alignNonTerm = &alignInfo; } void SetAlignmentInfo(const std::string &alignString); - SCORE GetFutureScore() const - { return m_scores->GetTotalScore() + m_estimatedScore; } + SCORE GetFutureScore() const { + return m_scores->GetTotalScore() + m_estimatedScore; + } - virtual SCORE GetScoreForPruning() const - { return GetFutureScore(); } + virtual SCORE GetScoreForPruning() const 
{ + return GetFutureScore(); + } - void SetEstimatedScore(const SCORE &value) - { m_estimatedScore = value; } + void SetEstimatedScore(const SCORE &value) { + m_estimatedScore = value; + } std::string Debug(const System &system) const; diff --git a/moses2/SCFG/TargetPhrases.cpp b/moses2/SCFG/TargetPhrases.cpp index f3d4b9790..fbef79e9c 100644 --- a/moses2/SCFG/TargetPhrases.cpp +++ b/moses2/SCFG/TargetPhrases.cpp @@ -18,12 +18,12 @@ namespace Moses2 namespace SCFG { TargetPhrases::TargetPhrases(MemPool &pool) -:m_coll(pool) + :m_coll(pool) { } TargetPhrases::TargetPhrases(MemPool &pool, size_t size) -:m_coll(pool) + :m_coll(pool) { m_coll.reserve(size); } @@ -37,11 +37,11 @@ void TargetPhrases::SortAndPrune(size_t tableLimit) { iterator iterMiddle; iterMiddle = - (tableLimit == 0 || m_coll.size() < tableLimit) ? - m_coll.end() : m_coll.begin() + tableLimit; + (tableLimit == 0 || m_coll.size() < tableLimit) ? + m_coll.end() : m_coll.begin() + tableLimit; std::partial_sort(m_coll.begin(), iterMiddle, m_coll.end(), - CompareScoreForPruning()); + CompareScoreForPruning()); if (tableLimit && m_coll.size() > tableLimit) { m_coll.resize(tableLimit); diff --git a/moses2/SCFG/TargetPhrases.h b/moses2/SCFG/TargetPhrases.h index 22502b3ef..8bdea7d09 100644 --- a/moses2/SCFG/TargetPhrases.h +++ b/moses2/SCFG/TargetPhrases.h @@ -27,17 +27,14 @@ public: typedef Coll::iterator iterator; typedef Coll::const_iterator const_iterator; //! 
iterators - const_iterator begin() const - { + const_iterator begin() const { return m_coll.begin(); } - const_iterator end() const - { + const_iterator end() const { return m_coll.end(); } - const SCFG::TargetPhraseImpl& operator[](size_t ind) const - { + const SCFG::TargetPhraseImpl& operator[](size_t ind) const { return *m_coll[ind]; } @@ -45,11 +42,11 @@ public: TargetPhrases(MemPool &pool, size_t size); virtual ~TargetPhrases(); - size_t GetSize() const - { return m_coll.size(); } + size_t GetSize() const { + return m_coll.size(); + } - void AddTargetPhrase(const SCFG::TargetPhraseImpl &targetPhrase) - { + void AddTargetPhrase(const SCFG::TargetPhraseImpl &targetPhrase) { m_coll.push_back(&targetPhrase); } diff --git a/moses2/SCFG/Word.cpp b/moses2/SCFG/Word.cpp index 1794706da..8f67fb0fa 100644 --- a/moses2/SCFG/Word.cpp +++ b/moses2/SCFG/Word.cpp @@ -22,14 +22,14 @@ namespace Moses2 namespace SCFG { Word::Word(const SCFG::Word ©) -:Moses2::Word(copy) -,isNonTerminal(copy.isNonTerminal) + :Moses2::Word(copy) + ,isNonTerminal(copy.isNonTerminal) { } void Word::CreateFromString(FactorCollection &vocab, - const System &system, - const std::string &str) + const System &system, + const std::string &str) { vector toks; @@ -43,13 +43,11 @@ void Word::CreateFromString(FactorCollection &vocab, assert(startPos != string::npos); string str2 = str.substr(startPos + 1, str.size() - startPos - 2); toks = Tokenize(str2, "|"); - } - else { + } else { string str2 = str.substr(1, str.size() - 2); toks = Tokenize(str2, "|"); } - } - else { + } else { isNonTerminal = false; toks = Tokenize(str, "|"); } @@ -75,9 +73,9 @@ size_t Word::hash(const std::vector &factors) const { size_t seed = isNonTerminal; for (size_t i = 0; i < factors.size(); ++i) { - FactorType factorType = factors[i]; - const Factor *factor = m_factors[factorType]; - boost::hash_combine(seed, factor); + FactorType factorType = factors[i]; + const Factor *factor = m_factors[factorType]; + boost::hash_combine(seed, 
factor); } return seed; } @@ -89,46 +87,46 @@ void Word::OutputToStream(const System &system, std::ostream &out) const } Moses2::Word::OutputToStream(system, out); if (isNonTerminal) { - out << "]"; + out << "]"; } } void Word::OutputToStream( - const ManagerBase &mgr, - size_t targetPos, - const SCFG::Hypothesis &hypo, - std::ostream &out) const + const ManagerBase &mgr, + size_t targetPos, + const SCFG::Hypothesis &hypo, + std::ostream &out) const { const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase(); const SCFG::SymbolBind &symbolBind = hypo.GetSymbolBind(); - bool outputWord = true; - if (mgr.system.options.input.placeholder_factor != NOT_FOUND) { - const AlignmentInfo &alignInfo = tp.GetAlignTerm(); - std::set sourceAligns = alignInfo.GetAlignmentsForTarget(targetPos); - if (sourceAligns.size() == 1) { - size_t sourcePos = *sourceAligns.begin(); - /* - cerr << "sourcePos=" << sourcePos << endl; - cerr << "tp=" << tp.Debug(mgr.system) << endl; - cerr << "m_symbolBind=" << symbolBind.Debug(mgr.system) << endl; - */ - assert(sourcePos < symbolBind.GetSize()); - const Range &inputRange = symbolBind.coll[sourcePos].GetRange(); - assert(inputRange.GetNumWordsCovered() == 1); - const SCFG::Sentence &sentence = static_cast(mgr.GetInput()); - const SCFG::Word &sourceWord = sentence[inputRange.GetStartPos()]; - const Factor *factor = sourceWord[mgr.system.options.input.placeholder_factor]; - if (factor) { - out << factor->GetString(); - outputWord = false; - } - } + bool outputWord = true; + if (mgr.system.options.input.placeholder_factor != NOT_FOUND) { + const AlignmentInfo &alignInfo = tp.GetAlignTerm(); + std::set sourceAligns = alignInfo.GetAlignmentsForTarget(targetPos); + if (sourceAligns.size() == 1) { + size_t sourcePos = *sourceAligns.begin(); + /* + cerr << "sourcePos=" << sourcePos << endl; + cerr << "tp=" << tp.Debug(mgr.system) << endl; + cerr << "m_symbolBind=" << symbolBind.Debug(mgr.system) << endl; + */ + assert(sourcePos < symbolBind.GetSize()); 
+ const Range &inputRange = symbolBind.coll[sourcePos].GetRange(); + assert(inputRange.GetNumWordsCovered() == 1); + const SCFG::Sentence &sentence = static_cast(mgr.GetInput()); + const SCFG::Word &sourceWord = sentence[inputRange.GetStartPos()]; + const Factor *factor = sourceWord[mgr.system.options.input.placeholder_factor]; + if (factor) { + out << factor->GetString(); + outputWord = false; + } } + } - if (outputWord){ - OutputToStream(mgr.system, out); - } + if (outputWord) { + OutputToStream(mgr.system, out); + } } std::string Word::Debug(const System &system) const @@ -139,7 +137,7 @@ std::string Word::Debug(const System &system) const } out << Moses2::Word::Debug(system); if (isNonTerminal) { - out << "]"; + out << "]"; } return out.str(); } diff --git a/moses2/SCFG/Word.h b/moses2/SCFG/Word.h index 0c3aa158a..e039f92e8 100644 --- a/moses2/SCFG/Word.h +++ b/moses2/SCFG/Word.h @@ -26,16 +26,14 @@ public: explicit Word(const SCFG::Word ©); void CreateFromString(FactorCollection &vocab, - const System &system, - const std::string &str); + const System &system, + const std::string &str); - bool operator==(const SCFG::Word &compare) const - { + bool operator==(const SCFG::Word &compare) const { int cmp = Moses2::Word::Compare(compare); if (cmp == 0 && isNonTerminal == compare.isNonTerminal) { return true; - } - else { + } else { return false; } } @@ -45,10 +43,10 @@ public: virtual void OutputToStream(const System &system, std::ostream &out) const; virtual void OutputToStream( - const ManagerBase &mgr, - size_t targetPos, - const SCFG::Hypothesis &hypo, - std::ostream &out) const; + const ManagerBase &mgr, + size_t targetPos, + const SCFG::Hypothesis &hypo, + std::ostream &out) const; virtual std::string Debug(const System &system) const; @@ -56,7 +54,9 @@ protected: }; inline size_t hash_value(const SCFG::Word &word) -{ return word.hash(); } +{ + return word.hash(); +} } } diff --git a/moses2/SCFG/nbest/KBestExtractor.cpp b/moses2/SCFG/nbest/KBestExtractor.cpp 
index ae7ec8634..14d12c4b5 100644 --- a/moses2/SCFG/nbest/KBestExtractor.cpp +++ b/moses2/SCFG/nbest/KBestExtractor.cpp @@ -26,7 +26,7 @@ namespace SCFG { ///////////////////////////////////////////////////////////// KBestExtractor::KBestExtractor(const SCFG::Manager &mgr) -:m_mgr(mgr) + :m_mgr(mgr) { } @@ -37,37 +37,37 @@ KBestExtractor::~KBestExtractor() void KBestExtractor::OutputToStream(std::stringstream &strm) { - //cerr << "1" << flush; - const Stack &lastStack = m_mgr.GetStacks().GetLastStack(); - UTIL_THROW_IF2(lastStack.GetColl().size() != 1, "Only suppose to be 1 hypo coll in last stack"); - UTIL_THROW_IF2(lastStack.GetColl().begin()->second == NULL, "NULL hypo collection"); + //cerr << "1" << flush; + const Stack &lastStack = m_mgr.GetStacks().GetLastStack(); + UTIL_THROW_IF2(lastStack.GetColl().size() != 1, "Only suppose to be 1 hypo coll in last stack"); + UTIL_THROW_IF2(lastStack.GetColl().begin()->second == NULL, "NULL hypo collection"); - const Hypotheses &hypos = lastStack.GetColl().begin()->second->GetSortedAndPrunedHypos(m_mgr, m_mgr.arcLists); - UTIL_THROW_IF2(hypos.size() != 1, "Only suppose to be 1 hypo in collection"); - const HypothesisBase *hypo = hypos[0]; + const Hypotheses &hypos = lastStack.GetColl().begin()->second->GetSortedAndPrunedHypos(m_mgr, m_mgr.arcLists); + UTIL_THROW_IF2(hypos.size() != 1, "Only suppose to be 1 hypo in collection"); + const HypothesisBase *hypo = hypos[0]; - const ArcLists &arcLists = m_mgr.arcLists; - const ArcList &arcList = arcLists.GetArcList(hypo); - NBests &nbests = m_nbestColl.GetOrCreateNBests(m_mgr, arcList); + const ArcLists &arcLists = m_mgr.arcLists; + const ArcList &arcList = arcLists.GetArcList(hypo); + NBests &nbests = m_nbestColl.GetOrCreateNBests(m_mgr, arcList); - size_t ind = 0; - while (nbests.Extend(m_mgr, m_nbestColl, ind)) { - const NBest &deriv = nbests.Get(ind); - strm << m_mgr.GetTranslationId() << " ||| "; - //cerr << "1" << flush; - strm << deriv.GetStringExclSentenceMarkers(); - 
//cerr << "2" << flush; - strm << " ||| "; - deriv.GetScores().OutputBreakdown(strm, m_mgr.system); - //cerr << "3" << flush; - strm << "||| "; - strm << deriv.GetScores().GetTotalScore(); - //cerr << "4" << flush; + size_t ind = 0; + while (nbests.Extend(m_mgr, m_nbestColl, ind)) { + const NBest &deriv = nbests.Get(ind); + strm << m_mgr.GetTranslationId() << " ||| "; + //cerr << "1" << flush; + strm << deriv.GetStringExclSentenceMarkers(); + //cerr << "2" << flush; + strm << " ||| "; + deriv.GetScores().OutputBreakdown(strm, m_mgr.system); + //cerr << "3" << flush; + strm << "||| "; + strm << deriv.GetScores().GetTotalScore(); + //cerr << "4" << flush; - strm << endl; + strm << endl; - ++ind; - } + ++ind; + } } } diff --git a/moses2/SCFG/nbest/NBest.cpp b/moses2/SCFG/nbest/NBest.cpp index 99c005ee3..1057fa004 100644 --- a/moses2/SCFG/nbest/NBest.cpp +++ b/moses2/SCFG/nbest/NBest.cpp @@ -22,126 +22,126 @@ namespace SCFG { NBest::NBest( - const SCFG::Manager &mgr, - const ArcList &varcList, - size_t vind, - NBestColl &nbestColl) -:arcList(&varcList) -,arcInd(vind) + const SCFG::Manager &mgr, + const ArcList &varcList, + size_t vind, + NBestColl &nbestColl) + :arcList(&varcList) + ,arcInd(vind) { - const SCFG::Hypothesis &hypo = GetHypo(); + const SCFG::Hypothesis &hypo = GetHypo(); - // copy scores from best hypo - MemPool &pool = mgr.GetPool(); - m_scores = new (pool.Allocate()) - Scores(mgr.system, pool, mgr.system.featureFunctions.GetNumScores(), hypo.GetScores()); + // copy scores from best hypo + MemPool &pool = mgr.GetPool(); + m_scores = new (pool.Allocate()) + Scores(mgr.system, pool, mgr.system.featureFunctions.GetNumScores(), hypo.GetScores()); - // children - const ArcLists &arcLists = mgr.arcLists; - //const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase(); + // children + const ArcLists &arcLists = mgr.arcLists; + //const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase(); - const Vector &prevHypos = hypo.GetPrevHypos(); - for (size_t i = 0; i < 
prevHypos.size(); ++i) { - const SCFG::Hypothesis *prevHypo = prevHypos[i]; - const ArcList &childArc = arcLists.GetArcList(prevHypo); - NBests &childNBests = nbestColl.GetOrCreateNBests(mgr, childArc); - Child child(&childNBests, 0); - children.push_back(child); - } + const Vector &prevHypos = hypo.GetPrevHypos(); + for (size_t i = 0; i < prevHypos.size(); ++i) { + const SCFG::Hypothesis *prevHypo = prevHypos[i]; + const ArcList &childArc = arcLists.GetArcList(prevHypo); + NBests &childNBests = nbestColl.GetOrCreateNBests(mgr, childArc); + Child child(&childNBests, 0); + children.push_back(child); + } - stringstream strm; - OutputToStream(mgr, strm); - m_str = strm.str(); + stringstream strm; + OutputToStream(mgr, strm); + m_str = strm.str(); } NBest::NBest(const SCFG::Manager &mgr, - const NBest &orig, - size_t childInd, - NBestColl &nbestColl) -:arcList(orig.arcList) -,arcInd(orig.arcInd) -,children(orig.children) + const NBest &orig, + size_t childInd, + NBestColl &nbestColl) + :arcList(orig.arcList) + ,arcInd(orig.arcInd) + ,children(orig.children) { - Child &child = children[childInd]; - size_t &ind = child.second; - ++ind; - UTIL_THROW_IF2(ind >= child.first->GetSize(), - "out of bound:" << ind << ">=" << child.first->GetSize()); + Child &child = children[childInd]; + size_t &ind = child.second; + ++ind; + UTIL_THROW_IF2(ind >= child.first->GetSize(), + "out of bound:" << ind << ">=" << child.first->GetSize()); - // scores - MemPool &pool = mgr.GetPool(); - m_scores = new (pool.Allocate()) - Scores(mgr.system, - pool, - mgr.system.featureFunctions.GetNumScores(), - orig.GetScores()); + // scores + MemPool &pool = mgr.GetPool(); + m_scores = new (pool.Allocate()) + Scores(mgr.system, + pool, + mgr.system.featureFunctions.GetNumScores(), + orig.GetScores()); - const Scores &origScores = orig.GetChild(childInd).GetScores(); - const Scores &newScores = GetChild(childInd).GetScores(); + const Scores &origScores = orig.GetChild(childInd).GetScores(); + const 
Scores &newScores = GetChild(childInd).GetScores(); - m_scores->MinusEquals(mgr.system, origScores); - m_scores->PlusEquals(mgr.system, newScores); + m_scores->MinusEquals(mgr.system, origScores); + m_scores->PlusEquals(mgr.system, newScores); - stringstream strm; - OutputToStream(mgr, strm); - m_str = strm.str(); + stringstream strm; + OutputToStream(mgr, strm); + m_str = strm.str(); } const SCFG::Hypothesis &NBest::GetHypo() const { - const HypothesisBase *hypoBase = (*arcList)[arcInd]; - const SCFG::Hypothesis &hypo = *static_cast(hypoBase); - return hypo; + const HypothesisBase *hypoBase = (*arcList)[arcInd]; + const SCFG::Hypothesis &hypo = *static_cast(hypoBase); + return hypo; } const NBest &NBest::GetChild(size_t ind) const { - const Child &child = children[ind]; - const NBests &nbests = *child.first; - const NBest &nbest = nbests.Get(child.second); - return nbest; + const Child &child = children[ind]; + const NBests &nbests = *child.first; + const NBest &nbest = nbests.Get(child.second); + return nbest; } void NBest::CreateDeviants( - const SCFG::Manager &mgr, - NBestColl &nbestColl, - Contenders &contenders) const + const SCFG::Manager &mgr, + NBestColl &nbestColl, + Contenders &contenders) const { - if (arcInd + 1 < arcList->size()) { - // to use next arclist, all children must be 1st. Not sure if this is correct - bool ok = true; - BOOST_FOREACH(const Child &child, children) { - if (child.second) { - ok = false; - break; - } - } + if (arcInd + 1 < arcList->size()) { + // to use next arclist, all children must be 1st. 
Not sure if this is correct + bool ok = true; + BOOST_FOREACH(const Child &child, children) { + if (child.second) { + ok = false; + break; + } + } - if (ok) { - NBest *next = new NBest(mgr, *arcList, arcInd + 1, nbestColl); - contenders.push(next); - } - } + if (ok) { + NBest *next = new NBest(mgr, *arcList, arcInd + 1, nbestColl); + contenders.push(next); + } + } - for (size_t childInd = 0; childInd < children.size(); ++childInd) { - const Child &child = children[childInd]; - NBests &childNBests = *child.first; - bool extended = childNBests.Extend(mgr, nbestColl, child.second + 1); - if (extended) { - //cerr << "HH1 " << childInd << endl; - NBest *next = new NBest(mgr, *this, childInd, nbestColl); + for (size_t childInd = 0; childInd < children.size(); ++childInd) { + const Child &child = children[childInd]; + NBests &childNBests = *child.first; + bool extended = childNBests.Extend(mgr, nbestColl, child.second + 1); + if (extended) { + //cerr << "HH1 " << childInd << endl; + NBest *next = new NBest(mgr, *this, childInd, nbestColl); - //cerr << "HH2 " << childInd << endl; - contenders.push(next); - //cerr << "HH3 " << childInd << endl; - } - } + //cerr << "HH2 " << childInd << endl; + contenders.push(next); + //cerr << "HH3 " << childInd << endl; + } + } } void NBest::OutputToStream( - const SCFG::Manager &mgr, - std::stringstream &strm) const + const SCFG::Manager &mgr, + std::stringstream &strm) const { const SCFG::Hypothesis &hypo = GetHypo(); //strm << &hypo << " "; @@ -149,44 +149,43 @@ void NBest::OutputToStream( const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase(); for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) { - const SCFG::Word &word = tp[targetPos]; - //cerr << "word " << pos << "=" << word << endl; - if (word.isNonTerminal) { - //cerr << "is nt" << endl; - // non-term. 
fill out with prev hypo - size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[targetPos]; + const SCFG::Word &word = tp[targetPos]; + //cerr << "word " << pos << "=" << word << endl; + if (word.isNonTerminal) { + //cerr << "is nt" << endl; + // non-term. fill out with prev hypo + size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[targetPos]; - UTIL_THROW_IF2(nonTermInd >= children.size(), "Out of bounds:" << nonTermInd << ">=" << children.size()); + UTIL_THROW_IF2(nonTermInd >= children.size(), "Out of bounds:" << nonTermInd << ">=" << children.size()); - const NBest &nbest = GetChild(nonTermInd); - strm << nbest.GetString(); - } - else { - //cerr << "not nt" << endl; + const NBest &nbest = GetChild(nonTermInd); + strm << nbest.GetString(); + } else { + //cerr << "not nt" << endl; word.OutputToStream(hypo.GetManager(), targetPos, hypo, strm); - strm << " "; - } + strm << " "; + } } } std::string NBest::Debug(const System &system) const { - stringstream strm; - strm << GetScores().GetTotalScore() << " " - << arcList << "(" - << arcList->size() << ")[" - << arcInd << "] "; - for (size_t i = 0; i < children.size(); ++i) { - const Child &child = children[i]; - const NBest &childNBest = child.first->Get(child.second); + stringstream strm; + strm << GetScores().GetTotalScore() << " " + << arcList << "(" + << arcList->size() << ")[" + << arcInd << "] "; + for (size_t i = 0; i < children.size(); ++i) { + const Child &child = children[i]; + const NBest &childNBest = child.first->Get(child.second); - strm << child.first << "(" - << child.first->GetSize() << ")[" - << child.second << "]"; - strm << childNBest.GetScores().GetTotalScore() << " "; - } - return strm.str(); + strm << child.first << "(" + << child.first->GetSize() << ")[" + << child.second << "]"; + strm << childNBest.GetScores().GetTotalScore() << " "; + } + return strm.str(); } } diff --git a/moses2/SCFG/nbest/NBest.h b/moses2/SCFG/nbest/NBest.h index fa21866bb..6b406fa17 100644 --- 
a/moses2/SCFG/nbest/NBest.h +++ b/moses2/SCFG/nbest/NBest.h @@ -34,62 +34,62 @@ typedef std::priority_queue, NBestScoreOrderer> Cont class NBest { public: - const ArcList *arcList; - size_t arcInd; + const ArcList *arcList; + size_t arcInd; - typedef std::pair Child; // key to another NBest - typedef std::vector Children; - Children children; + typedef std::pair Child; // key to another NBest + typedef std::vector Children; + Children children; - NBest(const SCFG::Manager &mgr, - const ArcList &varcList, - size_t vind, - NBestColl &nbestColl); + NBest(const SCFG::Manager &mgr, + const ArcList &varcList, + size_t vind, + NBestColl &nbestColl); - NBest(const SCFG::Manager &mgr, - const NBest &orig, - size_t childInd, - NBestColl &nbestColl); + NBest(const SCFG::Manager &mgr, + const NBest &orig, + size_t childInd, + NBestColl &nbestColl); - void CreateDeviants( - const SCFG::Manager &mgr, - NBestColl &nbestColl, - Contenders &contenders) const; + void CreateDeviants( + const SCFG::Manager &mgr, + NBestColl &nbestColl, + Contenders &contenders) const; - const Scores &GetScores() const - { return *m_scores; } + const Scores &GetScores() const { + return *m_scores; + } - const NBest &GetChild(size_t ind) const; + const NBest &GetChild(size_t ind) const; - const std::string &GetString() const - { return m_str; } + const std::string &GetString() const { + return m_str; + } - std::string GetStringExclSentenceMarkers() const - { - std::string ret = m_str.substr(4, m_str.size() - 10); - return ret; - } + std::string GetStringExclSentenceMarkers() const { + std::string ret = m_str.substr(4, m_str.size() - 10); + return ret; + } - std::string Debug(const System &system) const; + std::string Debug(const System &system) const; protected: - Scores *m_scores; - std::string m_str; + Scores *m_scores; + std::string m_str; - const SCFG::Hypothesis &GetHypo() const; + const SCFG::Hypothesis &GetHypo() const; - void OutputToStream( - const SCFG::Manager &mgr, - std::stringstream &strm) 
const; + void OutputToStream( + const SCFG::Manager &mgr, + std::stringstream &strm) const; }; ///////////////////////////////////////////////////////////// class NBestScoreOrderer { public: - bool operator()(const NBest* a, const NBest* b) const - { + bool operator()(const NBest* a, const NBest* b) const { return a->GetScores().GetTotalScore() < b->GetScores().GetTotalScore(); } }; diff --git a/moses2/SCFG/nbest/NBestColl.cpp b/moses2/SCFG/nbest/NBestColl.cpp index 8cd386a08..38a9ac867 100644 --- a/moses2/SCFG/nbest/NBestColl.cpp +++ b/moses2/SCFG/nbest/NBestColl.cpp @@ -21,30 +21,29 @@ namespace SCFG ///////////////////////////////////////////////////////////// NBestColl::~NBestColl() { - BOOST_FOREACH(const Coll::value_type &valPair, m_candidates) { - NBests *nbests = valPair.second; - delete nbests; - } + BOOST_FOREACH(const Coll::value_type &valPair, m_candidates) { + NBests *nbests = valPair.second; + delete nbests; + } } void NBestColl::Add(const SCFG::Manager &mgr, const ArcList &arcList) { - NBests &nbests = GetOrCreateNBests(mgr, arcList); - //cerr << "nbests for " << &nbests << ":"; + NBests &nbests = GetOrCreateNBests(mgr, arcList); + //cerr << "nbests for " << &nbests << ":"; } NBests &NBestColl::GetOrCreateNBests(const SCFG::Manager &mgr, const ArcList &arcList) { - NBests *ret; - Coll::iterator iter = m_candidates.find(&arcList); - if(iter == m_candidates.end()) { - ret = new NBests(mgr, arcList, *this); - m_candidates[&arcList] = ret; - } - else { - ret = iter->second; - } - return *ret; + NBests *ret; + Coll::iterator iter = m_candidates.find(&arcList); + if(iter == m_candidates.end()) { + ret = new NBests(mgr, arcList, *this); + m_candidates[&arcList] = ret; + } else { + ret = iter->second; + } + return *ret; } diff --git a/moses2/SCFG/nbest/NBestColl.h b/moses2/SCFG/nbest/NBestColl.h index 1ef8a5698..01e5763e4 100644 --- a/moses2/SCFG/nbest/NBestColl.h +++ b/moses2/SCFG/nbest/NBestColl.h @@ -19,14 +19,14 @@ class Manager; class NBestColl { 
public: - virtual ~NBestColl(); + virtual ~NBestColl(); - void Add(const SCFG::Manager &mgr, const ArcList &arcList); - NBests &GetOrCreateNBests(const SCFG::Manager &mgr, const ArcList &arcList); + void Add(const SCFG::Manager &mgr, const ArcList &arcList); + NBests &GetOrCreateNBests(const SCFG::Manager &mgr, const ArcList &arcList); protected: - typedef boost::unordered_map Coll; - Coll m_candidates; + typedef boost::unordered_map Coll; + Coll m_candidates; }; diff --git a/moses2/SCFG/nbest/NBests.cpp b/moses2/SCFG/nbest/NBests.cpp index ea7e835dc..27376977f 100644 --- a/moses2/SCFG/nbest/NBests.cpp +++ b/moses2/SCFG/nbest/NBests.cpp @@ -17,93 +17,91 @@ namespace Moses2 namespace SCFG { NBests::NBests(const SCFG::Manager &mgr, - const ArcList &arcList, - NBestColl &nbestColl) -:indIter(0) + const ArcList &arcList, + NBestColl &nbestColl) + :indIter(0) { - // best - NBest *contender = new NBest(mgr, arcList, 0, nbestColl); - contenders.push(contender); - bool extended = Extend(mgr, nbestColl, 0); - assert(extended); + // best + NBest *contender = new NBest(mgr, arcList, 0, nbestColl); + contenders.push(contender); + bool extended = Extend(mgr, nbestColl, 0); + assert(extended); } NBests::~NBests() { - BOOST_FOREACH(const NBest *nbest, m_coll) { - delete nbest; - } + BOOST_FOREACH(const NBest *nbest, m_coll) { + delete nbest; + } - // delete bad contenders left in queue - while (!contenders.empty()) { - NBest *contender = contenders.top(); - contenders.pop(); - delete contender; - } + // delete bad contenders left in queue + while (!contenders.empty()) { + NBest *contender = contenders.top(); + contenders.pop(); + delete contender; + } } bool NBests::Extend(const SCFG::Manager &mgr, - NBestColl &nbestColl, - size_t ind) + NBestColl &nbestColl, + size_t ind) { - if (ind < m_coll.size()) { - // asking for 1 we've dont already - return true; - } + if (ind < m_coll.size()) { + // asking for 1 we've dont already + return true; + } - assert(ind == m_coll.size()); + 
assert(ind == m_coll.size()); - // checks - if (ind >= mgr.system.options.nbest.nbest_size) { - return false; - } + // checks + if (ind >= mgr.system.options.nbest.nbest_size) { + return false; + } - size_t maxIter = mgr.system.options.nbest.nbest_size * mgr.system.options.nbest.factor; + size_t maxIter = mgr.system.options.nbest.nbest_size * mgr.system.options.nbest.factor; - // MAIN LOOP, create 1 new deriv. - // The loop is for distinct nbest - bool ok = false; - while (!ok) { - ++indIter; - if (indIter > maxIter) { - return false; - } + // MAIN LOOP, create 1 new deriv. + // The loop is for distinct nbest + bool ok = false; + while (!ok) { + ++indIter; + if (indIter > maxIter) { + return false; + } - if (contenders.empty()) { - return false; - } + if (contenders.empty()) { + return false; + } - NBest *contender = contenders.top(); - contenders.pop(); + NBest *contender = contenders.top(); + contenders.pop(); - contender->CreateDeviants(mgr, nbestColl, contenders); + contender->CreateDeviants(mgr, nbestColl, contenders); - if (mgr.system.options.nbest.only_distinct) { - const string &tgtPhrase = contender->GetString(); - //cerr << "tgtPhrase=" << tgtPhrase << endl; - boost::hash string_hash; - size_t hash = string_hash(tgtPhrase); + if (mgr.system.options.nbest.only_distinct) { + const string &tgtPhrase = contender->GetString(); + //cerr << "tgtPhrase=" << tgtPhrase << endl; + boost::hash string_hash; + size_t hash = string_hash(tgtPhrase); - if (distinctHypos.insert(hash).second) { - ok = true; - } - } - else { - ok = true; - } + if (distinctHypos.insert(hash).second) { + ok = true; + } + } else { + ok = true; + } - if (ok) { - Add(contender); - //cerr << best->GetScores().GetTotalScore() << " "; - //cerr << best->Debug(mgr.system) << endl; - return true; - } - else { - delete contender; - } - } + if (ok) { + Add(contender); + //cerr << best->GetScores().GetTotalScore() << " "; + //cerr << best->Debug(mgr.system) << endl; + return true; + } else { + delete 
contender; + } + } - return false; + return false; } } diff --git a/moses2/SCFG/nbest/NBests.h b/moses2/SCFG/nbest/NBests.h index a9cb93a5d..97fe9a025 100644 --- a/moses2/SCFG/nbest/NBests.h +++ b/moses2/SCFG/nbest/NBests.h @@ -17,33 +17,34 @@ namespace SCFG class NBests { public: - Contenders contenders; - boost::unordered_set distinctHypos; + Contenders contenders; + boost::unordered_set distinctHypos; - NBests(const SCFG::Manager &mgr, - const ArcList &arcList, - NBestColl &nbestColl); + NBests(const SCFG::Manager &mgr, + const ArcList &arcList, + NBestColl &nbestColl); - virtual ~NBests(); + virtual ~NBests(); - size_t GetSize() const - { return m_coll.size(); } + size_t GetSize() const { + return m_coll.size(); + } - const NBest &Get(size_t ind) const - { return *m_coll[ind]; } + const NBest &Get(size_t ind) const { + return *m_coll[ind]; + } - bool Extend(const SCFG::Manager &mgr, - NBestColl &nbestColl, - size_t ind); + bool Extend(const SCFG::Manager &mgr, + NBestColl &nbestColl, + size_t ind); protected: - std::vector m_coll; - size_t indIter; + std::vector m_coll; + size_t indIter; - void Add(const NBest *nbest) - { - m_coll.push_back(nbest); - } + void Add(const NBest *nbest) { + m_coll.push_back(nbest); + } }; diff --git a/moses2/Scores.cpp b/moses2/Scores.cpp index b6e731807..6cf121422 100644 --- a/moses2/Scores.cpp +++ b/moses2/Scores.cpp @@ -22,26 +22,24 @@ namespace Moses2 { Scores::Scores(const System &system, MemPool &pool, size_t numScores) : - m_total(0) + m_total(0) { if (system.options.nbest.nbest_size) { m_scores = new (pool.Allocate(numScores)) SCORE[numScores]; Init(m_scores, numScores, 0); - } - else { + } else { m_scores = NULL; } } Scores::Scores(const System &system, MemPool &pool, size_t numScores, - const Scores &origScores) : - m_total(origScores.m_total) + const Scores &origScores) : + m_total(origScores.m_total) { if (system.options.nbest.nbest_size) { m_scores = new (pool.Allocate(numScores)) SCORE[numScores]; memcpy(m_scores, 
origScores.m_scores, sizeof(SCORE) * numScores); - } - else { + } else { m_scores = NULL; } } @@ -69,7 +67,7 @@ void Scores::Reset(const System &system) } void Scores::PlusEquals(const System &system, - const FeatureFunction &featureFunction, const SCORE &score) + const FeatureFunction &featureFunction, const SCORE &score) { assert(featureFunction.GetNumScores() == 1); @@ -84,7 +82,7 @@ void Scores::PlusEquals(const System &system, } void Scores::PlusEquals(const System &system, - const FeatureFunction &featureFunction, const SCORE &score, size_t offset) + const FeatureFunction &featureFunction, const SCORE &score, size_t offset) { assert(offset < featureFunction.GetNumScores()); @@ -99,7 +97,7 @@ void Scores::PlusEquals(const System &system, } void Scores::PlusEquals(const System &system, - const FeatureFunction &featureFunction, const std::vector &scores) + const FeatureFunction &featureFunction, const std::vector &scores) { assert(scores.size() == featureFunction.GetNumScores()); @@ -118,7 +116,7 @@ void Scores::PlusEquals(const System &system, } void Scores::PlusEquals(const System &system, - const FeatureFunction &featureFunction, SCORE scores[]) + const FeatureFunction &featureFunction, SCORE scores[]) { //assert(scores.size() == featureFunction.GetNumScores()); @@ -159,7 +157,7 @@ void Scores::MinusEquals(const System &system, const Scores &other) } void Scores::Assign(const System &system, - const FeatureFunction &featureFunction, const SCORE &score) + const FeatureFunction &featureFunction, const SCORE &score) { assert(featureFunction.GetNumScores() == 1); @@ -177,7 +175,7 @@ void Scores::Assign(const System &system, } void Scores::Assign(const System &system, - const FeatureFunction &featureFunction, const std::vector &scores) + const FeatureFunction &featureFunction, const std::vector &scores) { assert(scores.size() == featureFunction.GetNumScores()); @@ -198,13 +196,13 @@ void Scores::Assign(const System &system, } void Scores::CreateFromString(const 
std::string &str, - const FeatureFunction &featureFunction, const System &system, - bool transformScores) + const FeatureFunction &featureFunction, const System &system, + bool transformScores) { vector scores = Tokenize(str); if (transformScores) { std::transform(scores.begin(), scores.end(), scores.begin(), - TransformScore); + TransformScore); std::transform(scores.begin(), scores.end(), scores.begin(), FloorScore); } @@ -223,7 +221,7 @@ std::string Scores::Debug(const System &system) const if (system.options.nbest.nbest_size) { out << ", "; - BOOST_FOREACH(const FeatureFunction *ff, system.featureFunctions.GetFeatureFunctions()){ + BOOST_FOREACH(const FeatureFunction *ff, system.featureFunctions.GetFeatureFunctions()) { out << ff->GetName() << "= "; for (size_t i = ff->GetStartInd(); i < (ff->GetStartInd() + ff->GetNumScores()); ++i) { out << m_scores[i] << " "; @@ -237,7 +235,7 @@ std::string Scores::Debug(const System &system) const void Scores::OutputBreakdown(std::ostream &out, const System &system) const { if (system.options.nbest.nbest_size) { - BOOST_FOREACH(const FeatureFunction *ff, system.featureFunctions.GetFeatureFunctions()){ + BOOST_FOREACH(const FeatureFunction *ff, system.featureFunctions.GetFeatureFunctions()) { if (ff->IsTuneable()) { out << ff->GetName() << "= "; for (size_t i = ff->GetStartInd(); i < (ff->GetStartInd() + ff->GetNumScores()); ++i) { @@ -250,7 +248,7 @@ void Scores::OutputBreakdown(std::ostream &out, const System &system) const // static functions to work out estimated scores SCORE Scores::CalcWeightedScore(const System &system, - const FeatureFunction &featureFunction, SCORE scores[]) + const FeatureFunction &featureFunction, SCORE scores[]) { SCORE ret = 0; @@ -269,7 +267,7 @@ SCORE Scores::CalcWeightedScore(const System &system, } SCORE Scores::CalcWeightedScore(const System &system, - const FeatureFunction &featureFunction, SCORE score) + const FeatureFunction &featureFunction, SCORE score) { const Weights &weights = 
system.weights; assert(featureFunction.GetNumScores() == 1); diff --git a/moses2/Scores.h b/moses2/Scores.h index ef4896ad1..5069fda36 100644 --- a/moses2/Scores.h +++ b/moses2/Scores.h @@ -23,42 +23,43 @@ class Scores public: Scores(const System &system, MemPool &pool, size_t numScores); Scores(const System &system, MemPool &pool, size_t numScores, - const Scores &origScores); + const Scores &origScores); virtual ~Scores(); - SCORE GetTotalScore() const - { return m_total; } + SCORE GetTotalScore() const { + return m_total; + } const SCORE *GetScores(const FeatureFunction &featureFunction) const; void Reset(const System &system); void CreateFromString(const std::string &str, - const FeatureFunction &featureFunction, const System &system, - bool transformScores); + const FeatureFunction &featureFunction, const System &system, + bool transformScores); void PlusEquals(const System &system, const FeatureFunction &featureFunction, - const SCORE &score); + const SCORE &score); void PlusEquals(const System &system, const FeatureFunction &featureFunction, - const SCORE &score, size_t offset); + const SCORE &score, size_t offset); void PlusEquals(const System &system, const FeatureFunction &featureFunction, - const std::vector &scores); + const std::vector &scores); void PlusEquals(const System &system, const FeatureFunction &featureFunction, - SCORE scores[]); + SCORE scores[]); void PlusEquals(const System &system, const Scores &scores); void MinusEquals(const System &system, const Scores &scores); void Assign(const System &system, const FeatureFunction &featureFunction, - const SCORE &score); + const SCORE &score); void Assign(const System &system, const FeatureFunction &featureFunction, - const std::vector &scores); + const std::vector &scores); std::string Debug(const System &system) const; @@ -66,10 +67,10 @@ public: // static functions to work out estimated scores static SCORE CalcWeightedScore(const System &system, - const FeatureFunction &featureFunction, SCORE 
scores[]); + const FeatureFunction &featureFunction, SCORE scores[]); static SCORE CalcWeightedScore(const System &system, - const FeatureFunction &featureFunction, SCORE score); + const FeatureFunction &featureFunction, SCORE score); protected: SCORE *m_scores; diff --git a/moses2/SubPhrase.h b/moses2/SubPhrase.h index 893a7ba8f..21b003912 100644 --- a/moses2/SubPhrase.h +++ b/moses2/SubPhrase.h @@ -13,25 +13,25 @@ class SubPhrase: public Phrase { public: SubPhrase(const Phrase &origPhrase, size_t start, size_t size) - :m_origPhrase(&origPhrase) - ,m_start(start) - ,m_size(size) + :m_origPhrase(&origPhrase) + ,m_start(start) + ,m_size(size) {} - virtual const WORD& operator[](size_t pos) const - { return (*m_origPhrase)[pos + m_start]; } + virtual const WORD& operator[](size_t pos) const { + return (*m_origPhrase)[pos + m_start]; + } - virtual size_t GetSize() const - { return m_size; } + virtual size_t GetSize() const { + return m_size; + } - SubPhrase GetSubPhrase(size_t start, size_t size) const - { + SubPhrase GetSubPhrase(size_t start, size_t size) const { SubPhrase ret(*m_origPhrase, m_start + start, size); return ret; } - virtual std::string Debug(const System &system) const - { + virtual std::string Debug(const System &system) const { std::stringstream out; if (GetSize()) { out << (*this)[0].Debug(system); diff --git a/moses2/System.cpp b/moses2/System.cpp index c02c47a6c..63df967fe 100644 --- a/moses2/System.cpp +++ b/moses2/System.cpp @@ -21,7 +21,7 @@ namespace Moses2 { System::System(const Parameter ¶msArg) : - params(paramsArg), featureFunctions(*this) + params(paramsArg), featureFunctions(*this) { options.init(paramsArg); IsPb(); @@ -39,7 +39,7 @@ System::System(const Parameter ¶msArg) : } if (!options.output.detailed_transrep_filepath.empty()) { - detailedTranslationCollector.reset(new OutputCollector(options.output.detailed_transrep_filepath)); + detailedTranslationCollector.reset(new OutputCollector(options.output.detailed_transrep_filepath)); } 
featureFunctions.Create(); @@ -105,16 +105,16 @@ void System::LoadWeights() // set weight BOOST_FOREACH(const WeightMap::value_type &valPair, allWeights) { - const string &ffName = valPair.first; - const std::vector &ffWeights = valPair.second; - /* - cerr << ffName << "="; - for (size_t i = 0; i < ffWeights.size(); ++i) { - cerr << ffWeights[i] << " "; - } - cerr << endl; + const string &ffName = valPair.first; + const std::vector &ffWeights = valPair.second; + /* + cerr << ffName << "="; + for (size_t i = 0; i < ffWeights.size(); ++i) { + cerr << ffWeights[i] << " "; + } + cerr << endl; */ - weights.SetWeights(featureFunctions, ffName, ffWeights); + weights.SetWeights(featureFunctions, ffName, ffWeights); } } @@ -123,20 +123,19 @@ void System::LoadMappings() const PARAM_VEC *vec = params.GetParam("mapping"); UTIL_THROW_IF2(vec == NULL, "Must have [mapping] section"); - BOOST_FOREACH(const std::string &line, *vec){ - vector toks = Tokenize(line); - assert( (toks.size() == 2 && toks[0] == "T") || (toks.size() == 3 && toks[1] == "T") ); + BOOST_FOREACH(const std::string &line, *vec) { + vector toks = Tokenize(line); + assert( (toks.size() == 2 && toks[0] == "T") || (toks.size() == 3 && toks[1] == "T") ); - size_t ptInd; - if (toks.size() == 2) { - ptInd = Scan(toks[1]); + size_t ptInd; + if (toks.size() == 2) { + ptInd = Scan(toks[1]); + } else { + ptInd = Scan(toks[2]); + } + const PhraseTable *pt = featureFunctions.GetPhraseTableExcludeUnknownWordPenalty(ptInd); + mappings.push_back(pt); } - else { - ptInd = Scan(toks[2]); - } - const PhraseTable *pt = featureFunctions.GetPhraseTableExcludeUnknownWordPenalty(ptInd); - mappings.push_back(pt); -} // unk pt const UnknownWordPenalty *unkWP = featureFunctions.GetUnknownWordPenalty(); @@ -150,17 +149,15 @@ void System::LoadDecodeGraphBackoff() const PARAM_VEC *vec = params.GetParam("decoding-graph-backoff"); for (size_t i = 0; i < mappings.size(); ++i) { - PhraseTable *pt = const_cast(mappings[i]); + PhraseTable *pt = 
const_cast(mappings[i]); - if (vec && vec->size() < i) { - pt->decodeGraphBackoff = Scan((*vec)[i]); - } - else if (pt == featureFunctions.GetUnknownWordPenalty()) { - pt->decodeGraphBackoff = 1; - } - else { - pt->decodeGraphBackoff = 0; - } + if (vec && vec->size() < i) { + pt->decodeGraphBackoff = Scan((*vec)[i]); + } else if (pt == featureFunctions.GetUnknownWordPenalty()) { + pt->decodeGraphBackoff = 1; + } else { + pt->decodeGraphBackoff = 0; + } } } diff --git a/moses2/TargetPhrase.h b/moses2/TargetPhrase.h index 50f66326a..2522f85df 100644 --- a/moses2/TargetPhrase.h +++ b/moses2/TargetPhrase.h @@ -27,25 +27,27 @@ public: SCORE *scoreProperties; TargetPhrase(MemPool &pool, const PhraseTable &pt, const System &system, size_t size) - : PhraseImplTemplate(pool, size) - , pt(pt) - , scoreProperties(NULL) - , m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo()) - { + : PhraseImplTemplate(pool, size) + , pt(pt) + , scoreProperties(NULL) + , m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo()) { m_scores = new (pool.Allocate()) Scores(system, pool, - system.featureFunctions.GetNumScores()); + system.featureFunctions.GetNumScores()); } - Scores &GetScores() - { return *m_scores; } + Scores &GetScores() { + return *m_scores; + } - const Scores &GetScores() const - { return *m_scores; } + const Scores &GetScores() const { + return *m_scores; + } virtual SCORE GetScoreForPruning() const = 0; - SCORE *GetScoresProperty(int propertyInd) const - { return scoreProperties ? scoreProperties + propertyInd : NULL; } + SCORE *GetScoresProperty(int propertyInd) const { + return scoreProperties ? 
scoreProperties + propertyInd : NULL; + } const AlignmentInfo &GetAlignTerm() const { return *m_alignTerm; @@ -63,8 +65,7 @@ public: m_alignTerm = AlignmentInfoCollection::Instance().Add(coll); } - virtual void SetAlignmentInfo(const std::string &alignString) - { + virtual void SetAlignmentInfo(const std::string &alignString) { AlignmentInfo::CollType alignTerm; std::vector toks = Tokenize(alignString); @@ -86,35 +87,32 @@ public: } - void OutputToStream(const System &system, const Phrase &inputPhrase, std::ostream &out) const - { - // get placeholders - FactorType placeholderFactor = system.options.input.placeholder_factor; - std::map placeholders; - if (placeholderFactor != NOT_FOUND) { - // creates map of target position -> factor for placeholders - placeholders = GetPlaceholders(system, inputPhrase); - } + void OutputToStream(const System &system, const Phrase &inputPhrase, std::ostream &out) const { + // get placeholders + FactorType placeholderFactor = system.options.input.placeholder_factor; + std::map placeholders; + if (placeholderFactor != NOT_FOUND) { + // creates map of target position -> factor for placeholders + placeholders = GetPlaceholders(system, inputPhrase); + } - size_t size = PhraseImplTemplate::GetSize(); - for (size_t i = 0; i < size; ++i) { - // output placeholder, if any - std::map::const_iterator iter = placeholders.find(i); - if (iter == placeholders.end()) { - const WORD &word = (*this)[i]; - word.OutputToStream(system, out); - } - else { - const Factor *factor = iter->second; - out << *factor; - } + size_t size = PhraseImplTemplate::GetSize(); + for (size_t i = 0; i < size; ++i) { + // output placeholder, if any + std::map::const_iterator iter = placeholders.find(i); + if (iter == placeholders.end()) { + const WORD &word = (*this)[i]; + word.OutputToStream(system, out); + } else { + const Factor *factor = iter->second; + out << *factor; + } - out << " "; - } + out << " "; + } } - std::map GetPlaceholders(const System &system, const 
Phrase &inputPhrase) const - { + std::map GetPlaceholders(const System &system, const Phrase &inputPhrase) const { FactorType placeholderFactor = system.options.input.placeholder_factor; std::map ret; //std::cerr << "inputPhrase=" << inputPhrase.Debug(system) << std::endl; @@ -122,8 +120,8 @@ public: for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) { const Factor *factor = inputPhrase[sourcePos][placeholderFactor]; if (factor) { - //std::cerr << "factor=" << *factor << std::endl; - //std::cerr << "tp=" << Debug(system) << std::endl; + //std::cerr << "factor=" << *factor << std::endl; + //std::cerr << "tp=" << Debug(system) << std::endl; std::set targetPos = GetAlignTerm().GetAlignmentsForSource(sourcePos); UTIL_THROW_IF2(targetPos.size() != 1, "Placeholder should be aligned to 1, and only 1, word:" << targetPos.size() << "!=1"); @@ -134,8 +132,7 @@ public: return ret; } - virtual std::string Debug(const System &system) const - { + virtual std::string Debug(const System &system) const { std::stringstream out; out << Phrase::Debug(system); out << " pt=" << pt.GetName() << " "; @@ -153,15 +150,12 @@ protected: /////////////////////////////////////////////////////////////////////// template -struct CompareScoreForPruning -{ - bool operator()(const TP *a, const TP *b) const - { +struct CompareScoreForPruning { + bool operator()(const TP *a, const TP *b) const { return a->GetScoreForPruning() > b->GetScoreForPruning(); } - bool operator()(const TP &a, const TP &b) const - { + bool operator()(const TP &a, const TP &b) const { return a.GetScoreForPruning() > b.GetScoreForPruning(); } }; diff --git a/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp b/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp index 338a8e221..47f03626a 100644 --- a/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp +++ b/moses2/TranslationModel/CompactPT/BlockHashIndex.cpp @@ -33,10 +33,10 @@ namespace Moses2 { #ifdef WITH_THREADS 
BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits, - size_t threadsNum) : - m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits), m_fileHandle(0), m_fileHandleStart( - 0), m_landmarks(true), m_size(0), m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges( - 0), m_threadPool(threadsNum) + size_t threadsNum) : + m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits), m_fileHandle(0), m_fileHandleStart( + 0), m_landmarks(true), m_size(0), m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges( + 0), m_threadPool(threadsNum) { #ifndef HAVE_CMPH std::cerr << "minphr: CMPH support not compiled in." << std::endl; @@ -45,9 +45,9 @@ BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits, } #else BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits) -: m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits), -m_fileHandle(0), m_fileHandleStart(0), m_size(0), -m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0) + : m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits), + m_fileHandle(0), m_fileHandleStart(0), m_size(0), + m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0) { #ifndef HAVE_CMPH std::cerr << "minphr: CMPH support not compiled in." 
<< std::endl; @@ -60,11 +60,11 @@ BlockHashIndex::~BlockHashIndex() { #ifdef HAVE_CMPH for (std::vector::iterator it = m_hashes.begin(); it != m_hashes.end(); - it++) + it++) if (*it != 0) cmph_destroy((cmph_t*) *it); for (std::vector*>::iterator it = m_arrays.begin(); - it != m_arrays.end(); it++) + it != m_arrays.end(); it++) if (*it != 0) delete *it; #endif } @@ -73,7 +73,7 @@ size_t BlockHashIndex::GetHash(const char* key) { std::string keyStr(key); size_t i = std::distance(m_landmarks.begin(), - std::upper_bound(m_landmarks.begin(), m_landmarks.end(), keyStr)) - 1; + std::upper_bound(m_landmarks.begin(), m_landmarks.end(), keyStr)) - 1; if (i == 0ul - 1) return GetSize(); @@ -99,14 +99,14 @@ size_t BlockHashIndex::GetHash(size_t i, const char* key) //LoadRange(i); #ifdef HAVE_CMPH size_t idx = cmph_search((cmph_t*) m_hashes[i], key, - (cmph_uint32) strlen(key)); + (cmph_uint32) strlen(key)); #else assert(0); size_t idx = 0; #endif std::pair orderPrint = m_arrays[i]->Get(idx, m_orderBits, - m_fingerPrintBits); + m_fingerPrintBits); m_clocks[i] = clock(); if (GetFprint(key) == orderPrint.second) return orderPrint.first; @@ -229,7 +229,7 @@ size_t BlockHashIndex::FinalizeSave() size_t fileHandleStop = std::ftell(m_fileHandle); return fileHandleStop - m_fileHandleStart + sizeof(m_orderBits) - + sizeof(m_fingerPrintBits); + + sizeof(m_fingerPrintBits); } size_t BlockHashIndex::Save(std::FILE * mphf) @@ -262,7 +262,7 @@ size_t BlockHashIndex::LoadIndex(std::FILE* mphf) read += std::fread(&seekIndexSize, sizeof(size_t), 1, m_fileHandle); m_seekIndex.resize(seekIndexSize); read += std::fread(&m_seekIndex[0], sizeof(size_t), seekIndexSize, - m_fileHandle); + m_fileHandle); m_hashes.resize(seekIndexSize, 0); m_clocks.resize(seekIndexSize, 0); m_arrays.resize(seekIndexSize, 0); @@ -403,13 +403,13 @@ void* BlockHashIndex::vectorAdapter(std::vector& v) } void* BlockHashIndex::vectorAdapter( - StringVector& sv) + StringVector& sv) { return (void*) 
CmphStringVectorAdapter(sv); } void* BlockHashIndex::vectorAdapter( - StringVector& sv) + StringVector& sv) { return (void*) CmphStringVectorAdapter(sv); } diff --git a/moses2/TranslationModel/CompactPT/BlockHashIndex.h b/moses2/TranslationModel/CompactPT/BlockHashIndex.h index b91ef8f6c..10c55601e 100644 --- a/moses2/TranslationModel/CompactPT/BlockHashIndex.h +++ b/moses2/TranslationModel/CompactPT/BlockHashIndex.h @@ -81,17 +81,14 @@ private: { public: HashTask(int id, BlockHashIndex& hash, Keys& keys) : - m_id(id), m_hash(hash), m_keys(new Keys(keys)) - { + m_id(id), m_hash(hash), m_keys(new Keys(keys)) { } - virtual void Run() - { + virtual void Run() { m_hash.CalcHash(m_id, *m_keys); } - virtual ~HashTask() - { + virtual ~HashTask() { delete m_keys; } @@ -108,7 +105,7 @@ private: public: #ifdef WITH_THREADS BlockHashIndex(size_t orderBits, size_t fingerPrintBits, - size_t threadsNum = 2); + size_t threadsNum = 2); #else BlockHashIndex(size_t orderBits, size_t fingerPrintBits); #endif @@ -147,8 +144,7 @@ public: void KeepNLastRanges(float ratio = 0.1, float tolerance = 0.1); template - void AddRange(Keys &keys) - { + void AddRange(Keys &keys) { size_t current = m_landmarks.size(); if (m_landmarks.size() && m_landmarks.back().str() >= keys[0]) { @@ -171,7 +167,7 @@ public: #ifdef WITH_THREADS boost::shared_ptr > ht( - new HashTask(current, *this, keys)); + new HashTask(current, *this, keys)); m_threadPool.Submit(ht); #else CalcHash(current, keys); @@ -179,8 +175,7 @@ public: } template - void CalcHash(size_t current, Keys &keys) - { + void CalcHash(size_t current, Keys &keys) { #ifdef HAVE_CMPH void* source = vectorAdapter(keys); CalcHash(current, source); diff --git a/moses2/TranslationModel/CompactPT/CanonicalHuffman.h b/moses2/TranslationModel/CompactPT/CanonicalHuffman.h index ffb6488c0..eb11c730a 100644 --- a/moses2/TranslationModel/CompactPT/CanonicalHuffman.h +++ b/moses2/TranslationModel/CompactPT/CanonicalHuffman.h @@ -43,24 +43,20 @@ private: typedef 
boost::unordered_map > EncodeMap; EncodeMap m_encodeMap; - struct MinHeapSorter - { + struct MinHeapSorter { std::vector& m_vec; MinHeapSorter(std::vector& vec) : - m_vec(vec) - { + m_vec(vec) { } - bool operator()(size_t a, size_t b) - { + bool operator()(size_t a, size_t b) { return m_vec[a] > m_vec[b]; } }; template - void CalcLengths(Iterator begin, Iterator end, std::vector& lengths) - { + void CalcLengths(Iterator begin, Iterator end, std::vector& lengths) { size_t n = std::distance(begin, end); std::vector A(2 * n, 0); @@ -109,11 +105,10 @@ private: lengths[i] = A[i + n]; } - void CalcCodes(std::vector& lengths) - { + void CalcCodes(std::vector& lengths) { std::vector numLength; for (std::vector::iterator it = lengths.begin(); - it != lengths.end(); it++) { + it != lengths.end(); it++) { size_t length = *it; if (numLength.size() <= length) numLength.resize(length + 1, 0); numLength[length]++; @@ -139,7 +134,7 @@ private: size_t length = lengths[i]; size_t pos = m_lengthIndex[length] - + (nextCode[length] - m_firstCodes[length]); + + (nextCode[length] - m_firstCodes[length]); t_symbols[pos] = data; nextCode[length] = nextCode[length] + 1; @@ -148,13 +143,12 @@ private: m_symbols.swap(t_symbols); } - void CreateCodeMap() - { + void CreateCodeMap() { for (size_t l = 1; l < m_lengthIndex.size(); l++) { size_t intCode = m_firstCodes[l]; size_t num = ( - (l + 1 < m_lengthIndex.size()) ? - m_lengthIndex[l + 1] : m_symbols.size()) - m_lengthIndex[l]; + (l + 1 < m_lengthIndex.size()) ? 
+ m_lengthIndex[l + 1] : m_symbols.size()) - m_lengthIndex[l]; for (size_t i = 0; i < num; i++) { Data data = m_symbols[m_lengthIndex[l] + i]; @@ -165,17 +159,15 @@ private: } } - const boost::dynamic_bitset<>& Encode(Data data) const - { + const boost::dynamic_bitset<>& Encode(Data data) const { typename EncodeMap::const_iterator it = m_encodeMap.find(data); UTIL_THROW_IF2(it == m_encodeMap.end(), - "Cannot find symbol in encoding map"); + "Cannot find symbol in encoding map"); return it->second; } template - void PutCode(BitWrapper& bitWrapper, const boost::dynamic_bitset<>& code) - { + void PutCode(BitWrapper& bitWrapper, const boost::dynamic_bitset<>& code) { for (int j = code.size() - 1; j >= 0; j--) bitWrapper.Put(code[j]); } @@ -183,8 +175,7 @@ private: public: template - CanonicalHuffman(Iterator begin, Iterator end, bool forEncoding = true) - { + CanonicalHuffman(Iterator begin, Iterator end, bool forEncoding = true) { std::vector lengths; CalcLengths(begin, end, lengths); CalcCodes(lengths); @@ -192,22 +183,19 @@ public: if (forEncoding) CreateCodeMap(); } - CanonicalHuffman(std::FILE* pFile, bool forEncoding = false) - { + CanonicalHuffman(std::FILE* pFile, bool forEncoding = false) { Load(pFile); if (forEncoding) CreateCodeMap(); } template - void Put(BitWrapper& bitWrapper, Data data) - { + void Put(BitWrapper& bitWrapper, Data data) { PutCode(bitWrapper, Encode(data)); } template - Data Read(BitWrapper& bitWrapper) - { + Data Read(BitWrapper& bitWrapper) { if (bitWrapper.TellFromEnd()) { size_t intCode = bitWrapper.Read(); size_t len = 1; @@ -220,8 +208,7 @@ public: return Data(); } - size_t Load(std::FILE* pFile) - { + size_t Load(std::FILE* pFile) { size_t start = std::ftell(pFile); size_t read = 0; @@ -241,8 +228,7 @@ public: return std::ftell(pFile) - start; } - size_t Save(std::FILE* pFile) - { + size_t Save(std::FILE* pFile) { size_t start = std::ftell(pFile); size_t size = m_symbols.size(); @@ -277,24 +263,20 @@ private: public: 
BitWrapper(Container &data) : - m_data(data), m_iterator(m_data.begin()), m_currentValue(0), m_valueBits( - sizeof(typename Container::value_type) * 8), m_mask(1), m_bitPos(0) - { + m_data(data), m_iterator(m_data.begin()), m_currentValue(0), m_valueBits( + sizeof(typename Container::value_type) * 8), m_mask(1), m_bitPos(0) { } - bool Read() - { + bool Read() { if (m_bitPos % m_valueBits == 0) { if (m_iterator != m_data.end()) m_currentValue = *m_iterator++; - } - else m_currentValue = m_currentValue >> 1; + } else m_currentValue = m_currentValue >> 1; m_bitPos++; return (m_currentValue & m_mask); } - void Put(bool bit) - { + void Put(bool bit) { if (m_bitPos % m_valueBits == 0) m_data.push_back(0); if (bit) m_data[m_data.size() - 1] |= m_mask << (m_bitPos % m_valueBits); @@ -302,40 +284,34 @@ public: m_bitPos++; } - size_t Tell() - { + size_t Tell() { return m_bitPos; } - size_t TellFromEnd() - { + size_t TellFromEnd() { if (m_data.size() * m_valueBits < m_bitPos) return 0; return m_data.size() * m_valueBits - m_bitPos; } - void Seek(size_t bitPos) - { + void Seek(size_t bitPos) { m_bitPos = bitPos; m_iterator = m_data.begin() + int((m_bitPos - 1) / m_valueBits); m_currentValue = (*m_iterator) >> ((m_bitPos - 1) % m_valueBits); m_iterator++; } - void SeekFromEnd(size_t bitPosFromEnd) - { + void SeekFromEnd(size_t bitPosFromEnd) { size_t bitPos = m_data.size() * m_valueBits - bitPosFromEnd; Seek(bitPos); } - void Reset() - { + void Reset() { m_iterator = m_data.begin(); m_currentValue = 0; m_bitPos = 0; } - Container& GetContainer() - { + Container& GetContainer() { return m_data; } }; diff --git a/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp b/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp index a51dc5a45..8dc3ebde6 100644 --- a/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp +++ b/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp @@ -42,7 +42,7 @@ void CmphStringVectorAdapterRewind(void *data) 
cmph_io_adapter_t *CmphVectorAdapterNew(std::vector& v) { cmph_io_adapter_t * key_source = (cmph_io_adapter_t *) malloc( - sizeof(cmph_io_adapter_t)); + sizeof(cmph_io_adapter_t)); cmph_vector_t * cmph_vector = (cmph_vector_t *) malloc(sizeof(cmph_vector_t)); assert(key_source); assert(cmph_vector); diff --git a/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h b/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h index 20d43a80c..8d23b4f41 100644 --- a/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h +++ b/moses2/TranslationModel/CompactPT/CmphStringVectorAdapter.h @@ -33,18 +33,17 @@ namespace Moses2 { -typedef struct -{ +typedef struct { void *vector; cmph_uint32 position; } cmph_vector_t; template class Allocator> cmph_io_adapter_t *CmphStringVectorAdapterNew( - StringVector& sv) + StringVector& sv) { cmph_io_adapter_t * key_source = (cmph_io_adapter_t *) malloc( - sizeof(cmph_io_adapter_t)); + sizeof(cmph_io_adapter_t)); cmph_vector_t * cmph_vector = (cmph_vector_t *) malloc(sizeof(cmph_vector_t)); assert(key_source); assert(cmph_vector); @@ -79,7 +78,7 @@ void CmphStringVectorAdapterRewind(void *data); template class Allocator> cmph_io_adapter_t* CmphStringVectorAdapter( - StringVector& sv) + StringVector& sv) { cmph_io_adapter_t * key_source = CmphStringVectorAdapterNew(sv); diff --git a/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp b/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp index 1d32b9a6f..051116dec 100644 --- a/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp +++ b/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp @@ -32,23 +32,23 @@ namespace Moses2 bool LexicalReorderingTableCompact::s_inMemoryByDefault = false; LexicalReorderingTableCompact::LexicalReorderingTableCompact( - const std::string& filePath, const std::vector& f_factors, - const std::vector& e_factors, - const std::vector& c_factors) : - LexicalReorderingTable(f_factors, 
e_factors, c_factors), m_inMemory( - s_inMemoryByDefault), m_numScoreComponent(6), m_multipleScoreTrees( - true), m_hash(10, 16), m_scoreTrees(1) + const std::string& filePath, const std::vector& f_factors, + const std::vector& e_factors, + const std::vector& c_factors) : + LexicalReorderingTable(f_factors, e_factors, c_factors), m_inMemory( + s_inMemoryByDefault), m_numScoreComponent(6), m_multipleScoreTrees( + true), m_hash(10, 16), m_scoreTrees(1) { Load(filePath); } LexicalReorderingTableCompact::LexicalReorderingTableCompact( - const std::vector& f_factors, - const std::vector& e_factors, - const std::vector& c_factors) : - LexicalReorderingTable(f_factors, e_factors, c_factors), m_inMemory( - s_inMemoryByDefault), m_numScoreComponent(6), m_multipleScoreTrees( - true), m_hash(10, 16), m_scoreTrees(1) + const std::vector& f_factors, + const std::vector& e_factors, + const std::vector& c_factors) : + LexicalReorderingTable(f_factors, e_factors, c_factors), m_inMemory( + s_inMemoryByDefault), m_numScoreComponent(6), m_multipleScoreTrees( + true), m_hash(10, 16), m_scoreTrees(1) { } @@ -81,7 +81,7 @@ std::vector LexicalReorderingTableCompact::GetScore(const Phrase bitStream(scoresString); for (size_t i = 0; i < m_numScoreComponent; i++) scores.push_back( - m_scoreTrees[m_multipleScoreTrees ? i : 0]->Read(bitStream)); + m_scoreTrees[m_multipleScoreTrees ? 
i : 0]->Read(bitStream)); return scores; } @@ -93,7 +93,7 @@ std::string LexicalReorderingTableCompact::MakeKey(const Phrase& f const Phrase& e, const Phrase& c) const { return MakeKey(Trim(f.GetString(m_FactorsF)), Trim(e.GetString(m_FactorsE)), - Trim(c.GetString(m_FactorsC))); + Trim(c.GetString(m_FactorsC))); } std::string LexicalReorderingTableCompact::MakeKey(const std::string& f, @@ -126,7 +126,7 @@ LexicalReorderingTableCompact::CheckAndLoad(const std::string& filePath, //there exists a compact binary version use that std::cerr << "Using compact lexical reordering table" << std::endl; return new LexicalReorderingTableCompact(filePath + minlexr, f_factors, - e_factors, c_factors); + e_factors, c_factors); } // file name is specified with suffix if (filePath.substr(filePath.length() - minlexr.length(), minlexr.length()) @@ -134,7 +134,7 @@ LexicalReorderingTableCompact::CheckAndLoad(const std::string& filePath, //there exists a compact binary version use that std::cerr << "Using compact lexical reordering table" << std::endl; return new LexicalReorderingTableCompact(filePath, f_factors, e_factors, - c_factors); + c_factors); } #endif return 0; @@ -152,16 +152,15 @@ void LexicalReorderingTableCompact::Load(std::string filePath) size_t read = 0; read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, - pFile); + pFile); read += std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, - pFile); + pFile); if (m_multipleScoreTrees) { m_scoreTrees.resize(m_numScoreComponent); for (size_t i = 0; i < m_numScoreComponent; i++) m_scoreTrees[i] = new CanonicalHuffman(pFile); - } - else { + } else { m_scoreTrees.resize(1); m_scoreTrees[0] = new CanonicalHuffman(pFile); } diff --git a/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h b/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h index 90abf4197..cef6ae108 100644 --- a/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h +++ 
b/moses2/TranslationModel/CompactPT/LexicalReorderingTableCompact.h @@ -36,13 +36,11 @@ class LexicalReorderingTable { public: LexicalReorderingTable(const FactorList& f_factors, - const FactorList& e_factors, const FactorList& c_factors) : - m_FactorsF(f_factors), m_FactorsE(e_factors), m_FactorsC(c_factors) - { + const FactorList& e_factors, const FactorList& c_factors) : + m_FactorsF(f_factors), m_FactorsE(e_factors), m_FactorsC(c_factors) { } - virtual ~LexicalReorderingTable() - { + virtual ~LexicalReorderingTable() { } public: @@ -51,33 +49,27 @@ public: GetScore(const Phrase& f, const Phrase& e, const Phrase& c) = 0; virtual - void InitializeForInput() - { + void InitializeForInput() { /* override for on-demand loading */ } ; virtual - void InitializeForInputPhrase(const Phrase&) - { + void InitializeForInputPhrase(const Phrase&) { } - const FactorList& GetFFactorMask() const - { + const FactorList& GetFFactorMask() const { return m_FactorsF; } - const FactorList& GetEFactorMask() const - { + const FactorList& GetEFactorMask() const { return m_FactorsE; } - const FactorList& GetCFactorMask() const - { + const FactorList& GetCFactorMask() const { return m_FactorsC; } virtual - void DbgDump(std::ostream* out) const - { + void DbgDump(std::ostream* out) const { *out << "Overwrite in subclass...\n"; } ; @@ -109,17 +101,17 @@ private: std::string MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const; std::string MakeKey(const std::string& f, const std::string& e, - const std::string& c) const; + const std::string& c) const; public: LexicalReorderingTableCompact(const std::string& filePath, - const std::vector& f_factors, - const std::vector& e_factors, - const std::vector& c_factors); + const std::vector& f_factors, + const std::vector& e_factors, + const std::vector& c_factors); LexicalReorderingTableCompact(const std::vector& f_factors, - const std::vector& e_factors, - const std::vector& c_factors); + const std::vector& e_factors, + const 
std::vector& c_factors); virtual ~LexicalReorderingTableCompact(); @@ -129,9 +121,9 @@ public: static LexicalReorderingTable* CheckAndLoad(const std::string& filePath, - const std::vector& f_factors, - const std::vector& e_factors, - const std::vector& c_factors); + const std::vector& f_factors, + const std::vector& e_factors, + const std::vector& c_factors); void Load(std::string filePath); diff --git a/moses2/TranslationModel/CompactPT/ListCoders.h b/moses2/TranslationModel/CompactPT/ListCoders.h index 5a01274d9..540f50a59 100644 --- a/moses2/TranslationModel/CompactPT/ListCoders.h +++ b/moses2/TranslationModel/CompactPT/ListCoders.h @@ -33,8 +33,7 @@ class VarIntType { private: template - static void EncodeSymbol(IntType input, OutIt output) - { + static void EncodeSymbol(IntType input, OutIt output) { if (input == 0) { *output = 0; output++; @@ -56,8 +55,7 @@ private: ; template - static void DecodeSymbol(InIt &it, InIt end, IntType &output) - { + static void DecodeSymbol(InIt &it, InIt end, IntType &output) { T msb = 1 << (sizeof(T) * 8 - 1); IntType shift = (sizeof(T) * 8 - 1); @@ -81,8 +79,7 @@ private: public: template - static void Encode(InIt it, InIt end, OutIt outIt) - { + static void Encode(InIt it, InIt end, OutIt outIt) { while (it != end) { EncodeSymbol(*it, outIt); it++; @@ -90,8 +87,7 @@ public: } template - static void Decode(InIt &it, InIt end, OutIt outIt) - { + static void Decode(InIt &it, InIt end, OutIt outIt) { while (it != end) { size_t output; DecodeSymbol(it, end, output); @@ -101,8 +97,7 @@ public: } template - static size_t DecodeAndSum(InIt &it, InIt end, size_t num) - { + static size_t DecodeAndSum(InIt &it, InIt end, size_t num) { size_t sum = 0; size_t curr = 0; @@ -130,8 +125,7 @@ private: typedef unsigned int uint; template - inline static void EncodeSymbol(uint &output, InIt it, InIt end) - { + inline static void EncodeSymbol(uint &output, InIt it, InIt end) { uint length = end - it; uint type = 0; @@ -182,8 +176,8 @@ private: 
uint i = 0; while (it != end) { UTIL_THROW_IF2(*it > 268435455, - "You are trying to encode " << *it - << " with Simple9. Cannot encode numbers larger than 268435455 (2^28-1)"); + "You are trying to encode " << *it + << " with Simple9. Cannot encode numbers larger than 268435455 (2^28-1)"); uint l = bitlength * (length - i - 1); output |= *it << l; @@ -193,8 +187,7 @@ private: } template - static inline void DecodeSymbol(uint input, OutIt outIt) - { + static inline void DecodeSymbol(uint input, OutIt outIt) { uint type = (input >> 28); uint bitlen = 0; @@ -258,8 +251,7 @@ private: outIt++; } - static inline size_t DecodeAndSumSymbol(uint input, size_t num, size_t &curr) - { + static inline size_t DecodeAndSumSymbol(uint input, size_t num, size_t &curr) { uint type = (input >> 28); uint bitlen = 0; @@ -327,8 +319,7 @@ private: public: template - static void Encode(InIt it, InIt end, OutIt outIt) - { + static void Encode(InIt it, InIt end, OutIt outIt) { uint parts[] = { 1, 2, 3, 4, 5, 7, 9, 14, 28 }; uint buffer[28]; @@ -367,8 +358,7 @@ public: } template - static void Decode(InIt &it, InIt end, OutIt outIt) - { + static void Decode(InIt &it, InIt end, OutIt outIt) { while (it != end) { DecodeSymbol(*it, outIt); it++; @@ -376,8 +366,7 @@ public: } template - static size_t DecodeAndSum(InIt &it, InIt end, size_t num) - { + static size_t DecodeAndSum(InIt &it, InIt end, size_t num) { size_t sum = 0; size_t curr = 0; while (it != end && curr < num) { diff --git a/moses2/TranslationModel/CompactPT/MmapAllocator.h b/moses2/TranslationModel/CompactPT/MmapAllocator.h index 1e40d8d41..c92166da6 100644 --- a/moses2/TranslationModel/CompactPT/MmapAllocator.h +++ b/moses2/TranslationModel/CompactPT/MmapAllocator.h @@ -63,43 +63,37 @@ public: typedef std::ptrdiff_t difference_type; MmapAllocator() throw () : - m_file_ptr(std::tmpfile()), m_file_desc(fileno(m_file_ptr)), m_page_size( - util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset(0), m_fixed( - false), 
m_count(new size_t(0)) - { + m_file_ptr(std::tmpfile()), m_file_desc(fileno(m_file_ptr)), m_page_size( + util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset(0), m_fixed( + false), m_count(new size_t(0)) { } MmapAllocator(std::FILE* f_ptr) throw () : - m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)), m_page_size( - util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset(0), m_fixed( - false), m_count(new size_t(0)) - { + m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)), m_page_size( + util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset(0), m_fixed( + false), m_count(new size_t(0)) { } MmapAllocator(std::FILE* f_ptr, size_t data_offset) throw () : - m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)), m_page_size( - util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset( - data_offset), m_fixed(true), m_count(new size_t(0)) - { + m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)), m_page_size( + util::SizePage()), m_map_size(0), m_data_ptr(0), m_data_offset( + data_offset), m_fixed(true), m_count(new size_t(0)) { } MmapAllocator(std::string fileName) throw () : - m_file_ptr(std::fopen(fileName.c_str(), "wb+")), m_file_desc( - fileno(m_file_ptr)), m_page_size(util::SizePage()), m_map_size(0), m_data_ptr( - 0), m_data_offset(0), m_fixed(false), m_count(new size_t(0)) - { + m_file_ptr(std::fopen(fileName.c_str(), "wb+")), m_file_desc( + fileno(m_file_ptr)), m_page_size(util::SizePage()), m_map_size(0), m_data_ptr( + 0), m_data_offset(0), m_fixed(false), m_count(new size_t(0)) { } MmapAllocator(const MmapAllocator& c) throw () : - m_file_ptr(c.m_file_ptr), m_file_desc(c.m_file_desc), m_page_size( - c.m_page_size), m_map_size(c.m_map_size), m_data_ptr(c.m_data_ptr), m_data_offset( - c.m_data_offset), m_fixed(c.m_fixed), m_count(c.m_count) - { + m_file_ptr(c.m_file_ptr), m_file_desc(c.m_file_desc), m_page_size( + c.m_page_size), m_map_size(c.m_map_size), m_data_ptr(c.m_data_ptr), m_data_offset( + c.m_data_offset), 
m_fixed(c.m_fixed), m_count(c.m_count) { (*m_count)++; } - ~MmapAllocator() throw () - { + ~MmapAllocator() throw () { if (m_data_ptr && *m_count == 0) { util::UnmapOrThrow(m_data_ptr, m_map_size); if (!m_fixed && std::ftell(m_file_ptr) != -1) std::fclose(m_file_ptr); @@ -108,28 +102,23 @@ public: } template - struct rebind - { + struct rebind { typedef MmapAllocator other; }; - pointer address(reference value) const - { + pointer address(reference value) const { return &value; } - const_pointer address(const_reference value) const - { + const_pointer address(const_reference value) const { return &value; } - size_type max_size() const throw () - { + size_type max_size() const throw () { return std::numeric_limits::max() / sizeof(value_type); } - pointer allocate(size_type num, const void* = 0) - { + pointer allocate(size_type num, const void* = 0) { m_map_size = num * sizeof(T); #if defined(_WIN32) || defined(_WIN64) @@ -142,57 +131,52 @@ public: size_t read = 0; read += ftruncate(m_file_desc, m_map_size); m_data_ptr = (char *) util::MapOrThrow(m_map_size, true, map_shared, - false, m_file_desc, 0); + false, m_file_desc, 0); return (pointer) m_data_ptr; - } - else { + } else { const size_t map_offset = (m_data_offset / m_page_size) * m_page_size; const size_t relative_offset = m_data_offset - map_offset; const size_t adjusted_map_size = m_map_size + relative_offset; m_data_ptr = (char *) util::MapOrThrow(adjusted_map_size, false, - map_shared, false, m_file_desc, map_offset); + map_shared, false, m_file_desc, map_offset); return (pointer) (m_data_ptr + relative_offset); } } - void deallocate(pointer p, size_type num) - { + void deallocate(pointer p, size_type num) { if (!m_fixed) { util::UnmapOrThrow(p, num * sizeof(T)); - } - else { + } else { const size_t map_offset = (m_data_offset / m_page_size) * m_page_size; const size_t relative_offset = m_data_offset - map_offset; const size_t adjusted_map_size = m_map_size + relative_offset; util::UnmapOrThrow((pointer) 
((char*) p - relative_offset), - adjusted_map_size); + adjusted_map_size); } } - void construct(pointer p, const T& value) - { + void construct(pointer p, const T& value) { if (!m_fixed) new (p) value_type(value); } - void destroy(pointer p) - { + void destroy(pointer p) { if (!m_fixed) p->~T(); } template friend bool operator==(const MmapAllocator&, - const MmapAllocator&) throw (); + const MmapAllocator&) throw (); template friend bool operator!=(const MmapAllocator&, - const MmapAllocator&) throw (); + const MmapAllocator&) throw (); }; template bool operator==(const MmapAllocator& a1, - const MmapAllocator& a2) throw () + const MmapAllocator& a2) throw () { bool equal = true; equal &= a1.m_file_ptr == a2.m_file_ptr; @@ -207,7 +191,7 @@ bool operator==(const MmapAllocator& a1, template bool operator!=(const MmapAllocator& a1, - const MmapAllocator& a2) throw () + const MmapAllocator& a2) throw () { return !(a1 == a2); } diff --git a/moses2/TranslationModel/CompactPT/MonotonicVector.h b/moses2/TranslationModel/CompactPT/MonotonicVector.h index 586397db8..179354657 100644 --- a/moses2/TranslationModel/CompactPT/MonotonicVector.h +++ b/moses2/TranslationModel/CompactPT/MonotonicVector.h @@ -43,7 +43,7 @@ namespace Moses2 { template class Allocator = std::allocator> + template class Allocator = std::allocator> class MonotonicVector { private: @@ -62,17 +62,14 @@ public: typedef PosT value_type; MonotonicVector() : - m_size(0), m_last(0), m_final(false) - { + m_size(0), m_last(0), m_final(false) { } - size_t size() const - { + size_t size() const { return m_size + m_tempDiffs.size(); } - PosT at(size_t i) const - { + PosT at(size_t i) const { PosT s = stepSize; PosT j = m_anchors[i / s]; PosT r = i % s; @@ -83,23 +80,20 @@ public: k += VarInt32::DecodeAndSum(it, m_diffs.end(), 1); if (i < m_size) k += Simple9::DecodeAndSum(it, m_diffs.end(), r); else if (i < m_size + m_tempDiffs.size()) for (size_t l = 0; l < r; l++) - k += m_tempDiffs[l]; + k += m_tempDiffs[l]; 
return k; } - PosT operator[](PosT i) const - { + PosT operator[](PosT i) const { return at(i); } - PosT back() const - { + PosT back() const { return at(size() - 1); } - void push_back(PosT i) - { + void push_back(PosT i) { assert(m_final != true); if (m_anchors.size() == 0 && m_tempDiffs.size() == 0) { @@ -113,14 +107,13 @@ public: if (m_tempDiffs.size() == stepSize - 1) { Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(), - std::back_inserter(m_diffs)); + std::back_inserter(m_diffs)); m_anchors.push_back(m_diffs.size()); VarInt32::Encode(&i, &i + 1, std::back_inserter(m_diffs)); m_size += m_tempDiffs.size() + 1; m_tempDiffs.clear(); - } - else { + } else { PosT last = m_last; PosT diff = i - last; m_tempDiffs.push_back(diff); @@ -128,24 +121,21 @@ public: m_last = i; } - void commit() - { + void commit() { assert(m_final != true); Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(), - std::back_inserter(m_diffs)); + std::back_inserter(m_diffs)); m_size += m_tempDiffs.size(); m_tempDiffs.clear(); m_final = true; } - size_t usage() - { + size_t usage() { return m_diffs.size() * sizeof(unsigned int) - + m_anchors.size() * sizeof(NumT); + + m_anchors.size() * sizeof(NumT); } - size_t load(std::FILE* in, bool map = false) - { + size_t load(std::FILE* in, bool map = false) { size_t byteSize = 0; byteSize += fread(&m_final, sizeof(bool), 1, in) * sizeof(bool); @@ -160,8 +150,7 @@ public: template size_t loadVector(std::vector >& v, - std::FILE* in, bool map = false) - { + std::FILE* in, bool map = false) { // Can only be read into memory. Mapping not possible with std:allocator. 
assert(map == false); @@ -178,8 +167,7 @@ public: template size_t loadVector(std::vector >& v, - std::FILE* in, bool map = false) - { + std::FILE* in, bool map = false) { size_t byteSize = 0; size_t valSize; @@ -191,9 +179,8 @@ public: v.resize(valSize, 0); byteSize += std::fread(&v[0], sizeof(ValueT), valSize, in) - * sizeof(ValueT); - } - else { + * sizeof(ValueT); + } else { // Map it directly on specified region of file "in" starting at valPos // with length valSize * sizeof(ValueT). Mapped region cannot be resized. @@ -211,31 +198,29 @@ public: return byteSize; } - size_t save(std::FILE* out) - { + size_t save(std::FILE* out) { if (!m_final) commit(); bool byteSize = 0; byteSize += ThrowingFwrite(&m_final, sizeof(bool), 1, out) * sizeof(bool); byteSize += ThrowingFwrite(&m_size, sizeof(size_t), 1, out) - * sizeof(size_t); + * sizeof(size_t); byteSize += ThrowingFwrite(&m_last, sizeof(PosT), 1, out) * sizeof(PosT); size_t size = m_diffs.size(); byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t); byteSize += ThrowingFwrite(&m_diffs[0], sizeof(unsigned int), size, out) - * sizeof(unsigned int); + * sizeof(unsigned int); size = m_anchors.size(); byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t); byteSize += ThrowingFwrite(&m_anchors[0], sizeof(NumT), size, out) - * sizeof(NumT); + * sizeof(NumT); return byteSize; } - void swap(MonotonicVector &mv) - { + void swap(MonotonicVector &mv) { if (!m_final) commit(); m_diffs.swap(mv.m_diffs); diff --git a/moses2/TranslationModel/CompactPT/MurmurHash3.cpp b/moses2/TranslationModel/CompactPT/MurmurHash3.cpp index c3e567af6..988c1627f 100644 --- a/moses2/TranslationModel/CompactPT/MurmurHash3.cpp +++ b/moses2/TranslationModel/CompactPT/MurmurHash3.cpp @@ -151,7 +151,7 @@ void MurmurHash3_x86_32(const void * key, int len, uint32_t seed, void * out) //----------------------------------------------------------------------------- void MurmurHash3_x86_128(const void * key, const 
int len, uint32_t seed, - void * out) + void * out) { const uint8_t * data = (const uint8_t*) key; const int nblocks = len / 16; @@ -312,7 +312,7 @@ void MurmurHash3_x86_128(const void * key, const int len, uint32_t seed, //----------------------------------------------------------------------------- void MurmurHash3_x64_128(const void * key, const int len, const uint32_t seed, - void * out) + void * out) { const uint8_t * data = (const uint8_t*) key; const int nblocks = len / 16; diff --git a/moses2/TranslationModel/CompactPT/PackedArray.h b/moses2/TranslationModel/CompactPT/PackedArray.h index 409c3cca8..2da59a9f2 100644 --- a/moses2/TranslationModel/CompactPT/PackedArray.h +++ b/moses2/TranslationModel/CompactPT/PackedArray.h @@ -43,22 +43,19 @@ protected: D* m_storage; public: - PackedArray() - { + PackedArray() { m_size = 0; m_storageSize = 0; m_storage = new D[0]; } PackedArray(size_t size, size_t bits) : - m_size(size) - { + m_size(size) { m_storageSize = ceil(float(bits * size) / float(m_dataBits)); m_storage = new D[m_storageSize]; } - PackedArray(const PackedArray &c) - { + PackedArray(const PackedArray &c) { m_size = c.m_size; m_storageSize = c.m_storageSize; @@ -67,16 +64,14 @@ public: std::memcpy(m_storage, c.m_storage, m_storageSize * sizeof(D)); } - virtual ~PackedArray() - { + virtual ~PackedArray() { delete[] m_storage; m_size = 0; m_storageSize = 0; m_storage = 0; } - T Get(size_t i, size_t bits) const - { + T Get(size_t i, size_t bits) const { T out = 0; size_t bitstart = (i * bits); @@ -97,8 +92,7 @@ public: return out; } - void Set(size_t i, T v, size_t bits) - { + void Set(size_t i, T v, size_t bits) { size_t bitstart = (i * bits); size_t bitpos = bitstart; @@ -116,23 +110,19 @@ public: } } - virtual D*& GetStorage() - { + virtual D*& GetStorage() { return m_storage; } - virtual size_t GetStorageSize() const - { + virtual size_t GetStorageSize() const { return m_storageSize; } - virtual size_t Size() const - { + virtual size_t Size() const { 
return m_size; } - virtual size_t Load(std::FILE* in) - { + virtual size_t Load(std::FILE* in) { size_t a1 = std::ftell(in); size_t read = 0; @@ -146,8 +136,7 @@ public: return a2 - a1; } - virtual size_t Save(std::FILE* out) - { + virtual size_t Save(std::FILE* out) { size_t a1 = std::ftell(out); ThrowingFwrite(&m_size, sizeof(m_size), 1, out); @@ -170,31 +159,26 @@ class PairedPackedArray: public PackedArray { public: PairedPackedArray() : - PackedArray() - { + PackedArray() { } PairedPackedArray(size_t size, size_t bits1, size_t bits2) : - PackedArray(size, bits1 + bits2) - { + PackedArray(size, bits1 + bits2) { } - void Set(size_t i, T a, T b, size_t bits1, size_t bits2) - { + void Set(size_t i, T a, T b, size_t bits1, size_t bits2) { T c = 0; c = a | (b << bits1); PackedArray::Set(i, c, bits1 + bits2); } - void Set(size_t i, std::pair p, size_t bits1, size_t bits2) - { + void Set(size_t i, std::pair p, size_t bits1, size_t bits2) { T c = 0; c = p.second | (p.first << bits1); PackedArray::Set(i, c); } - std::pair Get(size_t i, size_t bits1, size_t bits2) - { + std::pair Get(size_t i, size_t bits1, size_t bits2) { T v = PackedArray::Get(i, bits1 + bits2); T a = v & ((1 << bits1) - 1); T b = v >> bits1; diff --git a/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp b/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp index 7860fed94..dc4369cfe 100644 --- a/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp +++ b/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp @@ -192,10 +192,10 @@ std::string PhraseDecoder::MakeSourceKey(std::string &source) } TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection( - const ManagerBase &mgr, - const Phrase &sourcePhrase, - bool topLevel, - bool eval) + const ManagerBase &mgr, + const Phrase &sourcePhrase, + bool topLevel, + bool eval) { // Not using TargetPhraseCollection avoiding "new" operator @@ -226,9 +226,9 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection( size_t sourcePhraseId = 
m_phraseDictionary.m_hash[MakeSourceKey(sourcePhraseString)]; /* cerr << "sourcePhraseString=" << sourcePhraseString << " " - << sourcePhraseId - << endl; - */ + << sourcePhraseId + << endl; + */ if(sourcePhraseId != m_phraseDictionary.m_hash.GetSize()) { // Retrieve compressed and encoded target phrase collection std::string encodedPhraseCollection; @@ -251,12 +251,12 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection( } TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( - const ManagerBase &mgr, - TargetPhraseVectorPtr tpv, - BitWrapper<> &encodedBitStream, - const Phrase &sourcePhrase, - bool topLevel, - bool eval) + const ManagerBase &mgr, + TargetPhraseVectorPtr tpv, + BitWrapper<> &encodedBitStream, + const Phrase &sourcePhrase, + bool topLevel, + bool eval) { const System &system = mgr.system; FactorCollection &vocab = system.GetVocab(); diff --git a/moses2/TranslationModel/CompactPT/PhraseDecoder.h b/moses2/TranslationModel/CompactPT/PhraseDecoder.h index 79faa38a6..3113f4a0d 100644 --- a/moses2/TranslationModel/CompactPT/PhraseDecoder.h +++ b/moses2/TranslationModel/CompactPT/PhraseDecoder.h @@ -122,18 +122,18 @@ public: size_t Load(std::FILE* in); TargetPhraseVectorPtr CreateTargetPhraseCollection( - const ManagerBase &mgr, - const Phrase &sourcePhrase, - bool topLevel = false, - bool eval = true); + const ManagerBase &mgr, + const Phrase &sourcePhrase, + bool topLevel = false, + bool eval = true); TargetPhraseVectorPtr DecodeCollection( - const ManagerBase &mgr, - TargetPhraseVectorPtr tpv, - BitWrapper<> &encodedBitStream, - const Phrase &sourcePhrase, - bool topLevel, - bool eval); + const ManagerBase &mgr, + TargetPhraseVectorPtr tpv, + BitWrapper<> &encodedBitStream, + const Phrase &sourcePhrase, + bool topLevel, + bool eval); void PruneCache(); }; diff --git a/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp b/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp index 49244df1b..707a52f58 100644 --- 
a/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp +++ b/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp @@ -16,11 +16,11 @@ namespace Moses2 bool PhraseTableCompact::s_inMemoryByDefault = false; PhraseTableCompact::PhraseTableCompact(size_t startInd, const std::string &line) -:PhraseTable(startInd, line) -,m_inMemory(s_inMemoryByDefault) -,m_useAlignmentInfo(true) -,m_hash(10, 16) -,m_phraseDecoder(0) + :PhraseTable(startInd, line) + ,m_inMemory(s_inMemoryByDefault) + ,m_useAlignmentInfo(true) + ,m_hash(10, 16) + ,m_phraseDecoder(0) { ReadParameters(); } @@ -58,8 +58,7 @@ void PhraseTableCompact::Load(System &system) if(m_inMemory) { // Load target phrase collections into memory phraseSize = m_targetPhrasesMemory.load(pFile, false); - } - else { + } else { // Keep target phrase collections on disk phraseSize = m_targetPhrasesMapped.load(pFile, true); } @@ -72,8 +71,7 @@ void PhraseTableCompact::SetParameter(const std::string& key, const std::string& { if (key == "blah") { - } - else { + } else { PhraseTable::SetParameter(key, value); } } @@ -95,16 +93,16 @@ void PhraseTableCompact::Lookup(const Manager &mgr, InputPathsBase &inputPaths) InputPaths &inputPathsCast = static_cast(inputPaths); for (size_t i = 0; i < inputSize; ++i) { - for (size_t startPos = 0; startPos < inputSize; ++startPos) { - size_t endPos = startPos + i; - if (endPos >= inputSize) { - break; - } - InputPath *path = inputPathsCast.GetMatrix().GetValue(startPos, i); - //cerr << "path=" << path->Debug(mgr.system) << endl; - TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path); - path->AddTargetPhrases(*this, tps); - } + for (size_t startPos = 0; startPos < inputSize; ++startPos) { + size_t endPos = startPos + i; + if (endPos >= inputSize) { + break; + } + InputPath *path = inputPathsCast.GetMatrix().GetValue(startPos, i); + //cerr << "path=" << path->Debug(mgr.system) << endl; + TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path); + path->AddTargetPhrases(*this, tps); + } } } 
@@ -154,67 +152,67 @@ TargetPhrases *PhraseTableCompact::Lookup(const Manager &mgr, MemPool &pool, } const TargetPhraseImpl *PhraseTableCompact::CreateTargetPhrase( - const Manager &mgr, - const TPCompact &tpCompact, - const Phrase &sourcePhrase) const + const Manager &mgr, + const TPCompact &tpCompact, + const Phrase &sourcePhrase) const { - MemPool &pool = mgr.GetPool(); + MemPool &pool = mgr.GetPool(); - size_t size = tpCompact.words.size(); - TargetPhraseImpl *ret = new TargetPhraseImpl(pool, *this, mgr.system, size); + size_t size = tpCompact.words.size(); + TargetPhraseImpl *ret = new TargetPhraseImpl(pool, *this, mgr.system, size); - // words - for (size_t i = 0; i < size; ++i) { - const Word &compactWord = tpCompact.words[i]; - Word &tpWord = (*ret)[i]; - tpWord = compactWord; - } + // words + for (size_t i = 0; i < size; ++i) { + const Word &compactWord = tpCompact.words[i]; + Word &tpWord = (*ret)[i]; + tpWord = compactWord; + } - // scores - Scores &scores = ret->GetScores(); - scores.Assign(mgr.system, *this, tpCompact.scores); + // scores + Scores &scores = ret->GetScores(); + scores.Assign(mgr.system, *this, tpCompact.scores); - // align - ret->SetAlignTerm(tpCompact.alignment); + // align + ret->SetAlignTerm(tpCompact.alignment); - // score - mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, sourcePhrase, *ret); + // score + mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, sourcePhrase, *ret); - // Cache phrase pair for clean-up or retrieval with PREnc - //const_cast(this)->CacheForCleanup(phraseColl); + // Cache phrase pair for clean-up or retrieval with PREnc + //const_cast(this)->CacheForCleanup(phraseColl); - //cerr << "ret=" << ret->Debug(mgr.system) << endl; - return ret; + //cerr << "ret=" << ret->Debug(mgr.system) << endl; + return ret; } // scfg void PhraseTableCompact::InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const + MemPool &pool, + const SCFG::Manager &mgr, + 
SCFG::InputPath &path) const { UTIL_THROW2("Not implemented"); } void PhraseTableCompact::Lookup( - MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const + MemPool &pool, + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const { UTIL_THROW2("Not implemented"); } void PhraseTableCompact::LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const { UTIL_THROW2("Not implemented"); } diff --git a/moses2/TranslationModel/CompactPT/PhraseTableCompact.h b/moses2/TranslationModel/CompactPT/PhraseTableCompact.h index 84ea7e4b2..dced14c0c 100644 --- a/moses2/TranslationModel/CompactPT/PhraseTableCompact.h +++ b/moses2/TranslationModel/CompactPT/PhraseTableCompact.h @@ -18,22 +18,22 @@ public: virtual void CleanUpAfterSentenceProcessing() const; virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool, - InputPath &inputPath) const; + InputPath &inputPath) const; // scfg virtual void InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const; + MemPool &pool, + const SCFG::Manager &mgr, + SCFG::InputPath &path) const; virtual void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const; virtual void Lookup( - MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const; + MemPool &pool, + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const; protected: static bool 
s_inMemoryByDefault; @@ -49,19 +49,19 @@ protected: PhraseDecoder* m_phraseDecoder; const TargetPhraseImpl *CreateTargetPhrase( - const Manager &mgr, - const TPCompact &tpCompact, - const Phrase &sourcePhrase) const; + const Manager &mgr, + const TPCompact &tpCompact, + const Phrase &sourcePhrase) const; // SCFG virtual void LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const; + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const; }; diff --git a/moses2/TranslationModel/CompactPT/StringVector.h b/moses2/TranslationModel/CompactPT/StringVector.h index 87d6388bf..0b2aa176f 100644 --- a/moses2/TranslationModel/CompactPT/StringVector.h +++ b/moses2/TranslationModel/CompactPT/StringVector.h @@ -53,13 +53,11 @@ public: const ValueIteratorT& begin() const; const ValueIteratorT& end() const; const std::string str() const; - operator const std::string() - { + operator const std::string() { return str(); } - size_t size() - { + size_t size() { return std::distance(m_begin, m_end); } @@ -75,7 +73,7 @@ public: // ********** StringVector ********** template class Allocator = std::allocator> + template class Allocator = std::allocator> class StringVector { protected: @@ -94,7 +92,7 @@ public: // ********** RangeIterator ********** class RangeIterator: public boost::iterator_facade + std::random_access_iterator_tag, range, PosT> { private: @@ -122,7 +120,7 @@ public: // ********** StringIterator ********** class StringIterator: public boost::iterator_facade + std::string, std::random_access_iterator_tag, const std::string, PosT> { private: @@ -152,13 +150,11 @@ public: StringVector(bool allocate = false); 
StringVector(Allocator& alloc); - virtual ~StringVector() - { + virtual ~StringVector() { delete m_charArray; } - void swap(StringVector &c) - { + void swap(StringVector &c) { m_positions.commit(); m_positions.swap(c.m_positions); m_charArray->swap(*c.m_charArray); @@ -184,8 +180,7 @@ public: const ValueT* begin(PosT i) const; const ValueT* end(PosT i) const; - void clear() - { + void clear() { m_charArray->clear(); m_sorted = true; m_positions = MonotonicVector(); @@ -203,8 +198,7 @@ public: PosT find(StringT &s) const; PosT find(const char* c) const; - virtual size_t load(std::FILE* in, bool memoryMapped = false) - { + virtual size_t load(std::FILE* in, bool memoryMapped = false) { size_t size = 0; m_memoryMapped = memoryMapped; @@ -216,8 +210,7 @@ public: } size_t loadCharArray(std::vector >*& c, - std::FILE* in, bool map = false) - { + std::FILE* in, bool map = false) { // Can only be read into memory. Mapping not possible with std:allocator. assert(map == false); @@ -228,14 +221,13 @@ public: c = new std::vector >(valSize, 0); byteSize += std::fread(&(*c)[0], sizeof(ValueT), valSize, in) - * sizeof(ValueT); + * sizeof(ValueT); return byteSize; } size_t loadCharArray(std::vector >*& c, - std::FILE* in, bool map = false) - { + std::FILE* in, bool map = false) { size_t byteSize = 0; size_t valSize; @@ -246,9 +238,8 @@ public: // and map memory onto temporary file. Can be resized. c = new std::vector >(valSize, 0); byteSize += std::fread(&(*c)[0], sizeof(ValueT), valSize, in) - * sizeof(ValueT); - } - else { + * sizeof(ValueT); + } else { // Map it directly on specified region of file "in" starting at valPos // with length valSize * sizeof(ValueT). Mapped region cannot be resized. 
@@ -263,16 +254,14 @@ public: return byteSize; } - size_t load(std::string filename, bool memoryMapped = false) - { + size_t load(std::string filename, bool memoryMapped = false) { std::FILE* pFile = fopen(filename.c_str(), "r"); size_t byteSize = load(pFile, memoryMapped); fclose(pFile); return byteSize; } - size_t save(std::FILE* out) - { + size_t save(std::FILE* out) { size_t byteSize = 0; byteSize += ThrowingFwrite(&m_sorted, sizeof(bool), 1, out) * sizeof(bool); @@ -280,15 +269,14 @@ public: size_t valSize = size2(); byteSize += ThrowingFwrite(&valSize, sizeof(size_t), 1, out) - * sizeof(size_t); + * sizeof(size_t); byteSize += ThrowingFwrite(&(*m_charArray)[0], sizeof(ValueT), valSize, out) - * sizeof(ValueT); + * sizeof(ValueT); return byteSize; } - size_t save(std::string filename) - { + size_t save(std::string filename) { std::FILE* pFile = fopen(filename.c_str(), "w"); size_t byteSize = save(pFile); fclose(pFile); @@ -304,7 +292,7 @@ public: template ValueIteratorRange::ValueIteratorRange(ValueIteratorT begin, ValueIteratorT end) : - m_begin(begin), m_end(end) + m_begin(begin), m_end(end) { } @@ -334,7 +322,7 @@ template bool ValueIteratorRange::operator==(const StringT& o) const { if (std::distance(m_begin, m_end) == std::distance(o.begin(), o.end())) return std::equal( - m_begin, m_end, o.begin()); + m_begin, m_end, o.begin()); else return false; } @@ -349,7 +337,7 @@ template bool ValueIteratorRange::operator<(const StringT &s2) const { return std::lexicographical_compare(m_begin, m_end, s2.begin(), s2.end(), - std::less::value_type>()); + std::less::value_type>()); } template @@ -362,8 +350,8 @@ template bool operator<(const StringT &s1, const ValueIteratorRange &s2) { return std::lexicographical_compare(s1.begin(), s1.end(), s2.begin(), - s2.end(), - std::less::value_type>()); + s2.end(), + std::less::value_type>()); } template @@ -371,7 +359,7 @@ bool operator<(const char* c, const ValueIteratorRange &s2) { size_t len = std::char_traits::length(c); 
return std::lexicographical_compare(c, c + len, s2.begin(), s2.end(), - std::less::value_type>()); + std::less::value_type>()); } template @@ -387,15 +375,15 @@ OStream& operator<<(OStream &os, ValueIteratorRange cr) template class Allocator> StringVector::StringVector(bool allocate) : - m_sorted(true), m_memoryMapped(false), m_charArray( - allocate ? new std::vector >() : 0) + m_sorted(true), m_memoryMapped(false), m_charArray( + allocate ? new std::vector >() : 0) { } template class Allocator> StringVector::StringVector(Allocator &alloc) : - m_sorted(true), m_memoryMapped(false), m_charArray( - new std::vector >(alloc)) + m_sorted(true), m_memoryMapped(false), m_charArray( + new std::vector >(alloc)) { } @@ -428,12 +416,12 @@ template Iterator StringVector::end() const { return Iterator(const_cast&>(*this), - size()); + size()); } template class Allocator> typename StringVector::iterator StringVector::begin() const + PosT, Allocator>::begin() const { return begin(); } @@ -441,7 +429,7 @@ typename StringVector::iterator StringVector class Allocator> typename StringVector::iterator StringVector::end() const + PosT, Allocator>::end() const { return end(); } @@ -467,21 +455,21 @@ PosT StringVector::size2() const template class Allocator> typename StringVector::range StringVector::at(PosT i) const + Allocator>::at(PosT i) const { return range(begin(i), end(i)); } template class Allocator> typename StringVector::range StringVector::operator[](PosT i) const + Allocator>::operator[](PosT i) const { return at(i); } template class Allocator> typename StringVector::range StringVector::back() const + Allocator>::back() const { return at(size() - 1); } @@ -520,7 +508,7 @@ template PosT StringVector::find(StringT &s) const { if (m_sorted) return std::distance(begin(), - std::lower_bound(begin(), end(), s)); + std::lower_bound(begin(), end(), s)); return std::distance(begin(), std::find(begin(), end(), s)); } @@ -535,14 +523,14 @@ PosT StringVector::find(const char* c) const 
template class Allocator> StringVector::RangeIterator::RangeIterator() : - m_index(0), m_container(0) + m_index(0), m_container(0) { } template class Allocator> StringVector::RangeIterator::RangeIterator( - StringVector &sv, PosT index) : - m_index(index), m_container(&sv) + StringVector &sv, PosT index) : + m_index(index), m_container(&sv) { } @@ -554,15 +542,15 @@ PosT StringVector::RangeIterator::get_index() template class Allocator> typename StringVector::range StringVector::RangeIterator::dereference() const + Allocator>::RangeIterator::dereference() const { return typename StringVector::range( - m_container->begin(m_index), m_container->end(m_index)); + m_container->begin(m_index), m_container->end(m_index)); } template class Allocator> bool StringVector::RangeIterator::equal( - StringVector::RangeIterator const& other) const + StringVector::RangeIterator const& other) const { return m_index == other.m_index && m_container == other.m_container; } @@ -587,7 +575,7 @@ void StringVector::RangeIterator::advance(PosT n) template class Allocator> PosT StringVector::RangeIterator::distance_to( - StringVector::RangeIterator const& other) const + StringVector::RangeIterator const& other) const { return other.m_index - m_index; } @@ -596,14 +584,14 @@ PosT StringVector::RangeIterator::distance_to( template class Allocator> StringVector::StringIterator::StringIterator() : - m_index(0), m_container(0) + m_index(0), m_container(0) { } template class Allocator> StringVector::StringIterator::StringIterator( - StringVector &sv, PosT index) : - m_index(index), m_container(&sv) + StringVector &sv, PosT index) : + m_index(index), m_container(&sv) { } @@ -617,12 +605,12 @@ template class Allocator> const std::string StringVector::StringIterator::dereference() const { return StringVector::range( - m_container->begin(m_index), m_container->end(m_index)).str(); + m_container->begin(m_index), m_container->end(m_index)).str(); } template class Allocator> bool 
StringVector::StringIterator::equal( - StringVector::StringIterator const& other) const + StringVector::StringIterator const& other) const { return m_index == other.m_index && m_container == other.m_container; } @@ -647,7 +635,7 @@ void StringVector::StringIterator::advance(PosT n) template class Allocator> PosT StringVector::StringIterator::distance_to( - StringVector::StringIterator const& other) const + StringVector::StringIterator const& other) const { return other.m_index - m_index; } diff --git a/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h b/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h index 3a9e6f170..75ab40c93 100644 --- a/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h +++ b/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h @@ -35,14 +35,12 @@ namespace Moses2 { typedef std::pair AlignPointSizeT; -struct PhraseCompact : public std::vector -{ +struct PhraseCompact : public std::vector { public: PhraseCompact(const Phrase ©); }; -struct TPCompact -{ +struct TPCompact { std::vector words; std::set alignment; std::vector scores; diff --git a/moses2/TranslationModel/Memory/Node.h b/moses2/TranslationModel/Memory/Node.h index 97fa9618e..d5a6b8795 100644 --- a/moses2/TranslationModel/Memory/Node.h +++ b/moses2/TranslationModel/Memory/Node.h @@ -25,56 +25,50 @@ public: typedef boost::unordered_map Children; Node() - :m_targetPhrases(NULL) - ,m_unsortedTPS(NULL) + :m_targetPhrases(NULL) + ,m_unsortedTPS(NULL) {} ~Node() {} - void AddRule(const std::vector &factors, SP &source, TP *target) - { + void AddRule(const std::vector &factors, SP &source, TP *target) { AddRule(factors, source, target, 0); } - TPS *Find(const std::vector &factors, const SP &source, size_t pos = 0) const - { + TPS *Find(const std::vector &factors, const SP &source, size_t pos = 0) const { assert(source.GetSize()); if (pos == source.GetSize()) { return m_targetPhrases; - } - else { + } else { const WORD &word = source[pos]; 
//cerr << "word=" << word << endl; typename Children::const_iterator iter = m_children.find(word.hash(factors)); if (iter == m_children.end()) { return NULL; - } - else { + } else { const Node &child = iter->second; return child.Find(factors, source, pos + 1); } } } - const Node *Find(const std::vector &factors, const WORD &word) const - { + const Node *Find(const std::vector &factors, const WORD &word) const { typename Children::const_iterator iter = m_children.find(word.hash(factors)); if (iter == m_children.end()) { return NULL; - } - else { + } else { const Node &child = iter->second; return &child; } } - const TPS *GetTargetPhrases() const - { return m_targetPhrases; } + const TPS *GetTargetPhrases() const { + return m_targetPhrases; + } - void SortAndPrune(size_t tableLimit, MemPool &pool, System &system) - { - BOOST_FOREACH(typename Children::value_type &val, m_children){ + void SortAndPrune(size_t tableLimit, MemPool &pool, System &system) { + BOOST_FOREACH(typename Children::value_type &val, m_children) { Node &child = val.second; child.SortAndPrune(tableLimit, pool, system); } @@ -95,8 +89,9 @@ public: } } - const Children &GetChildren() const - { return m_children; } + const Children &GetChildren() const { + return m_children; + } void Debug(std::ostream &out, const System &system) const { BOOST_FOREACH(const typename Children::value_type &valPair, m_children) { @@ -110,8 +105,7 @@ protected: Phrase *m_source; std::vector *m_unsortedTPS; - Node &AddRule(const std::vector &factors, SP &source, TP *target, size_t pos) - { + Node &AddRule(const std::vector &factors, SP &source, TP *target, size_t pos) { if (pos == source.GetSize()) { if (m_unsortedTPS == NULL) { m_unsortedTPS = new std::vector(); @@ -120,8 +114,7 @@ protected: m_unsortedTPS->push_back(target); return *this; - } - else { + } else { const WORD &word = source[pos]; Node &child = m_children[word.hash(factors)]; //std::cerr << "added " << word << " " << &child << " from " << this << std::endl; 
diff --git a/moses2/TranslationModel/Memory/PhraseTableMemory.cpp b/moses2/TranslationModel/Memory/PhraseTableMemory.cpp index 09eead137..9b231ebd6 100644 --- a/moses2/TranslationModel/Memory/PhraseTableMemory.cpp +++ b/moses2/TranslationModel/Memory/PhraseTableMemory.cpp @@ -37,9 +37,9 @@ namespace Moses2 //////////////////////////////////////////////////////////////////////// PhraseTableMemory::PhraseTableMemory(size_t startInd, const std::string &line) -:PhraseTable(startInd, line) -,m_rootPb(NULL) -,m_rootSCFG(NULL) + :PhraseTable(startInd, line) + ,m_rootPb(NULL) + ,m_rootSCFG(NULL) { ReadParameters(); } @@ -58,8 +58,7 @@ void PhraseTableMemory::Load(System &system) if (system.isPb) { m_rootPb = new PBNODE(); - } - else { + } else { m_rootSCFG = new SCFGNODE(); //cerr << "m_rootSCFG=" << m_rootSCFG << endl; } @@ -80,17 +79,17 @@ void PhraseTableMemory::Load(System &system) if (system.isPb) { PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, - toks[0]); + toks[0]); //cerr << "created soure" << endl; TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(systemPool, *this, system, - toks[1]); + toks[1]); //cerr << "created target" << endl; target->GetScores().CreateFromString(toks[2], *this, system, true); //cerr << "created scores:" << *target << endl; if (toks.size() >= 4) { - //cerr << "alignstr=" << toks[3] << endl; - target->SetAlignmentInfo(toks[3]); + //cerr << "alignstr=" << toks[3] << endl; + target->SetAlignmentInfo(toks[3]); } // properties @@ -105,13 +104,12 @@ void PhraseTableMemory::Load(System &system) m_rootPb->AddRule(m_input, *source, target); //cerr << "target=" << target->Debug(system) << endl; - } - else { + } else { SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, - toks[0]); + toks[0]); //cerr << "created source:" << *source << endl; SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this, - system, toks[1]); + system, 
toks[1]); //cerr << "created target " << *target << " source=" << *source << endl; @@ -139,8 +137,7 @@ void PhraseTableMemory::Load(System &system) if (system.isPb) { m_rootPb->SortAndPrune(m_tableLimit, systemPool, system); //cerr << "root=" << &m_rootPb << endl; - } - else { + } else { m_rootSCFG->SortAndPrune(m_tableLimit, systemPool, system); //cerr << "root=" << &m_rootPb << endl; } @@ -162,9 +159,9 @@ TargetPhrases* PhraseTableMemory::Lookup(const Manager &mgr, MemPool &pool, } void PhraseTableMemory::InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const + MemPool &pool, + const SCFG::Manager &mgr, + SCFG::InputPath &path) const { size_t ptInd = GetPtInd(); ActiveChartEntryMem *chartEntry = new (pool.Allocate()) ActiveChartEntryMem(pool, *m_rootSCFG); @@ -173,10 +170,10 @@ void PhraseTableMemory::InitActiveChart( } void PhraseTableMemory::Lookup(MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const { if (path.range.GetNumWordsCovered() > maxChartSpan) { return; @@ -213,13 +210,13 @@ void PhraseTableMemory::Lookup(MemPool &pool, } void PhraseTableMemory::LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const { const ActiveChartEntryMem &prevEntryCast = static_cast(prevEntry); @@ -252,10 +249,10 @@ void PhraseTableMemory::LookupGivenNode( // there are some rules /* cerr << "outPath=" << outPath.range - << " bind=" << 
chartEntry->GetSymbolBind().Debug(mgr.system) - << " pt=" << GetPtInd() - << " tps=" << tps->Debug(mgr.system) << endl; - */ + << " bind=" << chartEntry->GetSymbolBind().Debug(mgr.system) + << " pt=" << GetPtInd() + << " tps=" << tps->Debug(mgr.system) << endl; + */ outPath.AddTargetPhrasesToPath(pool, mgr.system, *this, *tps, chartEntry->GetSymbolBind()); } diff --git a/moses2/TranslationModel/Memory/PhraseTableMemory.h b/moses2/TranslationModel/Memory/PhraseTableMemory.h index 035c7c9c5..07a47c7ff 100644 --- a/moses2/TranslationModel/Memory/PhraseTableMemory.h +++ b/moses2/TranslationModel/Memory/PhraseTableMemory.h @@ -33,16 +33,16 @@ class PhraseTableMemory: public PhraseTable const PhraseTableMemory::SCFGNODE &node; ActiveChartEntryMem(MemPool &pool, const PhraseTableMemory::SCFGNODE &vnode) - :Parent(pool) - ,node(vnode) + :Parent(pool) + ,node(vnode) {} ActiveChartEntryMem( - MemPool &pool, - const PhraseTableMemory::SCFGNODE &vnode, - const ActiveChartEntry &prevEntry) - :Parent(prevEntry) - ,node(vnode) + MemPool &pool, + const PhraseTableMemory::SCFGNODE &vnode, + const ActiveChartEntry &prevEntry) + :Parent(prevEntry) + ,node(vnode) {} }; @@ -53,31 +53,31 @@ public: virtual void Load(System &system); virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool, - InputPath &inputPath) const; + InputPath &inputPath) const; virtual void InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const; + MemPool &pool, + const SCFG::Manager &mgr, + SCFG::InputPath &path) const; void Lookup(MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const; + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const; protected: PBNODE *m_rootPb; SCFGNODE *m_rootSCFG; void LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const 
Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const; + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const; }; diff --git a/moses2/TranslationModel/PhraseTable.cpp b/moses2/TranslationModel/PhraseTable.cpp index c790147bb..fef6771d6 100644 --- a/moses2/TranslationModel/PhraseTable.cpp +++ b/moses2/TranslationModel/PhraseTable.cpp @@ -22,8 +22,8 @@ namespace Moses2 //////////////////////////////////////////////////////////////////////////// PhraseTable::PhraseTable(size_t startInd, const std::string &line) : - StatelessFeatureFunction(startInd, line), m_tableLimit(20) // default - , m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE) + StatelessFeatureFunction(startInd, line), m_tableLimit(20) // default + , m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE) { m_input.push_back(0); } @@ -37,20 +37,15 @@ void PhraseTable::SetParameter(const std::string& key, const std::string& value) { if (key == "cache-size") { m_maxCacheSize = Scan(value); - } - else if (key == "path") { + } else if (key == "path") { m_path = value; - } - else if (key == "input-factor") { - m_input = Tokenize(value, ","); - } - else if (key == "output-factor") { - m_output = Tokenize(value, ","); - } - else if (key == "table-limit") { + } else if (key == "input-factor") { + m_input = Tokenize(value, ","); + } else if (key == "output-factor") { + m_output = Tokenize(value, ","); + } else if (key == "table-limit") { m_tableLimit = Scan(value); - } - else { + } else { StatelessFeatureFunction::SetParameter(key, value); } } @@ -59,29 +54,27 @@ bool PhraseTable::SatisfyBackoff(const Manager &mgr, const InputPath &path) cons { const InputType &input = mgr.GetInput(); if ((mgr.system.options.input.xml_policy == XmlExclusive) - && input.XmlOverlap(path.range.GetStartPos(), 
path.range.GetEndPos())) { - return false; + && input.XmlOverlap(path.range.GetStartPos(), path.range.GetEndPos())) { + return false; } - //cerr << GetName() << "=" << GetPtInd() << "=" << decodeGraphBackoff << endl; - if (decodeGraphBackoff == 0) { - // always lookup - return true; - } - else if (decodeGraphBackoff == -1) { - // lookup only if there's no existing rules - return path.GetNumRules() ? false : true; - } - else if (path.range.GetNumWordsCovered() <= decodeGraphBackoff) { - return path.GetNumRules() ? false : true; - } + //cerr << GetName() << "=" << GetPtInd() << "=" << decodeGraphBackoff << endl; + if (decodeGraphBackoff == 0) { + // always lookup + return true; + } else if (decodeGraphBackoff == -1) { + // lookup only if there's no existing rules + return path.GetNumRules() ? false : true; + } else if (path.range.GetNumWordsCovered() <= decodeGraphBackoff) { + return path.GetNumRules() ? false : true; + } - return false; + return false; } void PhraseTable::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const { - BOOST_FOREACH(InputPathBase *pathBase, inputPaths){ + BOOST_FOREACH(InputPathBase *pathBase, inputPaths) { InputPath *path = static_cast(pathBase); //cerr << "path=" << path->range << " "; @@ -102,29 +95,29 @@ void PhraseTable::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const } TargetPhrases *PhraseTable::Lookup(const Manager &mgr, MemPool &pool, - InputPath &inputPath) const + InputPath &inputPath) const { UTIL_THROW2("Not implemented"); } void PhraseTable::EvaluateInIsolation(MemPool &pool, const System &system, - const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { } void PhraseTable::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const + const 
TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const { } // scfg void PhraseTable::LookupUnary(MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const + const SCFG::Manager &mgr, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const { //cerr << "BEFORE LookupUnary" << path.Debug(mgr.system) << endl; size_t startPos = path.range.GetStartPos(); @@ -134,12 +127,12 @@ void PhraseTable::LookupUnary(MemPool &pool, } void PhraseTable::LookupNT( - MemPool &pool, - const SCFG::Manager &mgr, - const Moses2::Range &subPhraseRange, - const SCFG::InputPath &prevPath, - const SCFG::Stacks &stacks, - SCFG::InputPath &outPath) const + MemPool &pool, + const SCFG::Manager &mgr, + const Moses2::Range &subPhraseRange, + const SCFG::InputPath &prevPath, + const SCFG::Stacks &stacks, + SCFG::InputPath &outPath) const { size_t endPos = outPath.range.GetEndPos(); @@ -161,13 +154,13 @@ void PhraseTable::LookupNT( } void PhraseTable::LookupGivenWord( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::InputPath &prevPath, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::InputPath &prevPath, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const { size_t ptInd = GetPtInd(); diff --git a/moses2/TranslationModel/PhraseTable.h b/moses2/TranslationModel/PhraseTable.h index 9237f5ba6..ef40c06a4 100644 --- a/moses2/TranslationModel/PhraseTable.h +++ b/moses2/TranslationModel/PhraseTable.h @@ -42,43 +42,45 @@ public: virtual void SetParameter(const std::string& key, const std::string& value); virtual void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const; virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool, - InputPath &inputPath) const; + InputPath &inputPath) 
const; - void SetPtInd(size_t ind) - { m_ptInd = ind; } + void SetPtInd(size_t ind) { + m_ptInd = ind; + } - size_t GetPtInd() const - { return m_ptInd; } + size_t GetPtInd() const { + return m_ptInd; + } bool SatisfyBackoff(const Manager &mgr, const InputPath &path) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhrase &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhrase &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; // scfg virtual void InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const = 0; + MemPool &pool, + const SCFG::Manager &mgr, + SCFG::InputPath &path) const = 0; virtual void Lookup( - MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const = 0; + MemPool &pool, + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const = 0; virtual void LookupUnary(MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const; + const SCFG::Manager &mgr, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const; protected: std::string m_path; @@ -89,38 +91,37 @@ protected: // cache size_t m_maxCacheSize; // 0 = no caching - struct CacheCollEntry2 - { + struct CacheCollEntry2 { TargetPhrases *tpsPtr; clock_t clock; }; // scfg virtual void LookupNT( - MemPool &pool, - const SCFG::Manager &mgr, - const Moses2::Range &subPhraseRange, - const SCFG::InputPath &prevPath, - const SCFG::Stacks &stacks, - SCFG::InputPath &outPath) const; + MemPool &pool, + const SCFG::Manager &mgr, + const 
Moses2::Range &subPhraseRange, + const SCFG::InputPath &prevPath, + const SCFG::Stacks &stacks, + SCFG::InputPath &outPath) const; virtual void LookupGivenWord( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::InputPath &prevPath, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const; + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::InputPath &prevPath, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const; virtual void LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const = 0; + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const = 0; }; diff --git a/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/moses2/TranslationModel/ProbingPT/ProbingPT.cpp index 1f22f45be..d405bf026 100644 --- a/moses2/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/moses2/TranslationModel/ProbingPT/ProbingPT.cpp @@ -30,17 +30,17 @@ using namespace std; namespace Moses2 { ProbingPT::ActiveChartEntryProbing::ActiveChartEntryProbing( - MemPool &pool, - const ActiveChartEntryProbing &prevEntry) -:Parent(prevEntry) -,m_key(prevEntry.m_key) + MemPool &pool, + const ActiveChartEntryProbing &prevEntry) + :Parent(prevEntry) + ,m_key(prevEntry.m_key) {} void ProbingPT::ActiveChartEntryProbing::AddSymbolBindElement( - const Range &range, - const SCFG::Word &word, - const Moses2::Hypotheses *hypos, - const Moses2::PhraseTable &pt) + const Range &range, + const SCFG::Word &word, + const Moses2::Hypotheses *hypos, + const Moses2::PhraseTable &pt) { const 
ProbingPT &probingPt = static_cast(pt); std::pair key = GetKey(word, probingPt); @@ -68,8 +68,8 @@ std::pair ProbingPT::ActiveChartEntryProbing::GetKey(const SCFG: //////////////////////////////////////////////////////////////////////////// ProbingPT::ProbingPT(size_t startInd, const std::string &line) -:PhraseTable(startInd, line) -,load_method(util::POPULATE_OR_READ) + :PhraseTable(startInd, line) + ,load_method(util::POPULATE_OR_READ) { ReadParameters(); } @@ -89,10 +89,10 @@ void ProbingPT::Load(System &system) // source vocab const std::map &sourceVocab = - m_engine->getSourceVocab(); + m_engine->getSourceVocab(); std::map::const_iterator iterSource; for (iterSource = sourceVocab.begin(); iterSource != sourceVocab.end(); - ++iterSource) { + ++iterSource) { string wordStr = iterSource->second; bool isNT; //cerr << "wordStr=" << wordStr << endl; @@ -142,29 +142,23 @@ void ProbingPT::Load(System &system) void ProbingPT::SetParameter(const std::string& key, const std::string& value) { - if (key == "load") { + if (key == "load") { if (value == "lazy") { load_method = util::LAZY; - } - else if (value == "populate_or_lazy") { + } else if (value == "populate_or_lazy") { load_method = util::POPULATE_OR_LAZY; - } - else if (value == "populate_or_read" || value == "populate") { + } else if (value == "populate_or_read" || value == "populate") { load_method = util::POPULATE_OR_READ; - } - else if (value == "read") { + } else if (value == "read") { load_method = util::READ; - } - else if (value == "parallel_read") { + } else if (value == "parallel_read") { load_method = util::PARALLEL_READ; + } else { + UTIL_THROW2("load method not supported" << value); } - else { - UTIL_THROW2("load method not supported" << value); - } - } - else { - PhraseTable::SetParameter(key, value); - } + } else { + PhraseTable::SetParameter(key, value); + } } void ProbingPT::CreateAlignmentMap(System &system, const std::string path) @@ -191,19 +185,19 @@ void ProbingPT::CreateAlignmentMap(System 
&system, const std::string path) void ProbingPT::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const { - BOOST_FOREACH(InputPathBase *pathBase, inputPaths){ - InputPath *path = static_cast(pathBase); + BOOST_FOREACH(InputPathBase *pathBase, inputPaths) { + InputPath *path = static_cast(pathBase); - if (SatisfyBackoff(mgr, *path)) { - TargetPhrases *tpsPtr; - tpsPtr = Lookup(mgr, mgr.GetPool(), *path); - path->AddTargetPhrases(*this, tpsPtr); - } + if (SatisfyBackoff(mgr, *path)) { + TargetPhrases *tpsPtr; + tpsPtr = Lookup(mgr, mgr.GetPool(), *path); + path->AddTargetPhrases(*this, tpsPtr); + } } } TargetPhrases* ProbingPT::Lookup(const Manager &mgr, MemPool &pool, - InputPath &inputPath) const + InputPath &inputPath) const { /* if (inputPath.prefixPath && inputPath.prefixPath->GetTargetPhrases(*this) == NULL) { @@ -234,7 +228,7 @@ TargetPhrases* ProbingPT::Lookup(const Manager &mgr, MemPool &pool, // query pt TargetPhrases *tps = CreateTargetPhrases(pool, mgr.system, sourcePhrase, - keyStruct.second); + keyStruct.second); return tps; } @@ -251,8 +245,7 @@ std::pair ProbingPT::GetKey(const Phrase &sourcePh if (!ret.first) { // source phrase contains a word unknown in the pt. 
// We know immediately there's no translation for it - } - else { + } else { ret.second = m_engine->getKey(probingSource, sourceSize); } @@ -296,16 +289,16 @@ TargetPhrases *ProbingPT::CreateTargetPhrases(MemPool &pool, } TargetPhraseImpl *ProbingPT::CreateTargetPhrase( - MemPool &pool, - const System &system, - const char *&offset) const + MemPool &pool, + const System &system, + const char *&offset) const { TargetPhraseInfo *tpInfo = (TargetPhraseInfo*) offset; size_t numRealWords = tpInfo->numWords / m_output.size(); TargetPhraseImpl *tp = - new (pool.Allocate()) TargetPhraseImpl(pool, *this, - system, numRealWords); + new (pool.Allocate()) TargetPhraseImpl(pool, *this, + system, numRealWords); offset += sizeof(TargetPhraseInfo); @@ -322,8 +315,7 @@ TargetPhraseImpl *ProbingPT::CreateTargetPhrase( if (m_engine->num_lex_scores) { tp->scoreProperties = scores + m_engine->num_scores; } - } - else { + } else { // log score 1st SCORE logScores[totalNumScores]; for (size_t i = 0; i < totalNumScores; ++i) { @@ -344,20 +336,20 @@ TargetPhraseImpl *ProbingPT::CreateTargetPhrase( // words for (size_t targetPos = 0; targetPos < numRealWords; ++targetPos) { - for (size_t i = 0; i < m_output.size(); ++i) { - FactorType factorType = m_output[i]; + for (size_t i = 0; i < m_output.size(); ++i) { + FactorType factorType = m_output[i]; - uint32_t *probingId = (uint32_t*) offset; + uint32_t *probingId = (uint32_t*) offset; - const std::pair *factorPair = GetTargetFactor(*probingId); - assert(factorPair); - assert(!factorPair->first); + const std::pair *factorPair = GetTargetFactor(*probingId); + assert(factorPair); + assert(!factorPair->first); - Word &word = (*tp)[targetPos]; - word[factorType] = factorPair->second; + Word &word = (*tp)[targetPos]; + word[factorType] = factorPair->second; - offset += sizeof(uint32_t); - } + offset += sizeof(uint32_t); + } } // align @@ -372,7 +364,7 @@ TargetPhraseImpl *ProbingPT::CreateTargetPhrase( } void ProbingPT::GetSourceProbingIds(const 
Phrase &sourcePhrase, - bool &ok, uint64_t probingSource[]) const + bool &ok, uint64_t probingSource[]) const { size_t size = sourcePhrase.GetSize(); @@ -382,8 +374,7 @@ void ProbingPT::GetSourceProbingIds(const Phrase &sourcePhrase, if (probingId == m_unkId) { ok = false; return; - } - else { + } else { probingSource[i] = probingId; } } @@ -396,14 +387,14 @@ uint64_t ProbingPT::GetSourceProbingId(const Word &word) const uint64_t ret = 0; for (size_t i = 0; i < m_input.size(); ++i) { - FactorType factorType = m_input[i]; - const Factor *factor = word[factorType]; + FactorType factorType = m_input[i]; + const Factor *factor = word[factorType]; - size_t factorId = factor->GetId(); - if (factorId >= m_sourceVocab.size()) { - return m_unkId; - } - ret += m_sourceVocab[factorId]; + size_t factorId = factor->GetId(); + if (factorId >= m_sourceVocab.size()) { + return m_unkId; + } + ret += m_sourceVocab[factorId]; } return ret; @@ -435,12 +426,12 @@ void ProbingPT::CreateCache(System &system) //cerr << "line=" << line << endl; if (system.isPb) { - PhraseImpl *sourcePhrase = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, toks[2]); + PhraseImpl *sourcePhrase = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, toks[2]); - /* + /* std::pair retStruct = GetKey(*sourcePhrase); if (!retStruct.first) { - UTIL_THROW2("Unknown cache entry"); + UTIL_THROW2("Unknown cache entry"); } cerr << "key=" << retStruct.second << " " << key << endl; */ @@ -448,9 +439,8 @@ void ProbingPT::CreateCache(System &system) assert(tps); m_cachePb[key] = tps; - } - else { - // SCFG + } else { + // SCFG SCFG::PhraseImpl *sourcePhrase = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, toks[2], false); //cerr << "sourcePhrase=" << sourcePhrase->Debug(system) << endl; @@ -473,8 +463,7 @@ void ProbingPT::ReformatWord(System &system, std::string &wordStr, bool &isNT) isNT = false; if (system.isPb) { return; - } - else { + } else { isNT = (wordStr[0] == '[' && 
wordStr[wordStr.size() - 1] == ']'); //cerr << "nt=" << nt << endl; @@ -482,8 +471,7 @@ void ProbingPT::ReformatWord(System &system, std::string &wordStr, bool &isNT) size_t startPos = wordStr.find("]["); if (startPos == string::npos) { startPos = 1; - } - else { + } else { startPos += 2; } @@ -494,9 +482,9 @@ void ProbingPT::ReformatWord(System &system, std::string &wordStr, bool &isNT) } void ProbingPT::InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const + MemPool &pool, + const SCFG::Manager &mgr, + SCFG::InputPath &path) const { //cerr << "InitActiveChart=" << path.Debug(cerr, mgr.system) << endl; size_t ptInd = GetPtInd(); @@ -505,10 +493,10 @@ void ProbingPT::InitActiveChart( } void ProbingPT::Lookup(MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const { //cerr << "Lookup=" << endl; if (path.range.GetNumWordsCovered() > maxChartSpan) { @@ -546,13 +534,13 @@ void ProbingPT::Lookup(MemPool &pool, } void ProbingPT::LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const { const ActiveChartEntryProbing &prevEntryCast = static_cast(prevEntry); @@ -568,7 +556,7 @@ void ProbingPT::LookupGivenNode( // check in cache CacheSCFG::const_iterator iter = m_cacheSCFG.find(key.second); if (iter != m_cacheSCFG.end()) { - //cerr << "FOUND IN CACHE " << key.second << " " << sourcePhrase.Debug(mgr.system) << endl; + //cerr << "FOUND IN CACHE " << 
key.second << " " << sourcePhrase.Debug(mgr.system) << endl; SCFG::TargetPhrases *tps = iter->second; ActiveChartEntryProbing *chartEntry = new (pool.Allocate()) ActiveChartEntryProbing(pool, prevEntryCast); @@ -581,42 +569,41 @@ void ProbingPT::LookupGivenNode( outPath.AddActiveChartEntry(ptInd, chartEntry); outPath.AddTargetPhrasesToPath(pool, mgr.system, *this, *tps, chartEntry->GetSymbolBind()); - } - else { - // not in cache. Lookup - std::pair tpsPair = CreateTargetPhrasesSCFG(pool, mgr.system, sourcePhrase, key.second); - assert(tpsPair.first && tpsPair.second); + } else { + // not in cache. Lookup + std::pair tpsPair = CreateTargetPhrasesSCFG(pool, mgr.system, sourcePhrase, key.second); + assert(tpsPair.first && tpsPair.second); - if (tpsPair.first) { - // new entries - ActiveChartEntryProbing *chartEntry = new (pool.Allocate()) ActiveChartEntryProbing(pool, prevEntryCast); - //cerr << "AFTER chartEntry" << endl; + if (tpsPair.first) { + // new entries + ActiveChartEntryProbing *chartEntry = new (pool.Allocate()) ActiveChartEntryProbing(pool, prevEntryCast); + //cerr << "AFTER chartEntry" << endl; - chartEntry->AddSymbolBindElement(subPhraseRange, wordSought, hypos, *this); - //cerr << "AFTER AddSymbolBindElement" << endl; + chartEntry->AddSymbolBindElement(subPhraseRange, wordSought, hypos, *this); + //cerr << "AFTER AddSymbolBindElement" << endl; - size_t ptInd = GetPtInd(); - outPath.AddActiveChartEntry(ptInd, chartEntry); - //cerr << "AFTER AddActiveChartEntry" << endl; + size_t ptInd = GetPtInd(); + outPath.AddActiveChartEntry(ptInd, chartEntry); + //cerr << "AFTER AddActiveChartEntry" << endl; - if (tpsPair.second) { - // there are some rules - //cerr << "symbolbind=" << chartEntry->GetSymbolBind().Debug(mgr.system) << endl; - outPath.AddTargetPhrasesToPath(pool, mgr.system, *this, *tpsPair.second, chartEntry->GetSymbolBind()); - } - } + if (tpsPair.second) { + // there are some rules + //cerr << "symbolbind=" << 
chartEntry->GetSymbolBind().Debug(mgr.system) << endl; + outPath.AddTargetPhrasesToPath(pool, mgr.system, *this, *tpsPair.second, chartEntry->GetSymbolBind()); + } + } } } SCFG::TargetPhraseImpl *ProbingPT::CreateTargetPhraseSCFG( - MemPool &pool, - const System &system, - const char *&offset) const + MemPool &pool, + const System &system, + const char *&offset) const { TargetPhraseInfo *tpInfo = (TargetPhraseInfo*) offset; SCFG::TargetPhraseImpl *tp = - new (pool.Allocate()) SCFG::TargetPhraseImpl(pool, *this, - system, tpInfo->numWords - 1); + new (pool.Allocate()) SCFG::TargetPhraseImpl(pool, *this, + system, tpInfo->numWords - 1); offset += sizeof(TargetPhraseInfo); @@ -633,8 +620,7 @@ SCFG::TargetPhraseImpl *ProbingPT::CreateTargetPhraseSCFG( if (m_engine->num_lex_scores) { tp->scoreProperties = scores + m_engine->num_scores; } - } - else { + } else { // log score 1st SCORE logScores[totalNumScores]; for (size_t i = 0; i < totalNumScores; ++i) { @@ -706,47 +692,47 @@ std::pair ProbingPT::CreateTargetPhrasesSCFG(MemPool /* if (outPath.range.GetStartPos() == 1 || outPath.range.GetStartPos() == 2) { - cerr << "range=" << outPath.range - << " prevEntry=" << prevEntry.GetSymbolBind().Debug(mgr.system) << " " << prevEntryCast.GetKey() - << " wordSought=" << wordSought.Debug(mgr.system) - << " key=" << key.first << " " << key.second - << " query_result=" << query_result.first << " " << (query_result.second == NONE) - << endl; + cerr << "range=" << outPath.range + << " prevEntry=" << prevEntry.GetSymbolBind().Debug(mgr.system) << " " << prevEntryCast.GetKey() + << " wordSought=" << wordSought.Debug(mgr.system) + << " key=" << key.first << " " << key.second + << " query_result=" << query_result.first << " " << (query_result.second == NONE) + << endl; } */ if (query_result.first) { ret.first = true; - size_t ptInd = GetPtInd(); + size_t ptInd = GetPtInd(); - if (query_result.second != NONE) { - // there are some rules - const FeatureFunctions &ffs = 
system.featureFunctions; + if (query_result.second != NONE) { + // there are some rules + const FeatureFunctions &ffs = system.featureFunctions; - const char *offset = m_engine->memTPS + query_result.second; - uint64_t *numTP = (uint64_t*) offset; - //cerr << "numTP=" << *numTP << endl; + const char *offset = m_engine->memTPS + query_result.second; + uint64_t *numTP = (uint64_t*) offset; + //cerr << "numTP=" << *numTP << endl; - SCFG::TargetPhrases *tps = new (pool.Allocate()) SCFG::TargetPhrases(pool, *numTP); - ret.second = tps; + SCFG::TargetPhrases *tps = new (pool.Allocate()) SCFG::TargetPhrases(pool, *numTP); + ret.second = tps; - offset += sizeof(uint64_t); - for (size_t i = 0; i < *numTP; ++i) { - SCFG::TargetPhraseImpl *tp = CreateTargetPhraseSCFG(pool, system, offset); - assert(tp); - //cerr << "tp=" << tp->Debug(mgr.system) << endl; + offset += sizeof(uint64_t); + for (size_t i = 0; i < *numTP; ++i) { + SCFG::TargetPhraseImpl *tp = CreateTargetPhraseSCFG(pool, system, offset); + assert(tp); + //cerr << "tp=" << tp->Debug(mgr.system) << endl; - ffs.EvaluateInIsolation(pool, system, sourcePhrase, *tp); + ffs.EvaluateInIsolation(pool, system, sourcePhrase, *tp); - tps->AddTargetPhrase(*tp); + tps->AddTargetPhrase(*tp); - } + } - tps->SortAndPrune(m_tableLimit); - ffs.EvaluateAfterTablePruning(pool, *tps, sourcePhrase); - //cerr << "tps=" << tps->GetSize() << endl; + tps->SortAndPrune(m_tableLimit); + ffs.EvaluateAfterTablePruning(pool, *tps, sourcePhrase); + //cerr << "tps=" << tps->GetSize() << endl; - } + } } return ret; diff --git a/moses2/TranslationModel/ProbingPT/ProbingPT.h b/moses2/TranslationModel/ProbingPT/ProbingPT.h index c5fbefd6f..e0dcf40f1 100644 --- a/moses2/TranslationModel/ProbingPT/ProbingPT.h +++ b/moses2/TranslationModel/ProbingPT/ProbingPT.h @@ -35,35 +35,36 @@ class TargetPhrases; class ProbingPT: public Moses2::PhraseTable { ////////////////////////////////////// - class ActiveChartEntryProbing : public SCFG::ActiveChartEntry - { - 
typedef SCFG::ActiveChartEntry Parent; - public: + class ActiveChartEntryProbing : public SCFG::ActiveChartEntry + { + typedef SCFG::ActiveChartEntry Parent; + public: - ActiveChartEntryProbing(MemPool &pool) + ActiveChartEntryProbing(MemPool &pool) :Parent(pool) ,m_key(0) - {} + {} - ActiveChartEntryProbing( - MemPool &pool, - const ActiveChartEntryProbing &prevEntry); + ActiveChartEntryProbing( + MemPool &pool, + const ActiveChartEntryProbing &prevEntry); - uint64_t GetKey() const - { return m_key; } + uint64_t GetKey() const { + return m_key; + } - std::pair GetKey(const SCFG::Word &nextWord, const ProbingPT &pt) const; + std::pair GetKey(const SCFG::Word &nextWord, const ProbingPT &pt) const; - virtual void AddSymbolBindElement( - const Range &range, - const SCFG::Word &word, - const Moses2::Hypotheses *hypos, - const Moses2::PhraseTable &pt); + virtual void AddSymbolBindElement( + const Range &range, + const SCFG::Word &word, + const Moses2::Hypotheses *hypos, + const Moses2::PhraseTable &pt); - protected: - uint64_t m_key; - }; - ////////////////////////////////////// + protected: + uint64_t m_key; + }; + ////////////////////////////////////// public: ProbingPT(size_t startInd, const std::string &line); @@ -73,20 +74,21 @@ public: virtual void SetParameter(const std::string& key, const std::string& value); void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const; - uint64_t GetUnk() const - { return m_unkId; } + uint64_t GetUnk() const { + return m_unkId; + } // SCFG void InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const; + MemPool &pool, + const SCFG::Manager &mgr, + SCFG::InputPath &path) const; virtual void Lookup(MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const; + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const; protected: @@ -101,14 +103,13 @@ protected: void 
CreateAlignmentMap(System &system, const std::string path); TargetPhrases *Lookup(const Manager &mgr, MemPool &pool, - InputPath &inputPath) const; + InputPath &inputPath) const; TargetPhrases *CreateTargetPhrases(MemPool &pool, const System &system, - const Phrase &sourcePhrase, uint64_t key) const; + const Phrase &sourcePhrase, uint64_t key) const; TargetPhraseImpl *CreateTargetPhrase(MemPool &pool, const System &system, - const char *&offset) const; + const char *&offset) const; - inline const std::pair *GetTargetFactor(uint32_t probingId) const - { + inline const std::pair *GetTargetFactor(uint32_t probingId) const { if (probingId >= m_targetVocab.size()) { return NULL; } @@ -118,7 +119,7 @@ protected: std::pair GetKey(const Phrase &sourcePhrase) const; void GetSourceProbingIds(const Phrase &sourcePhrase, bool &ok, - uint64_t probingSource[]) const; + uint64_t probingSource[]) const; uint64_t GetSourceProbingId(const Word &word) const; @@ -135,22 +136,22 @@ protected: // SCFG void LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const; + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const; std::pair CreateTargetPhrasesSCFG(MemPool &pool, const System &system, const Phrase &sourcePhrase, uint64_t key) const; // return value: 1st = there are actual rules, not just a empty cell for prefix SCFG::TargetPhraseImpl *CreateTargetPhraseSCFG( - MemPool &pool, - const System &system, - const char *&offset) const; + MemPool &pool, + const System &system, + const char *&offset) const; }; diff --git a/moses2/TranslationModel/ProbingPT/StoreTarget.cpp b/moses2/TranslationModel/ProbingPT/StoreTarget.cpp index 
326aaea5f..af0de5c31 100644 --- a/moses2/TranslationModel/ProbingPT/StoreTarget.cpp +++ b/moses2/TranslationModel/ProbingPT/StoreTarget.cpp @@ -17,12 +17,12 @@ namespace Moses2 { StoreTarget::StoreTarget(const std::string &basepath) -:m_basePath(basepath) -,m_vocab(basepath + "/TargetVocab.dat") + :m_basePath(basepath) + ,m_vocab(basepath + "/TargetVocab.dat") { std::string path = basepath + "/TargetColl.dat"; m_fileTargetColl.open(path.c_str(), - std::ios::out | std::ios::binary | std::ios::ate | std::ios::trunc); + std::ios::out | std::ios::binary | std::ios::ate | std::ios::trunc); if (!m_fileTargetColl.is_open()) { throw "can't create file "; } @@ -112,10 +112,10 @@ void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg) vector nonTerms; util::TokenIter it; it = util::TokenIter(line.target_phrase, - util::SingleCharacter(' ')); + util::SingleCharacter(' ')); while (it) { - StringPiece word = *it; - //cerr << "word=" << word << endl; + StringPiece word = *it; + //cerr << "word=" << word << endl; bool nonTerm = false; if (scfg) { @@ -129,16 +129,16 @@ void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg) util::TokenIter itFactor; itFactor = util::TokenIter(word, - util::SingleCharacter('|')); + util::SingleCharacter('|')); while (itFactor) { - StringPiece factor = *itFactor; + StringPiece factor = *itFactor; - string factorStr = factor.as_string(); - uint32_t vocabId = m_vocab.GetVocabId(factorStr); + string factorStr = factor.as_string(); + uint32_t vocabId = m_vocab.GetVocabId(factorStr); - rule->target_phrase.push_back(vocabId); + rule->target_phrase.push_back(vocabId); - itFactor++; + itFactor++; } it++; @@ -146,7 +146,7 @@ void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg) // probs it = util::TokenIter(line.prob, - util::SingleCharacter(' ')); + util::SingleCharacter(' ')); while (it) { string tok = it->as_string(); float prob = Scan(tok); @@ -170,7 +170,7 @@ void StoreTarget::Append(const 
line_text &line, bool log_prob, bool scfg) // alignment it = util::TokenIter(line.word_align, - util::SingleCharacter(' ')); + util::SingleCharacter(' ')); while (it) { string tokPair = Trim(it->as_string()); if (tokPair.empty()) { @@ -193,8 +193,7 @@ void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg) rule->word_align_non_term.push_back(sourcePos); rule->word_align_non_term.push_back(targetPos); //cerr << (int) rule->word_all1.back() << " "; - } - else { + } else { rule->word_align_term.push_back(sourcePos); rule->word_align_term.push_back(targetPos); } @@ -221,19 +220,18 @@ void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg) uint32_t StoreTarget::GetAlignId(const std::vector &align) { boost::unordered_map, uint32_t>::iterator iter = - m_aligns.find(align); + m_aligns.find(align); if (iter == m_aligns.end()) { uint32_t ind = m_aligns.size(); m_aligns[align] = ind; return ind; - } - else { + } else { return iter->second; } } void StoreTarget::AppendLexRO(std::string &prop, std::vector &retvector, - bool log_prob) const + bool log_prob) const { size_t startPos = prop.find("{{LexRO "); @@ -258,7 +256,7 @@ void StoreTarget::AppendLexRO(std::string &prop, std::vector &retvector, // exclude LexRO property from property column prop = prop.substr(0, startPos) - + prop.substr(endPos + 2, prop.size() - endPos - 2); + + prop.substr(endPos + 2, prop.size() - endPos - 2); //cerr << "line.property_to_be_binarized=" << line.property_to_be_binarized << "AAAA" << endl; } } diff --git a/moses2/TranslationModel/ProbingPT/StoreTarget.h b/moses2/TranslationModel/ProbingPT/StoreTarget.h index 6fc3b1f66..77ccf08cd 100644 --- a/moses2/TranslationModel/ProbingPT/StoreTarget.h +++ b/moses2/TranslationModel/ProbingPT/StoreTarget.h @@ -43,7 +43,7 @@ protected: void Save(const target_text &rule); void AppendLexRO(std::string &prop, std::vector &retvector, - bool log_prob) const; + bool log_prob) const; }; diff --git 
a/moses2/TranslationModel/ProbingPT/StoreVocab.h b/moses2/TranslationModel/ProbingPT/StoreVocab.h index e9808707a..3c405af66 100644 --- a/moses2/TranslationModel/ProbingPT/StoreVocab.h +++ b/moses2/TranslationModel/ProbingPT/StoreVocab.h @@ -24,31 +24,27 @@ protected: public: StoreVocab(const std::string &path) - :m_path(path) + :m_path(path) {} virtual ~StoreVocab() {} - VOCABID GetVocabId(const std::string &word) - { + VOCABID GetVocabId(const std::string &word) { typename Coll::iterator iter = m_vocab.find(word); if (iter == m_vocab.end()) { VOCABID ind = m_vocab.size() + 1; m_vocab[word] = ind; return ind; - } - else { + } else { return iter->second; } } - void Insert(VOCABID id, const std::string &word) - { + void Insert(VOCABID id, const std::string &word) { m_vocab[word] = id; } - void Save() - { + void Save() { OutputFileStream strme(m_path); typename Coll::const_iterator iter; diff --git a/moses2/TranslationModel/ProbingPT/hash.cpp b/moses2/TranslationModel/ProbingPT/hash.cpp index aab5ee2b3..a0bda389a 100644 --- a/moses2/TranslationModel/ProbingPT/hash.cpp +++ b/moses2/TranslationModel/ProbingPT/hash.cpp @@ -21,16 +21,16 @@ std::vector getVocabIDs(const StringPiece &textin) util::TokenIter itWord(textin, util::SingleCharacter(' ')); while (itWord) { - StringPiece word = *itWord; - uint64_t id = 0; + StringPiece word = *itWord; + uint64_t id = 0; - util::TokenIter itFactor(word, util::SingleCharacter('|')); + util::TokenIter itFactor(word, util::SingleCharacter('|')); while (itFactor) { - StringPiece factor = *itFactor; - //cerr << "factor=" << factor << endl; + StringPiece factor = *itFactor; + //cerr << "factor=" << factor << endl; - id += getHash(factor); - itFactor++; + id += getHash(factor); + itFactor++; } output.push_back(id); diff --git a/moses2/TranslationModel/ProbingPT/querying.cpp b/moses2/TranslationModel/ProbingPT/querying.cpp index 9ea2d8cb6..d09bd45a8 100644 --- a/moses2/TranslationModel/ProbingPT/querying.cpp +++ 
b/moses2/TranslationModel/ProbingPT/querying.cpp @@ -46,10 +46,9 @@ QueryEngine::QueryEngine(const char * filepath, util::LoadMethod load_method) found = Get(keyValue, "API_VERSION", version); if (!found) { std::cerr << "Old or corrupted version of ProbingPT. Please rebinarize your phrase tables." << std::endl; - } - else if (version != API_VERSION) { + } else if (version != API_VERSION) { std::cerr << "The ProbingPT API has changed. " << version << "!=" - << API_VERSION << " Please rebinarize your phrase tables." << std::endl; + << API_VERSION << " Please rebinarize your phrase tables." << std::endl; exit(EXIT_FAILURE); } @@ -143,37 +142,37 @@ void QueryEngine::read_alignments(const std::string &alignPath) void QueryEngine::file_exits(const std::string &basePath) { - if (!FileExists(basePath + "/Alignments.dat")) { - UTIL_THROW2("Require file does not exist in: " << basePath << "/Alignments.dat"); - } - if (!FileExists(basePath + "/TargetColl.dat")) { - UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetColl.dat"); - } - if (!FileExists(basePath + "/TargetVocab.dat")) { - UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetVocab.dat"); - } - if (!FileExists(basePath + "/cache")) { - UTIL_THROW2("Require file does not exist in: " << basePath << "/cache"); - } - if (!FileExists(basePath + "/config")) { - UTIL_THROW2("Require file does not exist in: " << basePath << "/config"); - } - if (!FileExists(basePath + "/probing_hash.dat")) { - UTIL_THROW2("Require file does not exist in: " << basePath << "/probing_hash.dat"); - } - if (!FileExists(basePath + "/source_vocabids")) { - UTIL_THROW2("Require file does not exist in: " << basePath << "/source_vocabids"); - } + if (!FileExists(basePath + "/Alignments.dat")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/Alignments.dat"); + } + if (!FileExists(basePath + "/TargetColl.dat")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetColl.dat"); + 
} + if (!FileExists(basePath + "/TargetVocab.dat")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetVocab.dat"); + } + if (!FileExists(basePath + "/cache")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/cache"); + } + if (!FileExists(basePath + "/config")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/config"); + } + if (!FileExists(basePath + "/probing_hash.dat")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/probing_hash.dat"); + } + if (!FileExists(basePath + "/source_vocabids")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/source_vocabids"); + } - /* + /* - if (!FileExists(path_to_config) || !FileExists(path_to_hashtable) || - !FileExists(path_to_source_vocabid) || !FileExists(basepath + alignPath) || - !FileExists(basepath + "/TargetColl.dat") || !FileExists(basepath + "/TargetVocab.dat") || - !FileExists(basepath + "/cache")) { - UTIL_THROW2("A required table doesn't exist in: " << basepath); - } - */ + if (!FileExists(path_to_config) || !FileExists(path_to_hashtable) || + !FileExists(path_to_source_vocabid) || !FileExists(basepath + alignPath) || + !FileExists(basepath + "/TargetColl.dat") || !FileExists(basepath + "/TargetVocab.dat") || + !FileExists(basepath + "/cache")) { + UTIL_THROW2("A required table doesn't exist in: " << basepath); + } + */ } } diff --git a/moses2/TranslationModel/ProbingPT/storing.cpp b/moses2/TranslationModel/ProbingPT/storing.cpp index 75cdcc038..c27e99634 100644 --- a/moses2/TranslationModel/ProbingPT/storing.cpp +++ b/moses2/TranslationModel/ProbingPT/storing.cpp @@ -32,14 +32,12 @@ void Node::Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos) child = &m_children[vocabId]; assert(!child->done); child->key = key + (vocabId << pos); - } - else { + } else { child = &iter->second; } child->Add(table, sourcePhrase, pos + 1); - } - else { + } else { // this node was written previously 'cos it has rules done = 
true; } @@ -66,8 +64,8 @@ void Node::Write(Table &table) /////////////////////////////////////////////////////////////////////// void createProbingPT(const std::string &phrasetable_path, - const std::string &basepath, int num_scores, int num_lex_scores, - bool log_prob, int max_cache_size, bool scfg) + const std::string &basepath, int num_scores, int num_lex_scores, + bool log_prob, int max_cache_size, bool scfg) { std::cerr << "Starting..." << std::endl; @@ -123,12 +121,10 @@ void createProbingPT(const std::string &phrasetable_path, // 1st line prevSource = line.source_phrase.as_string(); storeTarget.Append(line, log_prob, scfg); - } - else if (prevSource == line.source_phrase) { + } else if (prevSource == line.source_phrase) { //If we still have the same line, just append to it: storeTarget.Append(line, log_prob, scfg); - } - else { + } else { assert(prevSource != line.source_phrase); //Create a new entry even @@ -147,7 +143,7 @@ void createProbingPT(const std::string &phrasetable_path, std::vector vocabid_source = getVocabIDs(prevSource); if (scfg) { // storing prefixes? 
- sourcePhrases.Add(sourceEntries, vocabid_source); + sourcePhrases.Add(sourceEntries, vocabid_source); } sourceEntry.key = getKey(vocabid_source); @@ -155,7 +151,7 @@ void createProbingPT(const std::string &phrasetable_path, cerr << "prevSource=" << prevSource << flush << " vocabids=" << Debug(vocabid_source) << flush << " key=" << sourceEntry.key << endl; - */ + */ //Put into table sourceEntries.Insert(sourceEntry); @@ -175,9 +171,9 @@ void createProbingPT(const std::string &phrasetable_path, uint64_t currKey = getKey(currVocabidSource); CacheItem *item = new CacheItem( - Trim(line.source_phrase.as_string()), - currKey, - toks[1]); + Trim(line.source_phrase.as_string()), + currKey, + toks[1]); cache.push(item); if (max_cache_size > 0 && cache.size() > max_cache_size) { @@ -191,8 +187,7 @@ void createProbingPT(const std::string &phrasetable_path, prevSource = line.source_phrase.as_string(); } - } - catch (util::EndOfFileException e) { + } catch (util::EndOfFileException e) { std::cerr << "Reading phrase table finished, writing remaining files to disk." 
<< std::endl; @@ -258,8 +253,8 @@ size_t countUniqueSource(const std::string &path) } void serialize_cache( - std::priority_queue, CacheItemOrderer> &cache, - const std::string &path, float totalSourceCount) + std::priority_queue, CacheItemOrderer> &cache, + const std::string &path, float totalSourceCount) { std::vector vec(cache.size()); diff --git a/moses2/TranslationModel/ProbingPT/vocabid.cpp b/moses2/TranslationModel/ProbingPT/vocabid.cpp index 696373ee5..e752f76bc 100644 --- a/moses2/TranslationModel/ProbingPT/vocabid.cpp +++ b/moses2/TranslationModel/ProbingPT/vocabid.cpp @@ -7,27 +7,27 @@ namespace Moses2 { void add_to_map(StoreVocab &sourceVocab, - const StringPiece &textin) + const StringPiece &textin) { //Tokenize util::TokenIter itWord(textin, util::SingleCharacter(' ')); while (itWord) { - StringPiece word = *itWord; + StringPiece word = *itWord; - util::TokenIter itFactor(word, util::SingleCharacter('|')); + util::TokenIter itFactor(word, util::SingleCharacter('|')); while (itFactor) { - StringPiece factor = *itFactor; + StringPiece factor = *itFactor; - sourceVocab.Insert(getHash(factor), factor.as_string()); - itFactor++; + sourceVocab.Insert(getHash(factor), factor.as_string()); + itFactor++; } itWord++; } } void serialize_map(const std::map &karta, - const std::string &filename) + const std::string &filename) { std::ofstream os(filename.c_str()); diff --git a/moses2/TranslationModel/Transliteration.cpp b/moses2/TranslationModel/Transliteration.cpp index f92348ee9..13c884508 100644 --- a/moses2/TranslationModel/Transliteration.cpp +++ b/moses2/TranslationModel/Transliteration.cpp @@ -28,7 +28,7 @@ namespace Moses2 { Transliteration::Transliteration(size_t startInd, const std::string &line) : - PhraseTable(startInd, line) + PhraseTable(startInd, line) { ReadParameters(); UTIL_THROW_IF2(m_mosesDir.empty() || @@ -63,23 +63,23 @@ SetParameter(const std::string& key, const std::string& value) } void Transliteration::Lookup(const Manager &mgr, - 
InputPathsBase &inputPaths) const + InputPathsBase &inputPaths) const { - BOOST_FOREACH(InputPathBase *pathBase, inputPaths){ - InputPath *path = static_cast(pathBase); + BOOST_FOREACH(InputPathBase *pathBase, inputPaths) { + InputPath *path = static_cast(pathBase); - if (SatisfyBackoff(mgr, *path)) { - const SubPhrase &phrase = path->subPhrase; + if (SatisfyBackoff(mgr, *path)) { + const SubPhrase &phrase = path->subPhrase; - TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path); - path->AddTargetPhrases(*this, tps); - } - } + TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path); + path->AddTargetPhrases(*this, tps); + } + } } TargetPhrases *Transliteration::Lookup(const Manager &mgr, MemPool &pool, - InputPath &inputPath) const + InputPath &inputPath) const { const SubPhrase &sourcePhrase = inputPath.subPhrase; size_t hash = sourcePhrase.hash(); @@ -121,10 +121,10 @@ TargetPhrases *Transliteration::Lookup(const Manager &mgr, MemPool &pool, } std::vector Transliteration::CreateTargetPhrases( - const Manager &mgr, - MemPool &pool, - const SubPhrase &sourcePhrase, - const std::string &outDir) const + const Manager &mgr, + MemPool &pool, + const SubPhrase &sourcePhrase, + const std::string &outDir) const { std::vector ret; @@ -137,7 +137,7 @@ std::vector Transliteration::CreateTargetPhrases( UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. 
Expecting word\tscore"); TargetPhraseImpl *tp = - new (pool.Allocate()) TargetPhraseImpl(pool, *this, mgr.system, 1); + new (pool.Allocate()) TargetPhraseImpl(pool, *this, mgr.system, 1); Moses2::Word &word = (*tp)[0]; word.CreateFromString(mgr.system.GetVocab(), mgr.system, toks[0]); @@ -166,61 +166,61 @@ void Transliteration::EvaluateInIsolation(const System &system, // SCFG /////////////////////////////////////////////////////////////////////////////////////////// void Transliteration::InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const + MemPool &pool, + const SCFG::Manager &mgr, + SCFG::InputPath &path) const { UTIL_THROW2("Not implemented"); } void Transliteration::Lookup(MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const { UTIL_THROW2("Not implemented"); } void Transliteration::LookupUnary(MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const + const SCFG::Manager &mgr, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const { UTIL_THROW2("Not implemented"); } void Transliteration::LookupNT( - MemPool &pool, - const SCFG::Manager &mgr, - const Moses2::Range &subPhraseRange, - const SCFG::InputPath &prevPath, - const SCFG::Stacks &stacks, - SCFG::InputPath &outPath) const + MemPool &pool, + const SCFG::Manager &mgr, + const Moses2::Range &subPhraseRange, + const SCFG::InputPath &prevPath, + const SCFG::Stacks &stacks, + SCFG::InputPath &outPath) const { UTIL_THROW2("Not implemented"); } void Transliteration::LookupGivenWord( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::InputPath &prevPath, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const + MemPool &pool, + const SCFG::Manager 
&mgr, + const SCFG::InputPath &prevPath, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const { UTIL_THROW2("Not implemented"); } void Transliteration::LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const { UTIL_THROW2("Not implemented"); } diff --git a/moses2/TranslationModel/Transliteration.h b/moses2/TranslationModel/Transliteration.h index 15f262ac8..593677d60 100644 --- a/moses2/TranslationModel/Transliteration.h +++ b/moses2/TranslationModel/Transliteration.h @@ -23,55 +23,55 @@ public: void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const; virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool, - InputPath &inputPath) const; + InputPath &inputPath) const; virtual void EvaluateInIsolation(const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const; + MemPool &pool, + const SCFG::Manager &mgr, + SCFG::InputPath &path) const; void Lookup(MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const; + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const; void LookupUnary(MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::Stacks &stacks, - SCFG::InputPath 
&path) const; + const SCFG::Manager &mgr, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const; protected: virtual void LookupNT( - MemPool &pool, - const SCFG::Manager &mgr, - const Moses2::Range &subPhraseRange, - const SCFG::InputPath &prevPath, - const SCFG::Stacks &stacks, - SCFG::InputPath &outPath) const; + MemPool &pool, + const SCFG::Manager &mgr, + const Moses2::Range &subPhraseRange, + const SCFG::InputPath &prevPath, + const SCFG::Stacks &stacks, + SCFG::InputPath &outPath) const; virtual void LookupGivenWord( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::InputPath &prevPath, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const; + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::InputPath &prevPath, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const; virtual void LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const; + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const; void SetParameter(const std::string& key, const std::string& value); @@ -80,10 +80,10 @@ protected: std::string m_mosesDir, m_scriptDir, m_externalDir, m_inputLang, m_outputLang; std::vector CreateTargetPhrases( - const Manager &mgr, - MemPool &pool, - const SubPhrase &sourcePhrase, - const std::string &outDir) const; + const Manager &mgr, + MemPool &pool, + const SubPhrase &sourcePhrase, + const std::string &outDir) const; }; diff --git a/moses2/TranslationModel/UnknownWordPenalty.cpp 
b/moses2/TranslationModel/UnknownWordPenalty.cpp index d786b2cff..e165e7e02 100644 --- a/moses2/TranslationModel/UnknownWordPenalty.cpp +++ b/moses2/TranslationModel/UnknownWordPenalty.cpp @@ -26,8 +26,8 @@ namespace Moses2 { UnknownWordPenalty::UnknownWordPenalty(size_t startInd, const std::string &line) -:PhraseTable(startInd, line) -,m_drop(false) + :PhraseTable(startInd, line) + ,m_drop(false) { m_tuneable = false; ReadParameters(); @@ -42,27 +42,24 @@ void UnknownWordPenalty::SetParameter(const std::string& key, const std::string& { if (key == "drop") { m_drop = Scan(value); - } - else if (key == "prefix") { + } else if (key == "prefix") { m_prefix = value; - } - else if (key == "suffix") { + } else if (key == "suffix") { m_suffix = value; - } - else { + } else { PhraseTable::SetParameter(key, value); } } void UnknownWordPenalty::ProcessXML( - const Manager &mgr, - MemPool &pool, - const Sentence &sentence, - InputPaths &inputPaths) const + const Manager &mgr, + MemPool &pool, + const Sentence &sentence, + InputPaths &inputPaths) const { - const Vector &xmlOptions = sentence.GetXMLOptions(); - BOOST_FOREACH(const InputType::XMLOption *xmlOption, xmlOptions) { - TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(pool, *this, mgr.system, xmlOption->GetTranslation()); + const Vector &xmlOptions = sentence.GetXMLOptions(); + BOOST_FOREACH(const InputType::XMLOption *xmlOption, xmlOptions) { + TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(pool, *this, mgr.system, xmlOption->GetTranslation()); if (xmlOption->prob) { Scores &scores = target->GetScores(); @@ -80,22 +77,22 @@ void UnknownWordPenalty::ProcessXML( mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *tps, source); path->AddTargetPhrases(*this, tps); - } + } } void UnknownWordPenalty::Lookup(const Manager &mgr, - InputPathsBase &inputPaths) const + InputPathsBase &inputPaths) const { - BOOST_FOREACH(InputPathBase *pathBase, inputPaths){ - InputPath *path = 
static_cast(pathBase); + BOOST_FOREACH(InputPathBase *pathBase, inputPaths) { + InputPath *path = static_cast(pathBase); - if (SatisfyBackoff(mgr, *path)) { - const SubPhrase &phrase = path->subPhrase; + if (SatisfyBackoff(mgr, *path)) { + const SubPhrase &phrase = path->subPhrase; - TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path); - path->AddTargetPhrases(*this, tps); - } - } + TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path); + path->AddTargetPhrases(*this, tps); + } + } } @@ -108,7 +105,7 @@ TargetPhrases *UnknownWordPenalty::Lookup(const Manager &mgr, MemPool &pool, // any other pt translate this? size_t numPt = mgr.system.mappings.size(); const TargetPhrases **allTPS = - static_cast(inputPath).targetPhrases; + static_cast(inputPath).targetPhrases; for (size_t i = 0; i < numPt; ++i) { const TargetPhrases *otherTps = allTPS[i]; @@ -126,16 +123,15 @@ TargetPhrases *UnknownWordPenalty::Lookup(const Manager &mgr, MemPool &pool, size_t numWords = m_drop ? 0 : 1; TargetPhraseImpl *target = - new (pool.Allocate()) TargetPhraseImpl(pool, *this, - system, numWords); + new (pool.Allocate()) TargetPhraseImpl(pool, *this, + system, numWords); if (!m_drop) { Moses2::Word &word = (*target)[0]; if (m_prefix.empty() && m_suffix.empty()) { word[0] = factor; - } - else { + } else { stringstream strm; if (!m_prefix.empty()) { strm << m_prefix; @@ -172,17 +168,17 @@ void UnknownWordPenalty::EvaluateInIsolation(const System &system, // SCFG /////////////////////////////////////////////////////////////////////////////////////////// void UnknownWordPenalty::InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const + MemPool &pool, + const SCFG::Manager &mgr, + SCFG::InputPath &path) const { } void UnknownWordPenalty::Lookup(MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + 
SCFG::InputPath &path) const { const System &system = mgr.system; @@ -193,7 +189,7 @@ void UnknownWordPenalty::Lookup(MemPool &pool, } if (path.GetNumRules()) { - // only create rules if no other rules + // only create rules if no other rules return; } @@ -240,43 +236,43 @@ void UnknownWordPenalty::Lookup(MemPool &pool, } void UnknownWordPenalty::LookupUnary(MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const + const SCFG::Manager &mgr, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const { } void UnknownWordPenalty::LookupNT( - MemPool &pool, - const SCFG::Manager &mgr, - const Moses2::Range &subPhraseRange, - const SCFG::InputPath &prevPath, - const SCFG::Stacks &stacks, - SCFG::InputPath &outPath) const + MemPool &pool, + const SCFG::Manager &mgr, + const Moses2::Range &subPhraseRange, + const SCFG::InputPath &prevPath, + const SCFG::Stacks &stacks, + SCFG::InputPath &outPath) const { UTIL_THROW2("Not implemented"); } void UnknownWordPenalty::LookupGivenWord( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::InputPath &prevPath, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::InputPath &prevPath, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const { UTIL_THROW2("Not implemented"); } void UnknownWordPenalty::LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath 
&outPath) const { UTIL_THROW2("Not implemented"); } diff --git a/moses2/TranslationModel/UnknownWordPenalty.h b/moses2/TranslationModel/UnknownWordPenalty.h index 52c235a36..112f0b6cf 100644 --- a/moses2/TranslationModel/UnknownWordPenalty.h +++ b/moses2/TranslationModel/UnknownWordPenalty.h @@ -25,61 +25,61 @@ public: void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const; virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool, - InputPath &inputPath) const; + InputPath &inputPath) const; void ProcessXML( - const Manager &mgr, - MemPool &pool, - const Sentence &sentence, - InputPaths &inputPaths) const; + const Manager &mgr, + MemPool &pool, + const Sentence &sentence, + InputPaths &inputPaths) const; virtual void EvaluateInIsolation(const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; + const TargetPhraseImpl &targetPhrase, Scores &scores, + SCORE &estimatedScore) const; virtual void InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const; + MemPool &pool, + const SCFG::Manager &mgr, + SCFG::InputPath &path) const; void Lookup(MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const; + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const; void LookupUnary(MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const; + const SCFG::Manager &mgr, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const; protected: virtual void LookupNT( - MemPool &pool, - const SCFG::Manager &mgr, - const Moses2::Range &subPhraseRange, - const SCFG::InputPath &prevPath, - const SCFG::Stacks &stacks, - SCFG::InputPath &outPath) const; + MemPool &pool, + const SCFG::Manager &mgr, + const Moses2::Range &subPhraseRange, + const SCFG::InputPath &prevPath, + const SCFG::Stacks 
&stacks, + SCFG::InputPath &outPath) const; virtual void LookupGivenWord( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::InputPath &prevPath, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const; + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::InputPath &prevPath, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const; virtual void LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const; + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const; protected: bool m_drop; std::string m_prefix, m_suffix; diff --git a/moses2/TranslationTask.cpp b/moses2/TranslationTask.cpp index 375e4709b..219d9ffcb 100644 --- a/moses2/TranslationTask.cpp +++ b/moses2/TranslationTask.cpp @@ -10,14 +10,13 @@ namespace Moses2 { TranslationTask::TranslationTask(System &system, - const std::string &line, - long translationId) + const std::string &line, + long translationId) { if (system.isPb) { - m_mgr = new Manager(system, *this, line, translationId); - } - else { - m_mgr = new SCFG::Manager(system, *this, line, translationId); + m_mgr = new Manager(system, *this, line, translationId); + } else { + m_mgr = new SCFG::Manager(system, *this, line, translationId); } } diff --git a/moses2/TrellisPaths.h b/moses2/TrellisPaths.h index 3e2d9ab9a..6a6a59c1a 100644 --- a/moses2/TrellisPaths.h +++ b/moses2/TrellisPaths.h @@ -14,10 +14,8 @@ namespace Moses2 { template -struct CompareTrellisPath -{ - bool operator()(const T* pathA, const T* pathB) const - { 
+struct CompareTrellisPath { + bool operator()(const T* pathA, const T* pathB) const { return (pathA->GetFutureScore() < pathB->GetFutureScore()); } }; @@ -28,27 +26,23 @@ class TrellisPaths public: TrellisPaths() {} - virtual ~TrellisPaths() - { + virtual ~TrellisPaths() { while (!empty()) { T *path = Get(); delete path; } } - bool empty() const - { + bool empty() const { return m_coll.empty(); } //! add a new entry into collection - void Add(T *trellisPath) - { + void Add(T *trellisPath) { m_coll.push(trellisPath); } - T *Get() - { + T *Get() { T *top = m_coll.top(); // Detach @@ -56,12 +50,13 @@ public: return top; } - size_t GetSize() const - { return m_coll.size(); } + size_t GetSize() const { + return m_coll.size(); + } protected: typedef std::priority_queue, - CompareTrellisPath > CollectionType; + CompareTrellisPath > CollectionType; CollectionType m_coll; }; diff --git a/moses2/TypeDef.h b/moses2/TypeDef.h index e0a1a93a3..aed39ac11 100644 --- a/moses2/TypeDef.h +++ b/moses2/TypeDef.h @@ -46,8 +46,7 @@ typedef std::vector FactorList; // Note: StaticData uses SearchAlgorithm to determine whether the translation // model is phrase-based or syntax-based. If you add a syntax-based search // algorithm here then you should also update StaticData::IsSyntax(). 
-enum SearchAlgorithm -{ +enum SearchAlgorithm { Normal = 0, CubePruning = 1, //,CubeGrowing = 2 CYKPlus = 3, @@ -108,12 +107,10 @@ public: bool added; HypothesisBase *other; - StackAdd() - { + StackAdd() { } StackAdd(bool vadded, HypothesisBase *vOther) : - added(vadded), other(vOther) - { + added(vadded), other(vOther) { } }; diff --git a/moses2/Vector.h b/moses2/Vector.h index f35e71825..404d76dd3 100644 --- a/moses2/Vector.h +++ b/moses2/Vector.h @@ -19,13 +19,11 @@ class Vector: public std::vector > public: Vector(MemPool &pool, size_t size = 0, const T &val = T()) : - Parent(size, val, MemPoolAllocator(pool)) - { + Parent(size, val, MemPoolAllocator(pool)) { } Vector(const Vector ©) : - Parent(copy) - { + Parent(copy) { } protected: diff --git a/moses2/Weights.cpp b/moses2/Weights.cpp index 643847eee..e31a0fd3b 100644 --- a/moses2/Weights.cpp +++ b/moses2/Weights.cpp @@ -52,8 +52,8 @@ void Weights::SetWeights(const FeatureFunctions &ffs, const std::string &ffName, UTIL_THROW_IF2(weights.size() != numScores, "Wrong number of weights. 
" << weights.size() << "!=" << numScores); for (size_t i = 0; i < numScores; ++i) { - SCORE weight = weights[i]; - m_weights[startInd + i] = weight; + SCORE weight = weights[i]; + m_weights[startInd + i] = weight; } } diff --git a/moses2/Weights.h b/moses2/Weights.h index c3c2cee62..96fdb5a71 100644 --- a/moses2/Weights.h +++ b/moses2/Weights.h @@ -22,8 +22,7 @@ public: virtual ~Weights(); void Init(const FeatureFunctions &ffs); - SCORE operator[](size_t ind) const - { + SCORE operator[](size_t ind) const { return m_weights[ind]; } diff --git a/moses2/Word.cpp b/moses2/Word.cpp index fe10330e7..f272f7cdc 100644 --- a/moses2/Word.cpp +++ b/moses2/Word.cpp @@ -33,7 +33,7 @@ Word::~Word() } void Word::CreateFromString(FactorCollection &vocab, const System &system, - const std::string &str) + const std::string &str) { vector toks = Tokenize(str, "|"); for (size_t i = 0; i < toks.size(); ++i) { @@ -45,7 +45,7 @@ void Word::CreateFromString(FactorCollection &vocab, const System &system, // null the rest for (size_t i = toks.size(); i < MAX_NUM_FACTORS; ++i) { - m_factors[i] = NULL; + m_factors[i] = NULL; } } @@ -53,7 +53,7 @@ size_t Word::hash() const { uint64_t seed = 0; size_t ret = util::MurmurHashNative(m_factors, - sizeof(Factor*) * MAX_NUM_FACTORS, seed); + sizeof(Factor*) * MAX_NUM_FACTORS, seed); return ret; } @@ -61,9 +61,9 @@ size_t Word::hash(const std::vector &factors) const { size_t seed = 0; for (size_t i = 0; i < factors.size(); ++i) { - FactorType factorType = factors[i]; - const Factor *factor = m_factors[factorType]; - boost::hash_combine(seed, factor); + FactorType factorType = factors[i]; + const Factor *factor = m_factors[factorType]; + boost::hash_combine(seed, factor); } return seed; } @@ -73,7 +73,7 @@ int Word::Compare(const Word &compare) const { int cmp = memcmp(m_factors, compare.m_factors, - sizeof(Factor*) * MAX_NUM_FACTORS); + sizeof(Factor*) * MAX_NUM_FACTORS); return cmp; /* @@ -112,7 +112,7 @@ void Word::OutputToStream(const System 
&system, std::ostream &out) const out << *m_factors[ factorTypes[0] ]; for (size_t i = 1; i < factorTypes.size(); ++i) { - FactorType factorType = factorTypes[i]; + FactorType factorType = factorTypes[i]; const Factor *factor = m_factors[factorType]; out << "|" << *factor; diff --git a/moses2/Word.h b/moses2/Word.h index 7210c5140..9d742eece 100644 --- a/moses2/Word.h +++ b/moses2/Word.h @@ -24,33 +24,29 @@ public: virtual ~Word(); void CreateFromString(FactorCollection &vocab, const System &system, - const std::string &str); + const std::string &str); virtual size_t hash() const; virtual size_t hash(const std::vector &factors) const; int Compare(const Word &compare) const; - virtual bool operator==(const Word &compare) const - { + virtual bool operator==(const Word &compare) const { int cmp = Compare(compare); return cmp == 0; } - virtual bool operator!=(const Word &compare) const - { + virtual bool operator!=(const Word &compare) const { return !((*this) == compare); } virtual bool operator<(const Word &compare) const; - const Factor* operator[](size_t ind) const - { + const Factor* operator[](size_t ind) const { return m_factors[ind]; } - const Factor*& operator[](size_t ind) - { + const Factor*& operator[](size_t ind) { return m_factors[ind]; } diff --git a/moses2/defer/CubePruningBitmapStack/Misc.cpp b/moses2/defer/CubePruningBitmapStack/Misc.cpp index 5eb7893f2..9f994ba8b 100644 --- a/moses2/defer/CubePruningBitmapStack/Misc.cpp +++ b/moses2/defer/CubePruningBitmapStack/Misc.cpp @@ -21,83 +21,81 @@ namespace NSCubePruningBitmapStack //////////////////////////////////////////////////////////////////////// QueueItem *QueueItem::Create(QueueItem *currItem, - Manager &mgr, - CubeEdge &edge, - size_t hypoIndex, - size_t tpIndex, - std::deque &queueItemRecycler) + Manager &mgr, + CubeEdge &edge, + size_t hypoIndex, + size_t tpIndex, + std::deque &queueItemRecycler) { - QueueItem *ret; - if (currItem) { - // reuse incoming queue item to create new item - ret = 
currItem; - ret->Init(mgr, edge, hypoIndex, tpIndex); - } - else if (!queueItemRecycler.empty()) { - // use item from recycle bin - ret = queueItemRecycler.back(); - ret->Init(mgr, edge, hypoIndex, tpIndex); - queueItemRecycler.pop_back(); - } - else { - // create new item - ret = new (mgr.GetPool().Allocate()) QueueItem(mgr, edge, hypoIndex, tpIndex); - } + QueueItem *ret; + if (currItem) { + // reuse incoming queue item to create new item + ret = currItem; + ret->Init(mgr, edge, hypoIndex, tpIndex); + } else if (!queueItemRecycler.empty()) { + // use item from recycle bin + ret = queueItemRecycler.back(); + ret->Init(mgr, edge, hypoIndex, tpIndex); + queueItemRecycler.pop_back(); + } else { + // create new item + ret = new (mgr.GetPool().Allocate()) QueueItem(mgr, edge, hypoIndex, tpIndex); + } - return ret; + return ret; } QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex) -:edge(&edge) -,hypoIndex(hypoIndex) -,tpIndex(tpIndex) + :edge(&edge) + ,hypoIndex(hypoIndex) + ,tpIndex(tpIndex) { - CreateHypothesis(mgr); + CreateHypothesis(mgr); } void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex) { - this->edge = &edge; - this->hypoIndex = hypoIndex; - this->tpIndex = tpIndex; + this->edge = &edge; + this->hypoIndex = hypoIndex; + this->tpIndex = tpIndex; - CreateHypothesis(mgr); + CreateHypothesis(mgr); } void QueueItem::CreateHypothesis(Manager &mgr) { - const Hypothesis *prevHypo = edge->hypos[hypoIndex]; - const TargetPhrase &tp = edge->tps[tpIndex]; + const Hypothesis *prevHypo = edge->hypos[hypoIndex]; + const TargetPhrase &tp = edge->tps[tpIndex]; - //cerr << "hypoIndex=" << hypoIndex << endl; - //cerr << "edge.hypos=" << edge.hypos.size() << endl; - //cerr << prevHypo << endl; - //cerr << *prevHypo << endl; + //cerr << "hypoIndex=" << hypoIndex << endl; + //cerr << "edge.hypos=" << edge.hypos.size() << endl; + //cerr << prevHypo << endl; + //cerr << *prevHypo << endl; - hypo = 
Hypothesis::Create(mgr.GetSystemPool(), mgr); - hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore); - hypo->EvaluateWhenApplied(); + hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); + hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore); + hypo->EvaluateWhenApplied(); } //////////////////////////////////////////////////////////////////////// CubeEdge::CubeEdge( - Manager &mgr, - const Hypotheses &hypos, - const InputPath &path, - const TargetPhrases &tps, - const Bitmap &newBitmap) -:hypos(hypos) -,path(path) -,tps(tps) -,newBitmap(newBitmap) + Manager &mgr, + const Hypotheses &hypos, + const InputPath &path, + const TargetPhrases &tps, + const Bitmap &newBitmap) + :hypos(hypos) + ,path(path) + ,tps(tps) + ,newBitmap(newBitmap) { - estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap); + estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap); } std::ostream& operator<<(std::ostream &out, const CubeEdge &obj) { - out << obj.newBitmap; - return out; + out << obj.newBitmap; + return out; } bool @@ -112,46 +110,46 @@ CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPos } void CubeEdge::CreateFirst(Manager &mgr, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler) + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler) { - assert(hypos.size()); - assert(tps.GetSize()); + assert(hypos.size()); + assert(tps.GetSize()); - QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler); - queue.push(item); - bool setSeen = SetSeenPosition(0, 0, seenPositions); - assert(setSeen); + QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler); + queue.push(item); + bool setSeen = SetSeenPosition(0, 0, seenPositions); + assert(setSeen); } void CubeEdge::CreateNext(Manager &mgr, - QueueItem *item, - Queue &queue, - SeenPositions &seenPositions, - std::deque 
&queueItemRecycler) + QueueItem *item, + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler) { - size_t hypoIndex = item->hypoIndex; - size_t tpIndex = item->tpIndex; + size_t hypoIndex = item->hypoIndex; + size_t tpIndex = item->tpIndex; - if (hypoIndex + 1 < hypos.size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) { - // reuse incoming queue item to create new item - QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler); - assert(newItem == item); - queue.push(newItem); - item = NULL; - } + if (hypoIndex + 1 < hypos.size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) { + // reuse incoming queue item to create new item + QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler); + assert(newItem == item); + queue.push(newItem); + item = NULL; + } - if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) { - QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler); - queue.push(newItem); - item = NULL; - } + if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) { + QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler); + queue.push(newItem); + item = NULL; + } - if (item) { - // recycle unused queue item - queueItemRecycler.push_back(item); - } + if (item) { + // recycle unused queue item + queueItemRecycler.push_back(item); + } } } diff --git a/moses2/defer/CubePruningBitmapStack/Misc.h b/moses2/defer/CubePruningBitmapStack/Misc.h index 00f3fa865..355f8f4c2 100644 --- a/moses2/defer/CubePruningBitmapStack/Misc.h +++ b/moses2/defer/CubePruningBitmapStack/Misc.h @@ -31,24 +31,24 @@ class CubeEdge; /////////////////////////////////////////// class QueueItem { - ~QueueItem(); // NOT IMPLEMENTED. Use MemPool + ~QueueItem(); // NOT IMPLEMENTED. 
Use MemPool public: - static QueueItem *Create(QueueItem *currItem, - Manager &mgr, - CubeEdge &edge, - size_t hypoIndex, - size_t tpIndex, - std::deque &queueItemRecycler); - QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); + static QueueItem *Create(QueueItem *currItem, + Manager &mgr, + CubeEdge &edge, + size_t hypoIndex, + size_t tpIndex, + std::deque &queueItemRecycler); + QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); - void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); + void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); - CubeEdge *edge; - size_t hypoIndex, tpIndex; - Hypothesis *hypo; + CubeEdge *edge; + size_t hypoIndex, tpIndex; + Hypothesis *hypo; protected: - void CreateHypothesis(Manager &mgr); + void CreateHypothesis(Manager &mgr); }; /////////////////////////////////////////// @@ -56,8 +56,8 @@ class QueueItemOrderer { public: bool operator()(QueueItem* itemA, QueueItem* itemB) const { - HypothesisFutureScoreOrderer orderer; - return !orderer(itemA->hypo, itemB->hypo); + HypothesisFutureScoreOrderer orderer; + return !orderer(itemA->hypo, itemB->hypo); } }; @@ -67,38 +67,38 @@ class CubeEdge friend std::ostream& operator<<(std::ostream &, const CubeEdge &); public: - typedef std::priority_queue, - QueueItemOrderer> Queue; + typedef std::priority_queue, + QueueItemOrderer> Queue; - typedef std::pair SeenPositionItem; - typedef boost::unordered_set, - std::equal_to > SeenPositions; + typedef std::pair SeenPositionItem; + typedef boost::unordered_set, + std::equal_to > SeenPositions; - const Hypotheses &hypos; - const InputPath &path; - const TargetPhrases &tps; - const Bitmap &newBitmap; - SCORE estimatedScore; + const Hypotheses &hypos; + const InputPath &path; + const TargetPhrases &tps; + const Bitmap &newBitmap; + SCORE estimatedScore; - CubeEdge(Manager &mgr, - const Hypotheses &hypos, - const InputPath &path, - const TargetPhrases &tps, - const 
Bitmap &newBitmap); + CubeEdge(Manager &mgr, + const Hypotheses &hypos, + const InputPath &path, + const TargetPhrases &tps, + const Bitmap &newBitmap); bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const; void CreateFirst(Manager &mgr, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler); + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler); void CreateNext(Manager &mgr, - QueueItem *item, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler); + QueueItem *item, + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler); protected: diff --git a/moses2/defer/CubePruningBitmapStack/Search.cpp b/moses2/defer/CubePruningBitmapStack/Search.cpp index 6188edfa4..8c06f1340 100644 --- a/moses2/defer/CubePruningBitmapStack/Search.cpp +++ b/moses2/defer/CubePruningBitmapStack/Search.cpp @@ -26,12 +26,12 @@ namespace NSCubePruningBitmapStack //////////////////////////////////////////////////////////////////////// Search::Search(Manager &mgr) -:Moses2::Search(mgr) -,m_stack(mgr) + :Moses2::Search(mgr) + ,m_stack(mgr) -,m_queue(QueueItemOrderer(), std::vector() ) + ,m_queue(QueueItemOrderer(), std::vector() ) -,m_seenPositions() + ,m_seenPositions() { } @@ -41,105 +41,105 @@ Search::~Search() void Search::Decode() { - // init cue edges - m_cubeEdges.resize(mgr.GetInput().GetSize() + 1); - for (size_t i = 0; i < m_cubeEdges.size(); ++i) { - m_cubeEdges[i] = new (mgr.GetPool().Allocate()) CubeEdges(); - } + // init cue edges + m_cubeEdges.resize(mgr.GetInput().GetSize() + 1); + for (size_t i = 0; i < m_cubeEdges.size(); ++i) { + m_cubeEdges[i] = new (mgr.GetPool().Allocate()) CubeEdges(); + } - const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); - Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); - initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap); - 
initHypo->EmptyHypothesisState(mgr.GetInput()); + const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); + Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); + initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap); + initHypo->EmptyHypothesisState(mgr.GetInput()); - m_stack.Add(initHypo, mgr.GetHypoRecycle()); - PostDecode(0); + m_stack.Add(initHypo, mgr.GetHypoRecycle()); + PostDecode(0); - for (size_t stackInd = 1; stackInd < mgr.GetInput().GetSize() + 1; ++stackInd) { - //cerr << "stackInd=" << stackInd << endl; - m_stack.Clear(); - Decode(stackInd); - PostDecode(stackInd); + for (size_t stackInd = 1; stackInd < mgr.GetInput().GetSize() + 1; ++stackInd) { + //cerr << "stackInd=" << stackInd << endl; + m_stack.Clear(); + Decode(stackInd); + PostDecode(stackInd); - //m_stack.DebugCounts(); - //cerr << m_stacks << endl; - } + //m_stack.DebugCounts(); + //cerr << m_stacks << endl; + } } void Search::Decode(size_t stackInd) { - Recycler &hypoRecycler = mgr.GetHypoRecycle(); + Recycler &hypoRecycler = mgr.GetHypoRecycle(); - // reuse queue from previous stack. Clear it first - std::vector &container = Container(m_queue); - //cerr << "container=" << container.size() << endl; - BOOST_FOREACH(QueueItem *item, container) { - // recycle unused hypos from queue - Hypothesis *hypo = item->hypo; - hypoRecycler.Recycle(hypo); + // reuse queue from previous stack. 
Clear it first + std::vector &container = Container(m_queue); + //cerr << "container=" << container.size() << endl; + BOOST_FOREACH(QueueItem *item, container) { + // recycle unused hypos from queue + Hypothesis *hypo = item->hypo; + hypoRecycler.Recycle(hypo); - // recycle queue item - m_queueItemRecycler.push_back(item); - } - container.clear(); + // recycle queue item + m_queueItemRecycler.push_back(item); + } + container.clear(); - m_seenPositions.clear(); + m_seenPositions.clear(); - // add top hypo from every edge into queue - CubeEdges &edges = *m_cubeEdges[stackInd]; + // add top hypo from every edge into queue + CubeEdges &edges = *m_cubeEdges[stackInd]; - BOOST_FOREACH(CubeEdge *edge, edges) { - //cerr << *edge << " "; - edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler); - } + BOOST_FOREACH(CubeEdge *edge, edges) { + //cerr << *edge << " "; + edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler); + } - /* - cerr << "edges: "; - boost::unordered_set uniqueBM; - BOOST_FOREACH(CubeEdge *edge, edges) { - uniqueBM.insert(&edge->newBitmap); - //cerr << *edge << " "; - } - cerr << edges.size() << " " << uniqueBM.size(); - cerr << endl; - */ + /* + cerr << "edges: "; + boost::unordered_set uniqueBM; + BOOST_FOREACH(CubeEdge *edge, edges) { + uniqueBM.insert(&edge->newBitmap); + //cerr << *edge << " "; + } + cerr << edges.size() << " " << uniqueBM.size(); + cerr << endl; + */ - size_t pops = 0; - while (!m_queue.empty() && pops < mgr.system.popLimit) { - // get best hypo from queue, add to stack - //cerr << "queue=" << queue.size() << endl; - QueueItem *item = m_queue.top(); - m_queue.pop(); + size_t pops = 0; + while (!m_queue.empty() && pops < mgr.system.popLimit) { + // get best hypo from queue, add to stack + //cerr << "queue=" << queue.size() << endl; + QueueItem *item = m_queue.top(); + m_queue.pop(); - CubeEdge *edge = item->edge; + CubeEdge *edge = item->edge; - // add hypo to stack - Hypothesis *hypo = item->hypo; - 
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; - m_stack.Add(hypo, hypoRecycler); + // add hypo to stack + Hypothesis *hypo = item->hypo; + //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; + m_stack.Add(hypo, hypoRecycler); - edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler); + edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler); - ++pops; - } + ++pops; + } - /* - // create hypo from every edge. Increase diversity - while (!m_queue.empty()) { - QueueItem *item = m_queue.top(); - m_queue.pop(); + /* + // create hypo from every edge. Increase diversity + while (!m_queue.empty()) { + QueueItem *item = m_queue.top(); + m_queue.pop(); - if (item->hypoIndex == 0 && item->tpIndex == 0) { - CubeEdge &edge = item->edge; + if (item->hypoIndex == 0 && item->tpIndex == 0) { + CubeEdge &edge = item->edge; - // add hypo to stack - Hypothesis *hypo = item->hypo; - //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; - m_stacks.Add(hypo, mgr.GetHypoRecycle()); - } - } - */ + // add hypo to stack + Hypothesis *hypo = item->hypo; + //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; + m_stacks.Add(hypo, mgr.GetHypoRecycle()); + } + } + */ } void Search::PostDecode(size_t stackInd) @@ -149,54 +149,54 @@ void Search::PostDecode(size_t stackInd) Stack::SortedHypos sortedHypos = m_stack.GetSortedAndPruneHypos(mgr); BOOST_FOREACH(const Stack::SortedHypos::value_type &val, sortedHypos) { - const Bitmap &hypoBitmap = *val.first.first; - size_t hypoEndPos = val.first.second; - //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl; + const Bitmap &hypoBitmap = *val.first.first; + size_t hypoEndPos = val.first.second; + //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl; - // create edges to next hypos from existing hypos - const InputPaths &paths = mgr.GetInputPaths(); + // create edges to next hypos from existing hypos + const InputPaths &paths = mgr.GetInputPaths(); - 
BOOST_FOREACH(const InputPath *path, paths) { - const Range &pathRange = path->range; - //cerr << "pathRange=" << pathRange << endl; + BOOST_FOREACH(const InputPath *path, paths) { + const Range &pathRange = path->range; + //cerr << "pathRange=" << pathRange << endl; - if (!path->IsUsed()) { - continue; - } - if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) { - continue; - } + if (!path->IsUsed()) { + continue; + } + if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) { + continue; + } - const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); - size_t numWords = newBitmap.GetNumWordsCovered(); + const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); + size_t numWords = newBitmap.GetNumWordsCovered(); - CubeEdges &edges = *m_cubeEdges[numWords]; + CubeEdges &edges = *m_cubeEdges[numWords]; - // sort hypo for a particular bitmap and hypoEndPos - Hypotheses &sortedHypos = *val.second; + // sort hypo for a particular bitmap and hypoEndPos + Hypotheses &sortedHypos = *val.second; - size_t numPt = mgr.system.mappings.size(); - for (size_t i = 0; i < numPt; ++i) { - const TargetPhrases *tps = path->targetPhrases[i]; - if (tps && tps->GetSize()) { - CubeEdge *edge = new (pool.Allocate()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap); - edges.push_back(edge); - } - } - } + size_t numPt = mgr.system.mappings.size(); + for (size_t i = 0; i < numPt; ++i) { + const TargetPhrases *tps = path->targetPhrases[i]; + if (tps && tps->GetSize()) { + CubeEdge *edge = new (pool.Allocate()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap); + edges.push_back(edge); + } + } + } } } const Hypothesis *Search::GetBestHypo() const { - std::vector sortedHypos = m_stack.GetBestHypos(1); + std::vector sortedHypos = m_stack.GetBestHypos(1); - const Hypothesis *best = NULL; - if (sortedHypos.size()) { - best = sortedHypos[0]; - } - return best; + const Hypothesis *best = NULL; + if (sortedHypos.size()) { + best = sortedHypos[0]; + } + return best; } 
} diff --git a/moses2/defer/CubePruningBitmapStack/Search.h b/moses2/defer/CubePruningBitmapStack/Search.h index 7e58ba91f..1ff0477c6 100644 --- a/moses2/defer/CubePruningBitmapStack/Search.h +++ b/moses2/defer/CubePruningBitmapStack/Search.h @@ -26,29 +26,29 @@ namespace NSCubePruningBitmapStack class Search : public Moses2::Search { public: - Search(Manager &mgr); - virtual ~Search(); + Search(Manager &mgr); + virtual ~Search(); - virtual void Decode(); - const Hypothesis *GetBestHypo() const; + virtual void Decode(); + const Hypothesis *GetBestHypo() const; protected: - Stack m_stack; + Stack m_stack; - CubeEdge::Queue m_queue; - CubeEdge::SeenPositions m_seenPositions; + CubeEdge::Queue m_queue; + CubeEdge::SeenPositions m_seenPositions; - // CUBE PRUNING VARIABLES - // setup - typedef std::vector CubeEdges; - std::vector m_cubeEdges; + // CUBE PRUNING VARIABLES + // setup + typedef std::vector CubeEdges; + std::vector m_cubeEdges; - std::deque m_queueItemRecycler; + std::deque m_queueItemRecycler; - // CUBE PRUNING - // decoding - void Decode(size_t stackInd); - void PostDecode(size_t stackInd); + // CUBE PRUNING + // decoding + void Decode(size_t stackInd); + void PostDecode(size_t stackInd); }; } diff --git a/moses2/defer/CubePruningBitmapStack/Stack.cpp b/moses2/defer/CubePruningBitmapStack/Stack.cpp index 4dfa3b6f4..f6abd2038 100644 --- a/moses2/defer/CubePruningBitmapStack/Stack.cpp +++ b/moses2/defer/CubePruningBitmapStack/Stack.cpp @@ -20,8 +20,8 @@ namespace Moses2 namespace NSCubePruningBitmapStack { MiniStack::MiniStack(const Manager &mgr) -:m_coll() -,m_sortedHypos(NULL) + :m_coll() + ,m_sortedHypos(NULL) {} StackAdd MiniStack::Add(const Hypothesis *hypo) @@ -30,23 +30,21 @@ StackAdd MiniStack::Add(const Hypothesis *hypo) // CHECK RECOMBINATION if (addRet.second) { - // equiv hypo doesn't exists - return StackAdd(true, NULL); - } - else { - const Hypothesis *hypoExisting = *addRet.first; - if (hypo->GetScores().GetTotalScore() > 
hypoExisting->GetScores().GetTotalScore()) { - // incoming hypo is better than the one we have - const Hypothesis *const &hypoExisting1 = *addRet.first; - const Hypothesis *&hypoExisting2 = const_cast(hypoExisting1); - hypoExisting2 = hypo; + // equiv hypo doesn't exists + return StackAdd(true, NULL); + } else { + const Hypothesis *hypoExisting = *addRet.first; + if (hypo->GetScores().GetTotalScore() > hypoExisting->GetScores().GetTotalScore()) { + // incoming hypo is better than the one we have + const Hypothesis *const &hypoExisting1 = *addRet.first; + const Hypothesis *&hypoExisting2 = const_cast(hypoExisting1); + hypoExisting2 = hypo; - return StackAdd(true, const_cast(hypoExisting)); - } - else { - // already storing the best hypo. discard incoming hypo - return StackAdd(false, const_cast(hypo)); - } + return StackAdd(true, const_cast(hypoExisting)); + } else { + // already storing the best hypo. discard incoming hypo + return StackAdd(false, const_cast(hypo)); + } } assert(false); @@ -57,13 +55,13 @@ Hypotheses &MiniStack::GetSortedAndPruneHypos(const Manager &mgr) const if (m_sortedHypos == NULL) { // create sortedHypos first MemPool &pool = mgr.GetPool(); - m_sortedHypos = new (pool.Allocate< Vector >()) Vector(pool, m_coll.size()); + m_sortedHypos = new (pool.Allocate< Vector >()) Vector(pool, m_coll.size()); - size_t ind = 0; - BOOST_FOREACH(const Hypothesis *hypo, m_coll) { - (*m_sortedHypos)[ind] = hypo; - ++ind; - } + size_t ind = 0; + BOOST_FOREACH(const Hypothesis *hypo, m_coll) { + (*m_sortedHypos)[ind] = hypo; + ++ind; + } SortAndPruneHypos(mgr); } @@ -79,33 +77,33 @@ void MiniStack::SortAndPruneHypos(const Manager &mgr) const /* cerr << "UNSORTED hypos:" << endl; for (size_t i = 0; i < hypos.size(); ++i) { - const Hypothesis *hypo = hypos[i]; - cerr << *hypo << endl; + const Hypothesis *hypo = hypos[i]; + cerr << *hypo << endl; } cerr << endl; */ Hypotheses::iterator iterMiddle; iterMiddle = (stackSize == 0 || m_sortedHypos->size() < stackSize) - 
? m_sortedHypos->end() - : m_sortedHypos->begin() + stackSize; + ? m_sortedHypos->end() + : m_sortedHypos->begin() + stackSize; std::partial_sort(m_sortedHypos->begin(), iterMiddle, m_sortedHypos->end(), - HypothesisFutureScoreOrderer()); + HypothesisFutureScoreOrderer()); // prune if (stackSize && m_sortedHypos->size() > stackSize) { - for (size_t i = stackSize; i < m_sortedHypos->size(); ++i) { - Hypothesis *hypo = const_cast((*m_sortedHypos)[i]); - recycler.Recycle(hypo); - } - m_sortedHypos->resize(stackSize); + for (size_t i = stackSize; i < m_sortedHypos->size(); ++i) { + Hypothesis *hypo = const_cast((*m_sortedHypos)[i]); + recycler.Recycle(hypo); + } + m_sortedHypos->resize(stackSize); } /* cerr << "sorted hypos:" << endl; for (size_t i = 0; i < hypos.size(); ++i) { - const Hypothesis *hypo = hypos[i]; - cerr << hypo << " " << *hypo << endl; + const Hypothesis *hypo = hypos[i]; + cerr << hypo << " " << *hypo << endl; } cerr << endl; */ @@ -114,20 +112,21 @@ void MiniStack::SortAndPruneHypos(const Manager &mgr) const void MiniStack::Clear() { - m_sortedHypos = NULL; - m_coll.clear(); + m_sortedHypos = NULL; + m_coll.clear(); } /////////////////////////////////////////////////////////////// Stack::Stack(const Manager &mgr) -:m_mgr(mgr) -,m_coll() -,m_miniStackRecycler() + :m_mgr(mgr) + ,m_coll() + ,m_miniStackRecycler() { } -Stack::~Stack() { - // TODO Auto-generated destructor stub +Stack::~Stack() +{ + // TODO Auto-generated destructor stub } void Stack::Add(const Hypothesis *hypo, Recycler &hypoRecycle) @@ -136,7 +135,7 @@ void Stack::Add(const Hypothesis *hypo, Recycler &hypoRecycle) StackAdd added = GetMiniStack(key).Add(hypo); if (added.toBeDeleted) { - hypoRecycle.Recycle(added.toBeDeleted); + hypoRecycle.Recycle(added.toBeDeleted); } } @@ -144,61 +143,59 @@ std::vector Stack::GetBestHypos(size_t num) const { std::vector ret; BOOST_FOREACH(const Coll::value_type &val, m_coll) { - const MiniStack::_HCType &hypos = val.second->GetColl(); - 
ret.insert(ret.end(), hypos.begin(), hypos.end()); + const MiniStack::_HCType &hypos = val.second->GetColl(); + ret.insert(ret.end(), hypos.begin(), hypos.end()); } std::vector::iterator iterMiddle; iterMiddle = (num == 0 || ret.size() < num) - ? ret.end() - : ret.begin()+num; + ? ret.end() + : ret.begin()+num; std::partial_sort(ret.begin(), iterMiddle, ret.end(), - HypothesisFutureScoreOrderer()); + HypothesisFutureScoreOrderer()); return ret; } size_t Stack::GetHypoSize() const { - size_t ret = 0; - BOOST_FOREACH(const Coll::value_type &val, m_coll) { - const MiniStack::_HCType &hypos = val.second->GetColl(); - ret += hypos.size(); - } - return ret; + size_t ret = 0; + BOOST_FOREACH(const Coll::value_type &val, m_coll) { + const MiniStack::_HCType &hypos = val.second->GetColl(); + ret += hypos.size(); + } + return ret; } MiniStack &Stack::GetMiniStack(const HypoCoverageInternal &key) { - MiniStack *ret; - Coll::iterator iter = m_coll.find(key); - if (iter == m_coll.end()) { - if (m_miniStackRecycler.empty()) { - ret = new (m_mgr.GetPool().Allocate()) MiniStack(m_mgr); - } - else { - ret = m_miniStackRecycler.back(); - ret->Clear(); - m_miniStackRecycler.pop_back(); - } + MiniStack *ret; + Coll::iterator iter = m_coll.find(key); + if (iter == m_coll.end()) { + if (m_miniStackRecycler.empty()) { + ret = new (m_mgr.GetPool().Allocate()) MiniStack(m_mgr); + } else { + ret = m_miniStackRecycler.back(); + ret->Clear(); + m_miniStackRecycler.pop_back(); + } - m_coll[key] = ret; - } - else { - ret = iter->second; - } - return *ret; + m_coll[key] = ret; + } else { + ret = iter->second; + } + return *ret; } void Stack::Clear() { - BOOST_FOREACH(const Coll::value_type &val, m_coll) { - MiniStack *miniStack = val.second; - m_miniStackRecycler.push_back(miniStack); - } + BOOST_FOREACH(const Coll::value_type &val, m_coll) { + MiniStack *miniStack = val.second; + m_miniStackRecycler.push_back(miniStack); + } - m_coll.clear(); + m_coll.clear(); } Stack::SortedHypos 
Stack::GetSortedAndPruneHypos(const Manager &mgr) const @@ -212,31 +209,30 @@ Stack::SortedHypos Stack::GetSortedAndPruneHypos(const Manager &mgr) const size_t i = 0; BOOST_FOREACH(const Coll::value_type &val, m_coll) { - const MiniStack *miniStack = val.second; - const MiniStack::MiniStack::_HCType &hypos = miniStack->GetColl(); + const MiniStack *miniStack = val.second; + const MiniStack::MiniStack::_HCType &hypos = miniStack->GetColl(); - BOOST_FOREACH(const Hypothesis *hypo, hypos) { - (*allHypos)[i++] = hypo; - } + BOOST_FOREACH(const Hypothesis *hypo, hypos) { + (*allHypos)[i++] = hypo; + } } SortAndPruneHypos(mgr, *allHypos); // divide hypos by [bitmap, last end pos] BOOST_FOREACH(const Hypothesis *hypo, *allHypos) { - HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos()); + HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos()); - Hypotheses *hypos; - SortedHypos::iterator iter; - iter = ret.find(key); - if (iter == ret.end()) { - hypos = new (pool.Allocate()) Hypotheses(pool); - ret[key] = hypos; - } - else { - hypos = iter->second; - } - hypos->push_back(hypo); + Hypotheses *hypos; + SortedHypos::iterator iter; + iter = ret.find(key); + if (iter == ret.end()) { + hypos = new (pool.Allocate()) Hypotheses(pool); + ret[key] = hypos; + } else { + hypos = iter->second; + } + hypos->push_back(hypo); } return ret; @@ -250,33 +246,33 @@ void Stack::SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const /* cerr << "UNSORTED hypos:" << endl; for (size_t i = 0; i < hypos.size(); ++i) { - const Hypothesis *hypo = hypos[i]; - cerr << *hypo << endl; + const Hypothesis *hypo = hypos[i]; + cerr << *hypo << endl; } cerr << endl; */ Hypotheses::iterator iterMiddle; iterMiddle = (stackSize == 0 || hypos.size() < stackSize) - ? hypos.end() - : hypos.begin() + stackSize; + ? 
hypos.end() + : hypos.begin() + stackSize; std::partial_sort(hypos.begin(), iterMiddle, hypos.end(), - HypothesisFutureScoreOrderer()); + HypothesisFutureScoreOrderer()); // prune if (stackSize && hypos.size() > stackSize) { - for (size_t i = stackSize; i < hypos.size(); ++i) { - Hypothesis *hypo = const_cast(hypos[i]); - recycler.Recycle(hypo); - } - hypos.resize(stackSize); + for (size_t i = stackSize; i < hypos.size(); ++i) { + Hypothesis *hypo = const_cast(hypos[i]); + recycler.Recycle(hypo); + } + hypos.resize(stackSize); } /* cerr << "sorted hypos:" << endl; for (size_t i = 0; i < hypos.size(); ++i) { - const Hypothesis *hypo = hypos[i]; - cerr << hypo << " " << *hypo << endl; + const Hypothesis *hypo = hypos[i]; + cerr << hypo << " " << *hypo << endl; } cerr << endl; */ @@ -286,15 +282,15 @@ void Stack::SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const void Stack::DebugCounts() { - /* - cerr << "counts="; - BOOST_FOREACH(const Coll::value_type &val, GetColl()) { - const NSCubePruning::MiniStack &miniStack = *val.second; - size_t count = miniStack.GetColl().size(); - cerr << count << " "; - } - cerr << endl; - */ + /* + cerr << "counts="; + BOOST_FOREACH(const Coll::value_type &val, GetColl()) { + const NSCubePruning::MiniStack &miniStack = *val.second; + size_t count = miniStack.GetColl().size(); + cerr << count << " "; + } + cerr << endl; + */ } } diff --git a/moses2/defer/CubePruningBitmapStack/Stack.h b/moses2/defer/CubePruningBitmapStack/Stack.h index d0687ec59..f052fab42 100644 --- a/moses2/defer/CubePruningBitmapStack/Stack.h +++ b/moses2/defer/CubePruningBitmapStack/Stack.h @@ -27,77 +27,82 @@ typedef Vector Hypotheses; class MiniStack { public: - typedef boost::unordered_set, - UnorderedComparer - > _HCType; + typedef boost::unordered_set, + UnorderedComparer + > _HCType; - MiniStack(const Manager &mgr); + MiniStack(const Manager &mgr); - StackAdd Add(const Hypothesis *hypo); + StackAdd Add(const Hypothesis *hypo); - _HCType &GetColl() - 
{ return m_coll; } + _HCType &GetColl() { + return m_coll; + } - const _HCType &GetColl() const - { return m_coll; } + const _HCType &GetColl() const { + return m_coll; + } - void Clear(); + void Clear(); - Hypotheses &GetSortedAndPruneHypos(const Manager &mgr) const; + Hypotheses &GetSortedAndPruneHypos(const Manager &mgr) const; protected: - _HCType m_coll; - mutable Hypotheses *m_sortedHypos; + _HCType m_coll; + mutable Hypotheses *m_sortedHypos; - void SortAndPruneHypos(const Manager &mgr) const; + void SortAndPruneHypos(const Manager &mgr) const; }; ///////////////////////////////////////////// -class Stack { +class Stack +{ protected: public: typedef std::pair HypoCoverage; - // bitmap and current endPos of hypos + // bitmap and current endPos of hypos typedef boost::unordered_map SortedHypos; typedef const Bitmap* HypoCoverageInternal; typedef boost::unordered_map - ,std::equal_to - > Coll; + ,boost::hash + ,std::equal_to + > Coll; - Stack(const Manager &mgr); - virtual ~Stack(); + Stack(const Manager &mgr); + virtual ~Stack(); - size_t GetHypoSize() const; + size_t GetHypoSize() const; - Coll &GetColl() - { return m_coll; } - const Coll &GetColl() const - { return m_coll; } + Coll &GetColl() { + return m_coll; + } + const Coll &GetColl() const { + return m_coll; + } - void Add(const Hypothesis *hypo, Recycler &hypoRecycle); + void Add(const Hypothesis *hypo, Recycler &hypoRecycle); - MiniStack &GetMiniStack(const HypoCoverageInternal &key); + MiniStack &GetMiniStack(const HypoCoverageInternal &key); - std::vector GetBestHypos(size_t num) const; - void Clear(); + std::vector GetBestHypos(size_t num) const; + void Clear(); - SortedHypos GetSortedAndPruneHypos(const Manager &mgr) const; - void SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const; + SortedHypos GetSortedAndPruneHypos(const Manager &mgr) const; + void SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const; - void DebugCounts(); + void DebugCounts(); protected: - const Manager 
&m_mgr; - Coll m_coll; + const Manager &m_mgr; + Coll m_coll; - std::deque m_miniStackRecycler; + std::deque m_miniStackRecycler; }; diff --git a/moses2/defer/CubePruningCardinalStack/Misc.cpp b/moses2/defer/CubePruningCardinalStack/Misc.cpp index 8918fdf52..197dc108a 100644 --- a/moses2/defer/CubePruningCardinalStack/Misc.cpp +++ b/moses2/defer/CubePruningCardinalStack/Misc.cpp @@ -21,83 +21,81 @@ namespace NSCubePruningCardinalStack //////////////////////////////////////////////////////////////////////// QueueItem *QueueItem::Create(QueueItem *currItem, - Manager &mgr, - CubeEdge &edge, - size_t hypoIndex, - size_t tpIndex, - std::deque &queueItemRecycler) + Manager &mgr, + CubeEdge &edge, + size_t hypoIndex, + size_t tpIndex, + std::deque &queueItemRecycler) { - QueueItem *ret; - if (currItem) { - // reuse incoming queue item to create new item - ret = currItem; - ret->Init(mgr, edge, hypoIndex, tpIndex); - } - else if (!queueItemRecycler.empty()) { - // use item from recycle bin - ret = queueItemRecycler.back(); - ret->Init(mgr, edge, hypoIndex, tpIndex); - queueItemRecycler.pop_back(); - } - else { - // create new item - ret = new (mgr.GetPool().Allocate()) QueueItem(mgr, edge, hypoIndex, tpIndex); - } + QueueItem *ret; + if (currItem) { + // reuse incoming queue item to create new item + ret = currItem; + ret->Init(mgr, edge, hypoIndex, tpIndex); + } else if (!queueItemRecycler.empty()) { + // use item from recycle bin + ret = queueItemRecycler.back(); + ret->Init(mgr, edge, hypoIndex, tpIndex); + queueItemRecycler.pop_back(); + } else { + // create new item + ret = new (mgr.GetPool().Allocate()) QueueItem(mgr, edge, hypoIndex, tpIndex); + } - return ret; + return ret; } QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex) -:edge(&edge) -,hypoIndex(hypoIndex) -,tpIndex(tpIndex) + :edge(&edge) + ,hypoIndex(hypoIndex) + ,tpIndex(tpIndex) { - CreateHypothesis(mgr); + CreateHypothesis(mgr); } void QueueItem::Init(Manager &mgr, 
CubeEdge &edge, size_t hypoIndex, size_t tpIndex) { - this->edge = &edge; - this->hypoIndex = hypoIndex; - this->tpIndex = tpIndex; + this->edge = &edge; + this->hypoIndex = hypoIndex; + this->tpIndex = tpIndex; - CreateHypothesis(mgr); + CreateHypothesis(mgr); } void QueueItem::CreateHypothesis(Manager &mgr) { - const Hypothesis *prevHypo = edge->hypos[hypoIndex]; - const TargetPhrase &tp = edge->tps[tpIndex]; + const Hypothesis *prevHypo = edge->hypos[hypoIndex]; + const TargetPhrase &tp = edge->tps[tpIndex]; - //cerr << "hypoIndex=" << hypoIndex << endl; - //cerr << "edge.hypos=" << edge.hypos.size() << endl; - //cerr << prevHypo << endl; - //cerr << *prevHypo << endl; + //cerr << "hypoIndex=" << hypoIndex << endl; + //cerr << "edge.hypos=" << edge.hypos.size() << endl; + //cerr << prevHypo << endl; + //cerr << *prevHypo << endl; - hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); - hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore); - hypo->EvaluateWhenApplied(); + hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); + hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore); + hypo->EvaluateWhenApplied(); } //////////////////////////////////////////////////////////////////////// CubeEdge::CubeEdge( - Manager &mgr, - const Hypotheses &hypos, - const InputPath &path, - const TargetPhrases &tps, - const Bitmap &newBitmap) -:hypos(hypos) -,path(path) -,tps(tps) -,newBitmap(newBitmap) + Manager &mgr, + const Hypotheses &hypos, + const InputPath &path, + const TargetPhrases &tps, + const Bitmap &newBitmap) + :hypos(hypos) + ,path(path) + ,tps(tps) + ,newBitmap(newBitmap) { - estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap); + estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap); } std::ostream& operator<<(std::ostream &out, const CubeEdge &obj) { - out << obj.newBitmap; - return out; + out << obj.newBitmap; + return out; } bool @@ -112,46 +110,46 @@ 
CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPos } void CubeEdge::CreateFirst(Manager &mgr, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler) + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler) { - assert(hypos.size()); - assert(tps.GetSize()); + assert(hypos.size()); + assert(tps.GetSize()); - QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler); - queue.push(item); - bool setSeen = SetSeenPosition(0, 0, seenPositions); - assert(setSeen); + QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler); + queue.push(item); + bool setSeen = SetSeenPosition(0, 0, seenPositions); + assert(setSeen); } void CubeEdge::CreateNext(Manager &mgr, - QueueItem *item, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler) + QueueItem *item, + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler) { - size_t hypoIndex = item->hypoIndex; - size_t tpIndex = item->tpIndex; + size_t hypoIndex = item->hypoIndex; + size_t tpIndex = item->tpIndex; - if (hypoIndex + 1 < hypos.size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) { - // reuse incoming queue item to create new item - QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler); - assert(newItem == item); - queue.push(newItem); - item = NULL; - } + if (hypoIndex + 1 < hypos.size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) { + // reuse incoming queue item to create new item + QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler); + assert(newItem == item); + queue.push(newItem); + item = NULL; + } - if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) { - QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler); - queue.push(newItem); - item = NULL; - 
} + if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) { + QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler); + queue.push(newItem); + item = NULL; + } - if (item) { - // recycle unused queue item - queueItemRecycler.push_back(item); - } + if (item) { + // recycle unused queue item + queueItemRecycler.push_back(item); + } } } diff --git a/moses2/defer/CubePruningCardinalStack/Misc.h b/moses2/defer/CubePruningCardinalStack/Misc.h index b86c88519..9f5d28f1e 100644 --- a/moses2/defer/CubePruningCardinalStack/Misc.h +++ b/moses2/defer/CubePruningCardinalStack/Misc.h @@ -31,24 +31,24 @@ class CubeEdge; /////////////////////////////////////////// class QueueItem { - ~QueueItem(); // NOT IMPLEMENTED. Use MemPool + ~QueueItem(); // NOT IMPLEMENTED. Use MemPool public: - static QueueItem *Create(QueueItem *currItem, - Manager &mgr, - CubeEdge &edge, - size_t hypoIndex, - size_t tpIndex, - std::deque &queueItemRecycler); - QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); + static QueueItem *Create(QueueItem *currItem, + Manager &mgr, + CubeEdge &edge, + size_t hypoIndex, + size_t tpIndex, + std::deque &queueItemRecycler); + QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); - void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); + void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); - CubeEdge *edge; - size_t hypoIndex, tpIndex; - Hypothesis *hypo; + CubeEdge *edge; + size_t hypoIndex, tpIndex; + Hypothesis *hypo; protected: - void CreateHypothesis(Manager &mgr); + void CreateHypothesis(Manager &mgr); }; /////////////////////////////////////////// @@ -56,8 +56,8 @@ class QueueItemOrderer { public: bool operator()(QueueItem* itemA, QueueItem* itemB) const { - HypothesisFutureScoreOrderer orderer; - return !orderer(itemA->hypo, itemB->hypo); + HypothesisFutureScoreOrderer orderer; + return 
!orderer(itemA->hypo, itemB->hypo); } }; @@ -67,39 +67,39 @@ class CubeEdge friend std::ostream& operator<<(std::ostream &, const CubeEdge &); public: - typedef std::priority_queue, - QueueItemOrderer> Queue; + typedef std::priority_queue, + QueueItemOrderer> Queue; - typedef std::pair SeenPositionItem; - typedef boost::unordered_set, - std::equal_to - > SeenPositions; + typedef std::pair SeenPositionItem; + typedef boost::unordered_set, + std::equal_to + > SeenPositions; - const Hypotheses &hypos; - const InputPath &path; - const TargetPhrases &tps; - const Bitmap &newBitmap; - SCORE estimatedScore; + const Hypotheses &hypos; + const InputPath &path; + const TargetPhrases &tps; + const Bitmap &newBitmap; + SCORE estimatedScore; - CubeEdge(Manager &mgr, - const Hypotheses &hypos, - const InputPath &path, - const TargetPhrases &tps, - const Bitmap &newBitmap); + CubeEdge(Manager &mgr, + const Hypotheses &hypos, + const InputPath &path, + const TargetPhrases &tps, + const Bitmap &newBitmap); bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const; void CreateFirst(Manager &mgr, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler); + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler); void CreateNext(Manager &mgr, - QueueItem *item, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler); + QueueItem *item, + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler); protected: diff --git a/moses2/defer/CubePruningCardinalStack/Search.cpp b/moses2/defer/CubePruningCardinalStack/Search.cpp index d4899ae46..23cae74eb 100644 --- a/moses2/defer/CubePruningCardinalStack/Search.cpp +++ b/moses2/defer/CubePruningCardinalStack/Search.cpp @@ -26,12 +26,12 @@ namespace NSCubePruningCardinalStack //////////////////////////////////////////////////////////////////////// Search::Search(Manager &mgr) -:Moses2::Search(mgr) -,m_stack(mgr) + 
:Moses2::Search(mgr) + ,m_stack(mgr) -,m_queue(QueueItemOrderer(), std::vector() ) + ,m_queue(QueueItemOrderer(), std::vector() ) -,m_seenPositions() + ,m_seenPositions() { } @@ -41,105 +41,105 @@ Search::~Search() void Search::Decode() { - // init cue edges - m_cubeEdges.resize(mgr.GetInput().GetSize() + 1); - for (size_t i = 0; i < m_cubeEdges.size(); ++i) { - m_cubeEdges[i] = new (mgr.GetPool().Allocate()) CubeEdges(); - } + // init cue edges + m_cubeEdges.resize(mgr.GetInput().GetSize() + 1); + for (size_t i = 0; i < m_cubeEdges.size(); ++i) { + m_cubeEdges[i] = new (mgr.GetPool().Allocate()) CubeEdges(); + } - const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); - Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); - initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap); - initHypo->EmptyHypothesisState(mgr.GetInput()); + const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); + Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); + initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap); + initHypo->EmptyHypothesisState(mgr.GetInput()); - m_stack.Add(initHypo, mgr.GetHypoRecycle()); - PostDecode(0); + m_stack.Add(initHypo, mgr.GetHypoRecycle()); + PostDecode(0); - for (size_t stackInd = 1; stackInd < mgr.GetInput().GetSize() + 1; ++stackInd) { - //cerr << "stackInd=" << stackInd << endl; - m_stack.Clear(); - Decode(stackInd); - PostDecode(stackInd); + for (size_t stackInd = 1; stackInd < mgr.GetInput().GetSize() + 1; ++stackInd) { + //cerr << "stackInd=" << stackInd << endl; + m_stack.Clear(); + Decode(stackInd); + PostDecode(stackInd); - //m_stack.DebugCounts(); - //cerr << m_stacks << endl; - } + //m_stack.DebugCounts(); + //cerr << m_stacks << endl; + } } void Search::Decode(size_t stackInd) { - Recycler &hypoRecycler = mgr.GetHypoRecycle(); + Recycler &hypoRecycler = mgr.GetHypoRecycle(); - // reuse queue from previous stack. 
Clear it first - std::vector &container = Container(m_queue); - //cerr << "container=" << container.size() << endl; - BOOST_FOREACH(QueueItem *item, container) { - // recycle unused hypos from queue - Hypothesis *hypo = item->hypo; - hypoRecycler.Recycle(hypo); + // reuse queue from previous stack. Clear it first + std::vector &container = Container(m_queue); + //cerr << "container=" << container.size() << endl; + BOOST_FOREACH(QueueItem *item, container) { + // recycle unused hypos from queue + Hypothesis *hypo = item->hypo; + hypoRecycler.Recycle(hypo); - // recycle queue item - m_queueItemRecycler.push_back(item); - } - container.clear(); + // recycle queue item + m_queueItemRecycler.push_back(item); + } + container.clear(); - m_seenPositions.clear(); + m_seenPositions.clear(); - // add top hypo from every edge into queue - CubeEdges &edges = *m_cubeEdges[stackInd]; + // add top hypo from every edge into queue + CubeEdges &edges = *m_cubeEdges[stackInd]; - BOOST_FOREACH(CubeEdge *edge, edges) { - //cerr << *edge << " "; - edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler); - } + BOOST_FOREACH(CubeEdge *edge, edges) { + //cerr << *edge << " "; + edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler); + } - /* - cerr << "edges: "; - boost::unordered_set uniqueBM; - BOOST_FOREACH(CubeEdge *edge, edges) { - uniqueBM.insert(&edge->newBitmap); - //cerr << *edge << " "; - } - cerr << edges.size() << " " << uniqueBM.size(); - cerr << endl; - */ + /* + cerr << "edges: "; + boost::unordered_set uniqueBM; + BOOST_FOREACH(CubeEdge *edge, edges) { + uniqueBM.insert(&edge->newBitmap); + //cerr << *edge << " "; + } + cerr << edges.size() << " " << uniqueBM.size(); + cerr << endl; + */ - size_t pops = 0; - while (!m_queue.empty() && pops < mgr.system.popLimit) { - // get best hypo from queue, add to stack - //cerr << "queue=" << queue.size() << endl; - QueueItem *item = m_queue.top(); - m_queue.pop(); + size_t pops = 0; + while 
(!m_queue.empty() && pops < mgr.system.popLimit) { + // get best hypo from queue, add to stack + //cerr << "queue=" << queue.size() << endl; + QueueItem *item = m_queue.top(); + m_queue.pop(); - CubeEdge *edge = item->edge; + CubeEdge *edge = item->edge; - // add hypo to stack - Hypothesis *hypo = item->hypo; - //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; - m_stack.Add(hypo, hypoRecycler); + // add hypo to stack + Hypothesis *hypo = item->hypo; + //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; + m_stack.Add(hypo, hypoRecycler); - edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler); + edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler); - ++pops; - } + ++pops; + } - /* - // create hypo from every edge. Increase diversity - while (!m_queue.empty()) { - QueueItem *item = m_queue.top(); - m_queue.pop(); + /* + // create hypo from every edge. Increase diversity + while (!m_queue.empty()) { + QueueItem *item = m_queue.top(); + m_queue.pop(); - if (item->hypoIndex == 0 && item->tpIndex == 0) { - CubeEdge &edge = item->edge; + if (item->hypoIndex == 0 && item->tpIndex == 0) { + CubeEdge &edge = item->edge; - // add hypo to stack - Hypothesis *hypo = item->hypo; - //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; - m_stacks.Add(hypo, mgr.GetHypoRecycle()); - } - } - */ + // add hypo to stack + Hypothesis *hypo = item->hypo; + //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; + m_stacks.Add(hypo, mgr.GetHypoRecycle()); + } + } + */ } void Search::PostDecode(size_t stackInd) @@ -149,54 +149,54 @@ void Search::PostDecode(size_t stackInd) Stack::SortedHypos sortedHypos = m_stack.GetSortedAndPruneHypos(mgr); BOOST_FOREACH(const Stack::SortedHypos::value_type &val, sortedHypos) { - const Bitmap &hypoBitmap = *val.first.first; - size_t hypoEndPos = val.first.second; - //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl; + const Bitmap &hypoBitmap = *val.first.first; 
+ size_t hypoEndPos = val.first.second; + //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl; - // create edges to next hypos from existing hypos - const InputPaths &paths = mgr.GetInputPaths(); + // create edges to next hypos from existing hypos + const InputPaths &paths = mgr.GetInputPaths(); - BOOST_FOREACH(const InputPath *path, paths) { - const Range &pathRange = path->range; - //cerr << "pathRange=" << pathRange << endl; + BOOST_FOREACH(const InputPath *path, paths) { + const Range &pathRange = path->range; + //cerr << "pathRange=" << pathRange << endl; - if (!path->IsUsed()) { - continue; - } - if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) { - continue; - } + if (!path->IsUsed()) { + continue; + } + if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) { + continue; + } - const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); - size_t numWords = newBitmap.GetNumWordsCovered(); + const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); + size_t numWords = newBitmap.GetNumWordsCovered(); - CubeEdges &edges = *m_cubeEdges[numWords]; + CubeEdges &edges = *m_cubeEdges[numWords]; - // sort hypo for a particular bitmap and hypoEndPos - Hypotheses &sortedHypos = *val.second; + // sort hypo for a particular bitmap and hypoEndPos + Hypotheses &sortedHypos = *val.second; - size_t numPt = mgr.system.mappings.size(); - for (size_t i = 0; i < numPt; ++i) { - const TargetPhrases *tps = path->targetPhrases[i]; - if (tps && tps->GetSize()) { - CubeEdge *edge = new (pool.Allocate()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap); - edges.push_back(edge); - } - } - } + size_t numPt = mgr.system.mappings.size(); + for (size_t i = 0; i < numPt; ++i) { + const TargetPhrases *tps = path->targetPhrases[i]; + if (tps && tps->GetSize()) { + CubeEdge *edge = new (pool.Allocate()) CubeEdge(mgr, sortedHypos, *path, *tps, newBitmap); + edges.push_back(edge); + } + } + } } } const Hypothesis *Search::GetBestHypo() const { - 
std::vector sortedHypos = m_stack.GetBestHypos(1); + std::vector sortedHypos = m_stack.GetBestHypos(1); - const Hypothesis *best = NULL; - if (sortedHypos.size()) { - best = sortedHypos[0]; - } - return best; + const Hypothesis *best = NULL; + if (sortedHypos.size()) { + best = sortedHypos[0]; + } + return best; } } diff --git a/moses2/defer/CubePruningCardinalStack/Search.h b/moses2/defer/CubePruningCardinalStack/Search.h index e772926a2..f641c87d7 100644 --- a/moses2/defer/CubePruningCardinalStack/Search.h +++ b/moses2/defer/CubePruningCardinalStack/Search.h @@ -26,29 +26,29 @@ namespace NSCubePruningCardinalStack class Search : public Moses2::Search { public: - Search(Manager &mgr); - virtual ~Search(); + Search(Manager &mgr); + virtual ~Search(); - virtual void Decode(); - const Hypothesis *GetBestHypo() const; + virtual void Decode(); + const Hypothesis *GetBestHypo() const; protected: - Stack m_stack; + Stack m_stack; - CubeEdge::Queue m_queue; - CubeEdge::SeenPositions m_seenPositions; + CubeEdge::Queue m_queue; + CubeEdge::SeenPositions m_seenPositions; - // CUBE PRUNING VARIABLES - // setup - typedef std::vector CubeEdges; - std::vector m_cubeEdges; + // CUBE PRUNING VARIABLES + // setup + typedef std::vector CubeEdges; + std::vector m_cubeEdges; - std::deque m_queueItemRecycler; + std::deque m_queueItemRecycler; - // CUBE PRUNING - // decoding - void Decode(size_t stackInd); - void PostDecode(size_t stackInd); + // CUBE PRUNING + // decoding + void Decode(size_t stackInd); + void PostDecode(size_t stackInd); }; } diff --git a/moses2/defer/CubePruningCardinalStack/Stack.cpp b/moses2/defer/CubePruningCardinalStack/Stack.cpp index 0c296d8ca..60a3fe1e8 100644 --- a/moses2/defer/CubePruningCardinalStack/Stack.cpp +++ b/moses2/defer/CubePruningCardinalStack/Stack.cpp @@ -22,13 +22,14 @@ namespace NSCubePruningCardinalStack /////////////////////////////////////////////////////////////// Stack::Stack(const Manager &mgr) -:m_mgr(mgr) -,m_coll() + :m_mgr(mgr) + 
,m_coll() { } -Stack::~Stack() { - // TODO Auto-generated destructor stub +Stack::~Stack() +{ + // TODO Auto-generated destructor stub } void Stack::Add(const Hypothesis *hypo, Recycler &hypoRecycle) @@ -37,24 +38,22 @@ void Stack::Add(const Hypothesis *hypo, Recycler &hypoRecycle) // CHECK RECOMBINATION if (addRet.second) { - // equiv hypo doesn't exists - } - else { - const Hypothesis *hypoExisting = *addRet.first; - if (hypo->GetScores().GetTotalScore() > hypoExisting->GetScores().GetTotalScore()) { - // incoming hypo is better than the one we have - const Hypothesis *const &hypoExisting1 = *addRet.first; - const Hypothesis *&hypoExisting2 = const_cast(hypoExisting1); - hypoExisting2 = hypo; + // equiv hypo doesn't exists + } else { + const Hypothesis *hypoExisting = *addRet.first; + if (hypo->GetScores().GetTotalScore() > hypoExisting->GetScores().GetTotalScore()) { + // incoming hypo is better than the one we have + const Hypothesis *const &hypoExisting1 = *addRet.first; + const Hypothesis *&hypoExisting2 = const_cast(hypoExisting1); + hypoExisting2 = hypo; - Hypothesis *hypoToBeDeleted = const_cast(hypoExisting); - hypoRecycle.Recycle(hypoToBeDeleted); - } - else { - // already storing the best hypo. discard incoming hypo - Hypothesis *hypoToBeDeleted = const_cast(hypo); - hypoRecycle.Recycle(hypoToBeDeleted); - } + Hypothesis *hypoToBeDeleted = const_cast(hypoExisting); + hypoRecycle.Recycle(hypoToBeDeleted); + } else { + // already storing the best hypo. discard incoming hypo + Hypothesis *hypoToBeDeleted = const_cast(hypo); + hypoRecycle.Recycle(hypoToBeDeleted); + } } } @@ -65,24 +64,24 @@ std::vector Stack::GetBestHypos(size_t num) const std::vector::iterator iterMiddle; iterMiddle = (num == 0 || ret.size() < num) - ? ret.end() - : ret.begin()+num; + ? 
ret.end() + : ret.begin()+num; std::partial_sort(ret.begin(), iterMiddle, ret.end(), - HypothesisFutureScoreOrderer()); + HypothesisFutureScoreOrderer()); return ret; } size_t Stack::GetHypoSize() const { - return m_coll.size(); + return m_coll.size(); } void Stack::Clear() { - m_coll.clear(); + m_coll.clear(); } Stack::SortedHypos Stack::GetSortedAndPruneHypos(const Manager &mgr) const @@ -95,25 +94,24 @@ Stack::SortedHypos Stack::GetSortedAndPruneHypos(const Manager &mgr) const Hypotheses *allHypos = new (pool.Allocate()) Hypotheses(pool, GetHypoSize()); size_t i = 0; BOOST_FOREACH(const Hypothesis *hypo, m_coll) { - (*allHypos)[i++] = hypo; + (*allHypos)[i++] = hypo; } SortAndPruneHypos(mgr, *allHypos); // divide hypos by [bitmap, last end pos] BOOST_FOREACH(const Hypothesis *hypo, *allHypos) { - HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos()); + HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos()); - Hypotheses *hypos; - SortedHypos::iterator iter; - iter = ret.find(key); - if (iter == ret.end()) { - hypos = new (pool.Allocate()) Hypotheses(pool); - ret[key] = hypos; - } - else { - hypos = iter->second; - } - hypos->push_back(hypo); + Hypotheses *hypos; + SortedHypos::iterator iter; + iter = ret.find(key); + if (iter == ret.end()) { + hypos = new (pool.Allocate()) Hypotheses(pool); + ret[key] = hypos; + } else { + hypos = iter->second; + } + hypos->push_back(hypo); } return ret; @@ -160,33 +158,33 @@ void Stack::SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const /* cerr << "UNSORTED hypos:" << endl; for (size_t i = 0; i < hypos.size(); ++i) { - const Hypothesis *hypo = hypos[i]; - cerr << *hypo << endl; + const Hypothesis *hypo = hypos[i]; + cerr << *hypo << endl; } cerr << endl; */ Hypotheses::iterator iterMiddle; iterMiddle = (stackSize == 0 || hypos.size() < stackSize) - ? hypos.end() - : hypos.begin() + stackSize; + ? 
hypos.end() + : hypos.begin() + stackSize; std::partial_sort(hypos.begin(), iterMiddle, hypos.end(), - HypothesisFutureScoreOrderer()); + HypothesisFutureScoreOrderer()); // prune if (stackSize && hypos.size() > stackSize) { - for (size_t i = stackSize; i < hypos.size(); ++i) { - Hypothesis *hypo = const_cast(hypos[i]); - recycler.Recycle(hypo); - } - hypos.resize(stackSize); + for (size_t i = stackSize; i < hypos.size(); ++i) { + Hypothesis *hypo = const_cast(hypos[i]); + recycler.Recycle(hypo); + } + hypos.resize(stackSize); } /* cerr << "sorted hypos:" << endl; for (size_t i = 0; i < hypos.size(); ++i) { - const Hypothesis *hypo = hypos[i]; - cerr << hypo << " " << *hypo << endl; + const Hypothesis *hypo = hypos[i]; + cerr << hypo << " " << *hypo << endl; } cerr << endl; */ diff --git a/moses2/defer/CubePruningCardinalStack/Stack.h b/moses2/defer/CubePruningCardinalStack/Stack.h index d6ae80577..94e987b7b 100644 --- a/moses2/defer/CubePruningCardinalStack/Stack.h +++ b/moses2/defer/CubePruningCardinalStack/Stack.h @@ -26,38 +26,41 @@ typedef Vector Hypotheses; ///////////////////////////////////////////// -class Stack { +class Stack +{ protected: - typedef boost::unordered_set, - UnorderedComparer - > _HCType; + typedef boost::unordered_set, + UnorderedComparer + > _HCType; public: - typedef std::pair HypoCoverage; - typedef boost::unordered_map SortedHypos; + typedef std::pair HypoCoverage; + typedef boost::unordered_map SortedHypos; - Stack(const Manager &mgr); - virtual ~Stack(); + Stack(const Manager &mgr); + virtual ~Stack(); - size_t GetHypoSize() const; + size_t GetHypoSize() const; - _HCType &GetColl() - { return m_coll; } - const _HCType &GetColl() const - { return m_coll; } + _HCType &GetColl() { + return m_coll; + } + const _HCType &GetColl() const { + return m_coll; + } - void Add(const Hypothesis *hypo, Recycler &hypoRecycle); + void Add(const Hypothesis *hypo, Recycler &hypoRecycle); - std::vector GetBestHypos(size_t num) const; - void Clear(); + 
std::vector GetBestHypos(size_t num) const; + void Clear(); - SortedHypos GetSortedAndPruneHypos(const Manager &mgr) const; - void SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const; + SortedHypos GetSortedAndPruneHypos(const Manager &mgr) const; + void SortAndPruneHypos(const Manager &mgr, Hypotheses &hypos) const; protected: - const Manager &m_mgr; - _HCType m_coll; + const Manager &m_mgr; + _HCType m_coll; }; diff --git a/moses2/defer/CubePruningPerBitmap/Misc.cpp b/moses2/defer/CubePruningPerBitmap/Misc.cpp index 7b324e244..8e94dac5d 100644 --- a/moses2/defer/CubePruningPerBitmap/Misc.cpp +++ b/moses2/defer/CubePruningPerBitmap/Misc.cpp @@ -20,83 +20,81 @@ namespace NSCubePruningPerBitmap //////////////////////////////////////////////////////////////////////// QueueItem *QueueItem::Create(QueueItem *currItem, - Manager &mgr, - CubeEdge &edge, - size_t hypoIndex, - size_t tpIndex, - std::deque &queueItemRecycler) + Manager &mgr, + CubeEdge &edge, + size_t hypoIndex, + size_t tpIndex, + std::deque &queueItemRecycler) { - QueueItem *ret; - if (currItem) { - // reuse incoming queue item to create new item - ret = currItem; - ret->Init(mgr, edge, hypoIndex, tpIndex); - } - else if (!queueItemRecycler.empty()) { - // use item from recycle bin - ret = queueItemRecycler.back(); - ret->Init(mgr, edge, hypoIndex, tpIndex); - queueItemRecycler.pop_back(); - } - else { - // create new item - ret = new (mgr.GetPool().Allocate()) QueueItem(mgr, edge, hypoIndex, tpIndex); - } + QueueItem *ret; + if (currItem) { + // reuse incoming queue item to create new item + ret = currItem; + ret->Init(mgr, edge, hypoIndex, tpIndex); + } else if (!queueItemRecycler.empty()) { + // use item from recycle bin + ret = queueItemRecycler.back(); + ret->Init(mgr, edge, hypoIndex, tpIndex); + queueItemRecycler.pop_back(); + } else { + // create new item + ret = new (mgr.GetPool().Allocate()) QueueItem(mgr, edge, hypoIndex, tpIndex); + } - return ret; + return ret; } 
QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex) -:edge(&edge) -,hypoIndex(hypoIndex) -,tpIndex(tpIndex) + :edge(&edge) + ,hypoIndex(hypoIndex) + ,tpIndex(tpIndex) { - CreateHypothesis(mgr); + CreateHypothesis(mgr); } void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex) { - this->edge = &edge; - this->hypoIndex = hypoIndex; - this->tpIndex = tpIndex; + this->edge = &edge; + this->hypoIndex = hypoIndex; + this->tpIndex = tpIndex; - CreateHypothesis(mgr); + CreateHypothesis(mgr); } void QueueItem::CreateHypothesis(Manager &mgr) { - const Hypothesis *prevHypo = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex]; - const TargetPhrase &tp = edge->tps[tpIndex]; + const Hypothesis *prevHypo = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex]; + const TargetPhrase &tp = edge->tps[tpIndex]; - //cerr << "hypoIndex=" << hypoIndex << endl; - //cerr << "edge.hypos=" << edge.hypos.size() << endl; - //cerr << prevHypo << endl; - //cerr << *prevHypo << endl; + //cerr << "hypoIndex=" << hypoIndex << endl; + //cerr << "edge.hypos=" << edge.hypos.size() << endl; + //cerr << prevHypo << endl; + //cerr << *prevHypo << endl; - hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); - hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore); - hypo->EvaluateWhenApplied(); + hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); + hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore); + hypo->EvaluateWhenApplied(); } //////////////////////////////////////////////////////////////////////// CubeEdge::CubeEdge( - Manager &mgr, - const NSCubePruningMiniStack::MiniStack &miniStack, - const InputPath &path, - const TargetPhrases &tps, - const Bitmap &newBitmap) -:miniStack(miniStack) -,path(path) -,tps(tps) -,newBitmap(newBitmap) + Manager &mgr, + const NSCubePruningMiniStack::MiniStack &miniStack, + const InputPath &path, + const TargetPhrases &tps, + const Bitmap 
&newBitmap) + :miniStack(miniStack) + ,path(path) + ,tps(tps) + ,newBitmap(newBitmap) { - estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap); + estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap); } std::ostream& operator<<(std::ostream &out, const CubeEdge &obj) { - out << obj.newBitmap; - return out; + out << obj.newBitmap; + return out; } bool @@ -111,47 +109,47 @@ CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPos } void CubeEdge::CreateFirst(Manager &mgr, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler) + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler) { - if (miniStack.GetSortedAndPruneHypos(mgr).size()) { - assert(tps.GetSize()); + if (miniStack.GetSortedAndPruneHypos(mgr).size()) { + assert(tps.GetSize()); - QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler); - queue.push(item); - bool setSeen = SetSeenPosition(0, 0, seenPositions); - assert(setSeen); - } + QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler); + queue.push(item); + bool setSeen = SetSeenPosition(0, 0, seenPositions); + assert(setSeen); + } } void CubeEdge::CreateNext(Manager &mgr, - QueueItem *item, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler) + QueueItem *item, + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler) { - size_t hypoIndex = item->hypoIndex; - size_t tpIndex = item->tpIndex; + size_t hypoIndex = item->hypoIndex; + size_t tpIndex = item->tpIndex; - if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) { - // reuse incoming queue item to create new item - QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler); - assert(newItem == item); - queue.push(newItem); - item = NULL; - } + if (hypoIndex + 1 < 
miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) { + // reuse incoming queue item to create new item + QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler); + assert(newItem == item); + queue.push(newItem); + item = NULL; + } - if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) { - QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler); - queue.push(newItem); - item = NULL; - } + if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) { + QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler); + queue.push(newItem); + item = NULL; + } - if (item) { - // recycle unused queue item - queueItemRecycler.push_back(item); - } + if (item) { + // recycle unused queue item + queueItemRecycler.push_back(item); + } } } diff --git a/moses2/defer/CubePruningPerBitmap/Misc.h b/moses2/defer/CubePruningPerBitmap/Misc.h index 77b5ba9c3..3fa22f9a6 100644 --- a/moses2/defer/CubePruningPerBitmap/Misc.h +++ b/moses2/defer/CubePruningPerBitmap/Misc.h @@ -31,24 +31,24 @@ class CubeEdge; /////////////////////////////////////////// class QueueItem { - ~QueueItem(); // NOT IMPLEMENTED. Use MemPool + ~QueueItem(); // NOT IMPLEMENTED. 
Use MemPool public: - static QueueItem *Create(QueueItem *currItem, - Manager &mgr, - CubeEdge &edge, - size_t hypoIndex, - size_t tpIndex, - std::deque &queueItemRecycler); - QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); + static QueueItem *Create(QueueItem *currItem, + Manager &mgr, + CubeEdge &edge, + size_t hypoIndex, + size_t tpIndex, + std::deque &queueItemRecycler); + QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); - void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); + void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); - CubeEdge *edge; - size_t hypoIndex, tpIndex; - Hypothesis *hypo; + CubeEdge *edge; + size_t hypoIndex, tpIndex; + Hypothesis *hypo; protected: - void CreateHypothesis(Manager &mgr); + void CreateHypothesis(Manager &mgr); }; /////////////////////////////////////////// @@ -56,8 +56,8 @@ class QueueItemOrderer { public: bool operator()(QueueItem* itemA, QueueItem* itemB) const { - HypothesisFutureScoreOrderer orderer; - return !orderer(itemA->hypo, itemB->hypo); + HypothesisFutureScoreOrderer orderer; + return !orderer(itemA->hypo, itemB->hypo); } }; @@ -67,39 +67,39 @@ class CubeEdge friend std::ostream& operator<<(std::ostream &, const CubeEdge &); public: - typedef std::priority_queue, - QueueItemOrderer> Queue; + typedef std::priority_queue, + QueueItemOrderer> Queue; - typedef std::pair SeenPositionItem; - typedef boost::unordered_set, - std::equal_to - > SeenPositions; + typedef std::pair SeenPositionItem; + typedef boost::unordered_set, + std::equal_to + > SeenPositions; - const NSCubePruningMiniStack::MiniStack &miniStack; - const InputPath &path; - const TargetPhrases &tps; - const Bitmap &newBitmap; - SCORE estimatedScore; + const NSCubePruningMiniStack::MiniStack &miniStack; + const InputPath &path; + const TargetPhrases &tps; + const Bitmap &newBitmap; + SCORE estimatedScore; - CubeEdge(Manager &mgr, - const 
NSCubePruningMiniStack::MiniStack &miniStack, - const InputPath &path, - const TargetPhrases &tps, - const Bitmap &newBitmap); + CubeEdge(Manager &mgr, + const NSCubePruningMiniStack::MiniStack &miniStack, + const InputPath &path, + const TargetPhrases &tps, + const Bitmap &newBitmap); bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const; void CreateFirst(Manager &mgr, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler); + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler); void CreateNext(Manager &mgr, - QueueItem *item, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler); + QueueItem *item, + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler); protected: diff --git a/moses2/defer/CubePruningPerBitmap/Search.cpp b/moses2/defer/CubePruningPerBitmap/Search.cpp index b0eddcc21..d07b28a72 100644 --- a/moses2/defer/CubePruningPerBitmap/Search.cpp +++ b/moses2/defer/CubePruningPerBitmap/Search.cpp @@ -25,13 +25,13 @@ namespace NSCubePruningPerBitmap //////////////////////////////////////////////////////////////////////// Search::Search(Manager &mgr) -:Moses2::Search(mgr) -,m_stacks(mgr) + :Moses2::Search(mgr) + ,m_stacks(mgr) -,m_queue(QueueItemOrderer(), - std::vector() ) + ,m_queue(QueueItemOrderer(), + std::vector() ) -,m_seenPositions() + ,m_seenPositions() { } @@ -41,28 +41,28 @@ Search::~Search() void Search::Decode() { - // init stacks - m_stacks.Init(mgr.GetInput().GetSize() + 1); + // init stacks + m_stacks.Init(mgr.GetInput().GetSize() + 1); - const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); - Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); - initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap); - initHypo->EmptyHypothesisState(mgr.GetInput()); + const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); + Hypothesis *initHypo = 
Hypothesis::Create(mgr.GetSystemPool(), mgr); + initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap); + initHypo->EmptyHypothesisState(mgr.GetInput()); - m_stacks.Add(initHypo, mgr.GetHypoRecycle()); + m_stacks.Add(initHypo, mgr.GetHypoRecycle()); - for (size_t stackInd = 0; stackInd < m_stacks.GetSize() - 1; ++stackInd) { - CreateSearchGraph(stackInd); - } + for (size_t stackInd = 0; stackInd < m_stacks.GetSize() - 1; ++stackInd) { + CreateSearchGraph(stackInd); + } - for (size_t stackInd = 1; stackInd < m_stacks.GetSize(); ++stackInd) { - //cerr << "stackInd=" << stackInd << endl; - Decode(stackInd); + for (size_t stackInd = 1; stackInd < m_stacks.GetSize(); ++stackInd) { + //cerr << "stackInd=" << stackInd << endl; + Decode(stackInd); - //cerr << m_stacks << endl; - } + //cerr << m_stacks << endl; + } - //DebugCounts(); + //DebugCounts(); } void Search::Decode(size_t stackInd) @@ -73,26 +73,26 @@ void Search::Decode(size_t stackInd) boost::unordered_map > uniqueBM; BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) { - NSCubePruningMiniStack::MiniStack &miniStack = *val.second; + NSCubePruningMiniStack::MiniStack &miniStack = *val.second; - const Bitmap *bitmap = val.first.first; - uniqueBM[bitmap].push_back(&miniStack); + const Bitmap *bitmap = val.first.first; + uniqueBM[bitmap].push_back(&miniStack); } // decode each bitmap boost::unordered_map >::iterator iter; for (iter = uniqueBM.begin(); iter != uniqueBM.end(); ++iter) { - const vector &miniStacks = iter->second; - Decode(miniStacks); + const vector &miniStacks = iter->second; + Decode(miniStacks); } /* // FOR EACH STACK vector miniStacks; BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) { - NSCubePruningMiniStack::MiniStack &miniStack = *val.second; + NSCubePruningMiniStack::MiniStack &miniStack = *val.second; - miniStacks.push_back(&miniStack); + miniStacks.push_back(&miniStack); } Decode(miniStacks); 
*/ @@ -100,68 +100,68 @@ void Search::Decode(size_t stackInd) void Search::Decode(const vector &miniStacks) { - Recycler &hypoRecycler = mgr.GetHypoRecycle(); + Recycler &hypoRecycler = mgr.GetHypoRecycle(); - // reuse queue from previous stack. Clear it first - std::vector &container = Container(m_queue); - //cerr << "container=" << container.size() << endl; - BOOST_FOREACH(QueueItem *item, container) { - // recycle unused hypos from queue - Hypothesis *hypo = item->hypo; - hypoRecycler.Recycle(hypo); + // reuse queue from previous stack. Clear it first + std::vector &container = Container(m_queue); + //cerr << "container=" << container.size() << endl; + BOOST_FOREACH(QueueItem *item, container) { + // recycle unused hypos from queue + Hypothesis *hypo = item->hypo; + hypoRecycler.Recycle(hypo); - // recycle queue item - m_queueItemRecycler.push_back(item); - } - container.clear(); + // recycle queue item + m_queueItemRecycler.push_back(item); + } + container.clear(); - m_seenPositions.clear(); + m_seenPositions.clear(); - BOOST_FOREACH(NSCubePruningMiniStack::MiniStack *miniStack, miniStacks) { - // add top hypo from every edge into queue - CubeEdges &edges = *m_cubeEdges[miniStack]; + BOOST_FOREACH(NSCubePruningMiniStack::MiniStack *miniStack, miniStacks) { + // add top hypo from every edge into queue + CubeEdges &edges = *m_cubeEdges[miniStack]; - BOOST_FOREACH(CubeEdge *edge, edges) { - //cerr << "edge=" << *edge << endl; - edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler); - } - } + BOOST_FOREACH(CubeEdge *edge, edges) { + //cerr << "edge=" << *edge << endl; + edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler); + } + } - size_t pops = 0; - while (!m_queue.empty() && pops < mgr.system.popLimit) { - // get best hypo from queue, add to stack - //cerr << "queue=" << queue.size() << endl; - QueueItem *item = m_queue.top(); - m_queue.pop(); + size_t pops = 0; + while (!m_queue.empty() && pops < mgr.system.popLimit) { + // get 
best hypo from queue, add to stack + //cerr << "queue=" << queue.size() << endl; + QueueItem *item = m_queue.top(); + m_queue.pop(); - CubeEdge *edge = item->edge; + CubeEdge *edge = item->edge; - // add hypo to stack - Hypothesis *hypo = item->hypo; - //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; - m_stacks.Add(hypo, hypoRecycler); + // add hypo to stack + Hypothesis *hypo = item->hypo; + //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; + m_stacks.Add(hypo, hypoRecycler); - edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler); + edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler); - ++pops; - } + ++pops; + } - /* - // create hypo from every edge. Increase diversity - while (!m_queue.empty()) { - QueueItem *item = m_queue.top(); - m_queue.pop(); + /* + // create hypo from every edge. Increase diversity + while (!m_queue.empty()) { + QueueItem *item = m_queue.top(); + m_queue.pop(); - if (item->hypoIndex == 0 && item->tpIndex == 0) { - CubeEdge &edge = item->edge; + if (item->hypoIndex == 0 && item->tpIndex == 0) { + CubeEdge &edge = item->edge; - // add hypo to stack - Hypothesis *hypo = item->hypo; - //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; - m_stacks.Add(hypo, mgr.GetHypoRecycle()); - } - } - */ + // add hypo to stack + Hypothesis *hypo = item->hypo; + //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; + m_stacks.Add(hypo, mgr.GetHypoRecycle()); + } + } + */ } @@ -171,54 +171,53 @@ void Search::CreateSearchGraph(size_t stackInd) MemPool &pool = mgr.GetPool(); BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) { - const Bitmap &hypoBitmap = *val.first.first; - size_t hypoEndPos = val.first.second; - //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl; + const Bitmap &hypoBitmap = *val.first.first; + size_t hypoEndPos = val.first.second; + //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl; - // 
create edges to next hypos from existing hypos - const InputPaths &paths = mgr.GetInputPaths(); + // create edges to next hypos from existing hypos + const InputPaths &paths = mgr.GetInputPaths(); - BOOST_FOREACH(const InputPath *path, paths) { - const Range &pathRange = path->range; - //cerr << "pathRange=" << pathRange << endl; + BOOST_FOREACH(const InputPath *path, paths) { + const Range &pathRange = path->range; + //cerr << "pathRange=" << pathRange << endl; - if (!path->IsUsed()) { - continue; - } - if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) { - continue; - } + if (!path->IsUsed()) { + continue; + } + if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) { + continue; + } - const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); + const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); - // sort hypo for a particular bitmap and hypoEndPos - const NSCubePruningMiniStack::MiniStack &miniStack = *val.second; + // sort hypo for a particular bitmap and hypoEndPos + const NSCubePruningMiniStack::MiniStack &miniStack = *val.second; - // add cube edge - size_t numPt = mgr.system.mappings.size(); - for (size_t i = 0; i < numPt; ++i) { - const TargetPhrases *tps = path->targetPhrases[i]; - if (tps && tps->GetSize()) { - // create next mini stack - NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange); + // add cube edge + size_t numPt = mgr.system.mappings.size(); + for (size_t i = 0; i < numPt; ++i) { + const TargetPhrases *tps = path->targetPhrases[i]; + if (tps && tps->GetSize()) { + // create next mini stack + NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange); - CubeEdge *edge = new (pool.Allocate()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap); + CubeEdge *edge = new (pool.Allocate()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap); - CubeEdges *edges; - boost::unordered_map::iterator iter = m_cubeEdges.find(&nextMiniStack); - if 
(iter == m_cubeEdges.end()) { - edges = new (pool.Allocate()) CubeEdges(); - m_cubeEdges[&nextMiniStack] = edges; - } - else { - edges = iter->second; - } + CubeEdges *edges; + boost::unordered_map::iterator iter = m_cubeEdges.find(&nextMiniStack); + if (iter == m_cubeEdges.end()) { + edges = new (pool.Allocate()) CubeEdges(); + m_cubeEdges[&nextMiniStack] = edges; + } else { + edges = iter->second; + } - edges->push_back(edge); - } - } - } + edges->push_back(edge); + } + } + } } } @@ -226,42 +225,41 @@ void Search::CreateSearchGraph(size_t stackInd) const Hypothesis *Search::GetBestHypo() const { - const NSCubePruningMiniStack::Stack &lastStack = m_stacks.Back(); - std::vector sortedHypos = lastStack.GetBestHypos(1); + const NSCubePruningMiniStack::Stack &lastStack = m_stacks.Back(); + std::vector sortedHypos = lastStack.GetBestHypos(1); - const Hypothesis *best = NULL; - if (sortedHypos.size()) { - best = sortedHypos[0]; - } - return best; + const Hypothesis *best = NULL; + if (sortedHypos.size()) { + best = sortedHypos[0]; + } + return best; } void Search::DebugCounts() { - std::map counts; + std::map counts; - for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) { - //cerr << "stackInd=" << stackInd << endl; - const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd]; - BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) { - const NSCubePruningMiniStack::MiniStack &miniStack = *val.second; - size_t count = miniStack.GetColl().size(); + for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) { + //cerr << "stackInd=" << stackInd << endl; + const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd]; + BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) { + const NSCubePruningMiniStack::MiniStack &miniStack = *val.second; + size_t count = miniStack.GetColl().size(); - if (counts.find(count) == counts.end()) { - counts[count] = 0; - } - else { - 
++counts[count]; - } - } - //cerr << m_stacks << endl; - } + if (counts.find(count) == counts.end()) { + counts[count] = 0; + } else { + ++counts[count]; + } + } + //cerr << m_stacks << endl; + } - std::map::const_iterator iter; - for (iter = counts.begin(); iter != counts.end(); ++iter) { - cerr << iter->first << "=" << iter->second << " "; - } - cerr << endl; + std::map::const_iterator iter; + for (iter = counts.begin(); iter != counts.end(); ++iter) { + cerr << iter->first << "=" << iter->second << " "; + } + cerr << endl; } diff --git a/moses2/defer/CubePruningPerBitmap/Search.h b/moses2/defer/CubePruningPerBitmap/Search.h index 913095e25..cb2164074 100644 --- a/moses2/defer/CubePruningPerBitmap/Search.h +++ b/moses2/defer/CubePruningPerBitmap/Search.h @@ -32,32 +32,32 @@ namespace NSCubePruningPerBitmap class Search : public Moses2::Search { public: - Search(Manager &mgr); - virtual ~Search(); + Search(Manager &mgr); + virtual ~Search(); - virtual void Decode(); - const Hypothesis *GetBestHypo() const; + virtual void Decode(); + const Hypothesis *GetBestHypo() const; protected: - Stacks m_stacks; + Stacks m_stacks; - CubeEdge::Queue m_queue; - CubeEdge::SeenPositions m_seenPositions; + CubeEdge::Queue m_queue; + CubeEdge::SeenPositions m_seenPositions; - // CUBE PRUNING VARIABLES - // setup - typedef std::vector CubeEdges; - boost::unordered_map m_cubeEdges; + // CUBE PRUNING VARIABLES + // setup + typedef std::vector CubeEdges; + boost::unordered_map m_cubeEdges; - std::deque m_queueItemRecycler; + std::deque m_queueItemRecycler; - // CUBE PRUNING - // decoding - void CreateSearchGraph(size_t stackInd); - void Decode(size_t stackInd); - void Decode(const std::vector &miniStacks); + // CUBE PRUNING + // decoding + void CreateSearchGraph(size_t stackInd); + void Decode(size_t stackInd); + void Decode(const std::vector &miniStacks); - void DebugCounts(); + void DebugCounts(); }; } diff --git a/moses2/defer/CubePruningPerBitmap/Stacks.cpp 
b/moses2/defer/CubePruningPerBitmap/Stacks.cpp index ca29f52c0..9930f575e 100644 --- a/moses2/defer/CubePruningPerBitmap/Stacks.cpp +++ b/moses2/defer/CubePruningPerBitmap/Stacks.cpp @@ -18,7 +18,7 @@ namespace NSCubePruningPerBitmap { Stacks::Stacks(const Manager &mgr) -:m_mgr(mgr) + :m_mgr(mgr) { } @@ -28,18 +28,18 @@ Stacks::~Stacks() void Stacks::Init(size_t numStacks) { - m_stacks.resize(numStacks); - for (size_t i = 0; i < m_stacks.size(); ++i) { - m_stacks[i] = new (m_mgr.GetPool().Allocate()) NSCubePruningMiniStack::Stack(m_mgr); - } + m_stacks.resize(numStacks); + for (size_t i = 0; i < m_stacks.size(); ++i) { + m_stacks[i] = new (m_mgr.GetPool().Allocate()) NSCubePruningMiniStack::Stack(m_mgr); + } } std::ostream& operator<<(std::ostream &out, const Stacks &obj) { for (size_t i = 0; i < obj.GetSize(); ++i) { - const NSCubePruningMiniStack::Stack &stack = *obj.m_stacks[i]; - out << stack.GetHypoSize() << " "; + const NSCubePruningMiniStack::Stack &stack = *obj.m_stacks[i]; + out << stack.GetHypoSize() << " "; } return out; @@ -47,21 +47,21 @@ std::ostream& operator<<(std::ostream &out, const Stacks &obj) void Stacks::Add(const Hypothesis *hypo, Recycler &hypoRecycle) { - size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered(); - //cerr << "numWordsCovered=" << numWordsCovered << endl; - NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered]; - stack.Add(hypo, hypoRecycle); + size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered(); + //cerr << "numWordsCovered=" << numWordsCovered << endl; + NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered]; + stack.Add(hypo, hypoRecycle); } NSCubePruningMiniStack::MiniStack &Stacks::GetMiniStack(const Bitmap &newBitmap, const Range &pathRange) { - size_t numWordsCovered = newBitmap.GetNumWordsCovered(); - //cerr << "numWordsCovered=" << numWordsCovered << endl; - NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered]; + size_t numWordsCovered = 
newBitmap.GetNumWordsCovered(); + //cerr << "numWordsCovered=" << numWordsCovered << endl; + NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered]; - NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos()); - stack.GetMiniStack(key); + NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos()); + stack.GetMiniStack(key); } diff --git a/moses2/defer/CubePruningPerBitmap/Stacks.h b/moses2/defer/CubePruningPerBitmap/Stacks.h index 5729fa613..28d939885 100644 --- a/moses2/defer/CubePruningPerBitmap/Stacks.h +++ b/moses2/defer/CubePruningPerBitmap/Stacks.h @@ -18,29 +18,33 @@ class Manager; namespace NSCubePruningPerBitmap { -class Stacks { - friend std::ostream& operator<<(std::ostream &, const Stacks &); +class Stacks +{ + friend std::ostream& operator<<(std::ostream &, const Stacks &); public: - Stacks(const Manager &mgr); - virtual ~Stacks(); + Stacks(const Manager &mgr); + virtual ~Stacks(); - void Init(size_t numStacks); + void Init(size_t numStacks); - size_t GetSize() const - { return m_stacks.size(); } + size_t GetSize() const { + return m_stacks.size(); + } - const NSCubePruningMiniStack::Stack &Back() const - { return *m_stacks.back(); } + const NSCubePruningMiniStack::Stack &Back() const { + return *m_stacks.back(); + } - NSCubePruningMiniStack::Stack &operator[](size_t ind) - { return *m_stacks[ind]; } + NSCubePruningMiniStack::Stack &operator[](size_t ind) { + return *m_stacks[ind]; + } - void Add(const Hypothesis *hypo, Recycler &hypoRecycle); - NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange); + void Add(const Hypothesis *hypo, Recycler &hypoRecycle); + NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange); protected: - const Manager &m_mgr; - std::vector m_stacks; + const Manager &m_mgr; + std::vector m_stacks; }; diff --git a/moses2/defer/CubePruningPerMiniStack/Misc.cpp 
b/moses2/defer/CubePruningPerMiniStack/Misc.cpp index 935882aa0..de8971362 100644 --- a/moses2/defer/CubePruningPerMiniStack/Misc.cpp +++ b/moses2/defer/CubePruningPerMiniStack/Misc.cpp @@ -20,83 +20,81 @@ namespace NSCubePruningPerMiniStack //////////////////////////////////////////////////////////////////////// QueueItem *QueueItem::Create(QueueItem *currItem, - Manager &mgr, - CubeEdge &edge, - size_t hypoIndex, - size_t tpIndex, - std::deque &queueItemRecycler) + Manager &mgr, + CubeEdge &edge, + size_t hypoIndex, + size_t tpIndex, + std::deque &queueItemRecycler) { - QueueItem *ret; - if (currItem) { - // reuse incoming queue item to create new item - ret = currItem; - ret->Init(mgr, edge, hypoIndex, tpIndex); - } - else if (!queueItemRecycler.empty()) { - // use item from recycle bin - ret = queueItemRecycler.back(); - ret->Init(mgr, edge, hypoIndex, tpIndex); - queueItemRecycler.pop_back(); - } - else { - // create new item - ret = new (mgr.GetPool().Allocate()) QueueItem(mgr, edge, hypoIndex, tpIndex); - } + QueueItem *ret; + if (currItem) { + // reuse incoming queue item to create new item + ret = currItem; + ret->Init(mgr, edge, hypoIndex, tpIndex); + } else if (!queueItemRecycler.empty()) { + // use item from recycle bin + ret = queueItemRecycler.back(); + ret->Init(mgr, edge, hypoIndex, tpIndex); + queueItemRecycler.pop_back(); + } else { + // create new item + ret = new (mgr.GetPool().Allocate()) QueueItem(mgr, edge, hypoIndex, tpIndex); + } - return ret; + return ret; } QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex) -:edge(&edge) -,hypoIndex(hypoIndex) -,tpIndex(tpIndex) + :edge(&edge) + ,hypoIndex(hypoIndex) + ,tpIndex(tpIndex) { - CreateHypothesis(mgr); + CreateHypothesis(mgr); } void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex) { - this->edge = &edge; - this->hypoIndex = hypoIndex; - this->tpIndex = tpIndex; + this->edge = &edge; + this->hypoIndex = hypoIndex; + this->tpIndex 
= tpIndex; - CreateHypothesis(mgr); + CreateHypothesis(mgr); } void QueueItem::CreateHypothesis(Manager &mgr) { - const Hypothesis *prevHypo = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex]; - const TargetPhrase &tp = edge->tps[tpIndex]; + const Hypothesis *prevHypo = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex]; + const TargetPhrase &tp = edge->tps[tpIndex]; - //cerr << "hypoIndex=" << hypoIndex << endl; - //cerr << "edge.hypos=" << edge.hypos.size() << endl; - //cerr << prevHypo << endl; - //cerr << *prevHypo << endl; + //cerr << "hypoIndex=" << hypoIndex << endl; + //cerr << "edge.hypos=" << edge.hypos.size() << endl; + //cerr << prevHypo << endl; + //cerr << *prevHypo << endl; - hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); - hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore); - hypo->EvaluateWhenApplied(); + hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); + hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore); + hypo->EvaluateWhenApplied(); } //////////////////////////////////////////////////////////////////////// CubeEdge::CubeEdge( - Manager &mgr, - const NSCubePruningMiniStack::MiniStack &miniStack, - const InputPath &path, - const TargetPhrases &tps, - const Bitmap &newBitmap) -:miniStack(miniStack) -,path(path) -,tps(tps) -,newBitmap(newBitmap) + Manager &mgr, + const NSCubePruningMiniStack::MiniStack &miniStack, + const InputPath &path, + const TargetPhrases &tps, + const Bitmap &newBitmap) + :miniStack(miniStack) + ,path(path) + ,tps(tps) + ,newBitmap(newBitmap) { - estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap); + estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap); } std::ostream& operator<<(std::ostream &out, const CubeEdge &obj) { - out << obj.newBitmap; - return out; + out << obj.newBitmap; + return out; } bool @@ -111,47 +109,47 @@ CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPos } 
void CubeEdge::CreateFirst(Manager &mgr, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler) + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler) { - if (miniStack.GetSortedAndPruneHypos(mgr).size()) { - assert(tps.GetSize()); + if (miniStack.GetSortedAndPruneHypos(mgr).size()) { + assert(tps.GetSize()); - QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler); - queue.push(item); - bool setSeen = SetSeenPosition(0, 0, seenPositions); - assert(setSeen); - } + QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler); + queue.push(item); + bool setSeen = SetSeenPosition(0, 0, seenPositions); + assert(setSeen); + } } void CubeEdge::CreateNext(Manager &mgr, - QueueItem *item, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler) + QueueItem *item, + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler) { - size_t hypoIndex = item->hypoIndex; - size_t tpIndex = item->tpIndex; + size_t hypoIndex = item->hypoIndex; + size_t tpIndex = item->tpIndex; - if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) { - // reuse incoming queue item to create new item - QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler); - assert(newItem == item); - queue.push(newItem); - item = NULL; - } + if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) { + // reuse incoming queue item to create new item + QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler); + assert(newItem == item); + queue.push(newItem); + item = NULL; + } - if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) { - QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, 
queueItemRecycler); - queue.push(newItem); - item = NULL; - } + if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) { + QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler); + queue.push(newItem); + item = NULL; + } - if (item) { - // recycle unused queue item - queueItemRecycler.push_back(item); - } + if (item) { + // recycle unused queue item + queueItemRecycler.push_back(item); + } } } diff --git a/moses2/defer/CubePruningPerMiniStack/Misc.h b/moses2/defer/CubePruningPerMiniStack/Misc.h index 4a3935422..511fd42f5 100644 --- a/moses2/defer/CubePruningPerMiniStack/Misc.h +++ b/moses2/defer/CubePruningPerMiniStack/Misc.h @@ -31,24 +31,24 @@ class CubeEdge; /////////////////////////////////////////// class QueueItem { - ~QueueItem(); // NOT IMPLEMENTED. Use MemPool + ~QueueItem(); // NOT IMPLEMENTED. Use MemPool public: - static QueueItem *Create(QueueItem *currItem, - Manager &mgr, - CubeEdge &edge, - size_t hypoIndex, - size_t tpIndex, - std::deque &queueItemRecycler); - QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); + static QueueItem *Create(QueueItem *currItem, + Manager &mgr, + CubeEdge &edge, + size_t hypoIndex, + size_t tpIndex, + std::deque &queueItemRecycler); + QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); - void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); + void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); - CubeEdge *edge; - size_t hypoIndex, tpIndex; - Hypothesis *hypo; + CubeEdge *edge; + size_t hypoIndex, tpIndex; + Hypothesis *hypo; protected: - void CreateHypothesis(Manager &mgr); + void CreateHypothesis(Manager &mgr); }; /////////////////////////////////////////// @@ -56,8 +56,8 @@ class QueueItemOrderer { public: bool operator()(QueueItem* itemA, QueueItem* itemB) const { - HypothesisFutureScoreOrderer orderer; - return !orderer(itemA->hypo, itemB->hypo); 
+ HypothesisFutureScoreOrderer orderer; + return !orderer(itemA->hypo, itemB->hypo); } }; @@ -67,39 +67,39 @@ class CubeEdge friend std::ostream& operator<<(std::ostream &, const CubeEdge &); public: - typedef std::priority_queue, - QueueItemOrderer> Queue; + typedef std::priority_queue, + QueueItemOrderer> Queue; - typedef std::pair SeenPositionItem; - typedef boost::unordered_set, - std::equal_to - > SeenPositions; + typedef std::pair SeenPositionItem; + typedef boost::unordered_set, + std::equal_to + > SeenPositions; - const NSCubePruningMiniStack::MiniStack &miniStack; - const InputPath &path; - const TargetPhrases &tps; - const Bitmap &newBitmap; - SCORE estimatedScore; + const NSCubePruningMiniStack::MiniStack &miniStack; + const InputPath &path; + const TargetPhrases &tps; + const Bitmap &newBitmap; + SCORE estimatedScore; - CubeEdge(Manager &mgr, - const NSCubePruningMiniStack::MiniStack &miniStack, - const InputPath &path, - const TargetPhrases &tps, - const Bitmap &newBitmap); + CubeEdge(Manager &mgr, + const NSCubePruningMiniStack::MiniStack &miniStack, + const InputPath &path, + const TargetPhrases &tps, + const Bitmap &newBitmap); bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const; void CreateFirst(Manager &mgr, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler); + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler); void CreateNext(Manager &mgr, - QueueItem *item, - Queue &queue, - SeenPositions &seenPositions, - std::deque &queueItemRecycler); + QueueItem *item, + Queue &queue, + SeenPositions &seenPositions, + std::deque &queueItemRecycler); protected: diff --git a/moses2/defer/CubePruningPerMiniStack/Search.cpp b/moses2/defer/CubePruningPerMiniStack/Search.cpp index fe993daf0..1de52cb3d 100644 --- a/moses2/defer/CubePruningPerMiniStack/Search.cpp +++ b/moses2/defer/CubePruningPerMiniStack/Search.cpp @@ -25,13 +25,13 @@ namespace NSCubePruningPerMiniStack 
//////////////////////////////////////////////////////////////////////// Search::Search(Manager &mgr) -:Moses2::Search(mgr) -,m_stacks(mgr) + :Moses2::Search(mgr) + ,m_stacks(mgr) -,m_queue(QueueItemOrderer(), - std::vector() ) + ,m_queue(QueueItemOrderer(), + std::vector() ) -,m_seenPositions() + ,m_seenPositions() { } @@ -41,102 +41,102 @@ Search::~Search() void Search::Decode() { - // init stacks - m_stacks.Init(mgr.GetInput().GetSize() + 1); + // init stacks + m_stacks.Init(mgr.GetInput().GetSize() + 1); - const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); - Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); - initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap); - initHypo->EmptyHypothesisState(mgr.GetInput()); + const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); + Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); + initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap); + initHypo->EmptyHypothesisState(mgr.GetInput()); - m_stacks.Add(initHypo, mgr.GetHypoRecycle()); + m_stacks.Add(initHypo, mgr.GetHypoRecycle()); - for (size_t stackInd = 0; stackInd < m_stacks.GetSize() - 1; ++stackInd) { - CreateSearchGraph(stackInd); - } + for (size_t stackInd = 0; stackInd < m_stacks.GetSize() - 1; ++stackInd) { + CreateSearchGraph(stackInd); + } - for (size_t stackInd = 1; stackInd < m_stacks.GetSize(); ++stackInd) { - //cerr << "stackInd=" << stackInd << endl; - Decode(stackInd); + for (size_t stackInd = 1; stackInd < m_stacks.GetSize(); ++stackInd) { + //cerr << "stackInd=" << stackInd << endl; + Decode(stackInd); - //cerr << m_stacks << endl; - } + //cerr << m_stacks << endl; + } - //DebugCounts(); + //DebugCounts(); } void Search::Decode(size_t stackInd) { NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd]; BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) { - NSCubePruningMiniStack::MiniStack &miniStack 
= *val.second; - Decode(miniStack); + NSCubePruningMiniStack::MiniStack &miniStack = *val.second; + Decode(miniStack); } } void Search::Decode(NSCubePruningMiniStack::MiniStack &miniStack) { - Recycler &hypoRecycler = mgr.GetHypoRecycle(); + Recycler &hypoRecycler = mgr.GetHypoRecycle(); - // reuse queue from previous stack. Clear it first - std::vector &container = Container(m_queue); - //cerr << "container=" << container.size() << endl; - BOOST_FOREACH(QueueItem *item, container) { - // recycle unused hypos from queue - Hypothesis *hypo = item->hypo; - hypoRecycler.Recycle(hypo); + // reuse queue from previous stack. Clear it first + std::vector &container = Container(m_queue); + //cerr << "container=" << container.size() << endl; + BOOST_FOREACH(QueueItem *item, container) { + // recycle unused hypos from queue + Hypothesis *hypo = item->hypo; + hypoRecycler.Recycle(hypo); - // recycle queue item - m_queueItemRecycler.push_back(item); - } - container.clear(); + // recycle queue item + m_queueItemRecycler.push_back(item); + } + container.clear(); - m_seenPositions.clear(); + m_seenPositions.clear(); - // add top hypo from every edge into queue - CubeEdges &edges = *m_cubeEdges[&miniStack]; + // add top hypo from every edge into queue + CubeEdges &edges = *m_cubeEdges[&miniStack]; - BOOST_FOREACH(CubeEdge *edge, edges) { - //cerr << "edge=" << *edge << endl; - edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler); - } + BOOST_FOREACH(CubeEdge *edge, edges) { + //cerr << "edge=" << *edge << endl; + edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler); + } - size_t pops = 0; - while (!m_queue.empty() && pops < mgr.system.popLimit) { - // get best hypo from queue, add to stack - //cerr << "queue=" << queue.size() << endl; - QueueItem *item = m_queue.top(); - m_queue.pop(); + size_t pops = 0; + while (!m_queue.empty() && pops < mgr.system.popLimit) { + // get best hypo from queue, add to stack + //cerr << "queue=" << queue.size() << 
endl; + QueueItem *item = m_queue.top(); + m_queue.pop(); - CubeEdge *edge = item->edge; + CubeEdge *edge = item->edge; - // add hypo to stack - Hypothesis *hypo = item->hypo; - //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; - m_stacks.Add(hypo, hypoRecycler); + // add hypo to stack + Hypothesis *hypo = item->hypo; + //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; + m_stacks.Add(hypo, hypoRecycler); - edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler); + edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler); - ++pops; - } + ++pops; + } - /* - // create hypo from every edge. Increase diversity - while (!m_queue.empty()) { - QueueItem *item = m_queue.top(); - m_queue.pop(); + /* + // create hypo from every edge. Increase diversity + while (!m_queue.empty()) { + QueueItem *item = m_queue.top(); + m_queue.pop(); - if (item->hypoIndex == 0 && item->tpIndex == 0) { - CubeEdge &edge = item->edge; + if (item->hypoIndex == 0 && item->tpIndex == 0) { + CubeEdge &edge = item->edge; - // add hypo to stack - Hypothesis *hypo = item->hypo; - //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; - m_stacks.Add(hypo, mgr.GetHypoRecycle()); - } - } - */ + // add hypo to stack + Hypothesis *hypo = item->hypo; + //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; + m_stacks.Add(hypo, mgr.GetHypoRecycle()); + } + } + */ } @@ -146,54 +146,53 @@ void Search::CreateSearchGraph(size_t stackInd) MemPool &pool = mgr.GetPool(); BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) { - const Bitmap &hypoBitmap = *val.first.first; - size_t hypoEndPos = val.first.second; - //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl; + const Bitmap &hypoBitmap = *val.first.first; + size_t hypoEndPos = val.first.second; + //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl; - // create edges to next hypos from existing hypos - const InputPaths &paths = 
mgr.GetInputPaths(); + // create edges to next hypos from existing hypos + const InputPaths &paths = mgr.GetInputPaths(); - BOOST_FOREACH(const InputPath *path, paths) { - const Range &pathRange = path->range; - //cerr << "pathRange=" << pathRange << endl; + BOOST_FOREACH(const InputPath *path, paths) { + const Range &pathRange = path->range; + //cerr << "pathRange=" << pathRange << endl; - if (!path->IsUsed()) { - continue; - } - if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) { - continue; - } + if (!path->IsUsed()) { + continue; + } + if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) { + continue; + } - const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); + const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); - // sort hypo for a particular bitmap and hypoEndPos - const NSCubePruningMiniStack::MiniStack &miniStack = *val.second; + // sort hypo for a particular bitmap and hypoEndPos + const NSCubePruningMiniStack::MiniStack &miniStack = *val.second; - // add cube edge - size_t numPt = mgr.system.mappings.size(); - for (size_t i = 0; i < numPt; ++i) { - const TargetPhrases *tps = path->targetPhrases[i]; - if (tps && tps->GetSize()) { - // create next mini stack - NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange); + // add cube edge + size_t numPt = mgr.system.mappings.size(); + for (size_t i = 0; i < numPt; ++i) { + const TargetPhrases *tps = path->targetPhrases[i]; + if (tps && tps->GetSize()) { + // create next mini stack + NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange); - CubeEdge *edge = new (pool.Allocate()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap); + CubeEdge *edge = new (pool.Allocate()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap); - CubeEdges *edges; - boost::unordered_map::iterator iter = m_cubeEdges.find(&nextMiniStack); - if (iter == m_cubeEdges.end()) { - edges = new (pool.Allocate()) CubeEdges(); 
- m_cubeEdges[&nextMiniStack] = edges; - } - else { - edges = iter->second; - } + CubeEdges *edges; + boost::unordered_map::iterator iter = m_cubeEdges.find(&nextMiniStack); + if (iter == m_cubeEdges.end()) { + edges = new (pool.Allocate()) CubeEdges(); + m_cubeEdges[&nextMiniStack] = edges; + } else { + edges = iter->second; + } - edges->push_back(edge); - } - } - } + edges->push_back(edge); + } + } + } } } @@ -201,42 +200,41 @@ void Search::CreateSearchGraph(size_t stackInd) const Hypothesis *Search::GetBestHypo() const { - const NSCubePruningMiniStack::Stack &lastStack = m_stacks.Back(); - std::vector sortedHypos = lastStack.GetBestHypos(1); + const NSCubePruningMiniStack::Stack &lastStack = m_stacks.Back(); + std::vector sortedHypos = lastStack.GetBestHypos(1); - const Hypothesis *best = NULL; - if (sortedHypos.size()) { - best = sortedHypos[0]; - } - return best; + const Hypothesis *best = NULL; + if (sortedHypos.size()) { + best = sortedHypos[0]; + } + return best; } void Search::DebugCounts() { - std::map counts; + std::map counts; - for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) { - //cerr << "stackInd=" << stackInd << endl; - const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd]; - BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) { - const NSCubePruningMiniStack::MiniStack &miniStack = *val.second; - size_t count = miniStack.GetColl().size(); + for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) { + //cerr << "stackInd=" << stackInd << endl; + const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd]; + BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) { + const NSCubePruningMiniStack::MiniStack &miniStack = *val.second; + size_t count = miniStack.GetColl().size(); - if (counts.find(count) == counts.end()) { - counts[count] = 0; - } - else { - ++counts[count]; - } - } - //cerr << m_stacks << endl; - } + if 
(counts.find(count) == counts.end()) { + counts[count] = 0; + } else { + ++counts[count]; + } + } + //cerr << m_stacks << endl; + } - std::map::const_iterator iter; - for (iter = counts.begin(); iter != counts.end(); ++iter) { - cerr << iter->first << "=" << iter->second << " "; - } - cerr << endl; + std::map::const_iterator iter; + for (iter = counts.begin(); iter != counts.end(); ++iter) { + cerr << iter->first << "=" << iter->second << " "; + } + cerr << endl; } diff --git a/moses2/defer/CubePruningPerMiniStack/Search.h b/moses2/defer/CubePruningPerMiniStack/Search.h index be256360e..2adb9631c 100644 --- a/moses2/defer/CubePruningPerMiniStack/Search.h +++ b/moses2/defer/CubePruningPerMiniStack/Search.h @@ -32,32 +32,32 @@ namespace NSCubePruningPerMiniStack class Search : public Moses2::Search { public: - Search(Manager &mgr); - virtual ~Search(); + Search(Manager &mgr); + virtual ~Search(); - virtual void Decode(); - const Hypothesis *GetBestHypo() const; + virtual void Decode(); + const Hypothesis *GetBestHypo() const; protected: - Stacks m_stacks; + Stacks m_stacks; - CubeEdge::Queue m_queue; - CubeEdge::SeenPositions m_seenPositions; + CubeEdge::Queue m_queue; + CubeEdge::SeenPositions m_seenPositions; - // CUBE PRUNING VARIABLES - // setup - typedef std::vector CubeEdges; - boost::unordered_map m_cubeEdges; + // CUBE PRUNING VARIABLES + // setup + typedef std::vector CubeEdges; + boost::unordered_map m_cubeEdges; - std::deque m_queueItemRecycler; + std::deque m_queueItemRecycler; - // CUBE PRUNING - // decoding - void CreateSearchGraph(size_t stackInd); - void Decode(size_t stackInd); - void Decode(NSCubePruningMiniStack::MiniStack &miniStack); + // CUBE PRUNING + // decoding + void CreateSearchGraph(size_t stackInd); + void Decode(size_t stackInd); + void Decode(NSCubePruningMiniStack::MiniStack &miniStack); - void DebugCounts(); + void DebugCounts(); }; } diff --git a/moses2/defer/CubePruningPerMiniStack/Stacks.cpp 
b/moses2/defer/CubePruningPerMiniStack/Stacks.cpp index 86bf5d1b8..4e81e8e48 100644 --- a/moses2/defer/CubePruningPerMiniStack/Stacks.cpp +++ b/moses2/defer/CubePruningPerMiniStack/Stacks.cpp @@ -18,7 +18,7 @@ namespace NSCubePruningPerMiniStack { Stacks::Stacks(const Manager &mgr) -:m_mgr(mgr) + :m_mgr(mgr) { } @@ -28,18 +28,18 @@ Stacks::~Stacks() void Stacks::Init(size_t numStacks) { - m_stacks.resize(numStacks); - for (size_t i = 0; i < m_stacks.size(); ++i) { - m_stacks[i] = new (m_mgr.GetPool().Allocate()) NSCubePruningMiniStack::Stack(m_mgr); - } + m_stacks.resize(numStacks); + for (size_t i = 0; i < m_stacks.size(); ++i) { + m_stacks[i] = new (m_mgr.GetPool().Allocate()) NSCubePruningMiniStack::Stack(m_mgr); + } } std::ostream& operator<<(std::ostream &out, const Stacks &obj) { for (size_t i = 0; i < obj.GetSize(); ++i) { - const NSCubePruningMiniStack::Stack &stack = *obj.m_stacks[i]; - out << stack.GetHypoSize() << " "; + const NSCubePruningMiniStack::Stack &stack = *obj.m_stacks[i]; + out << stack.GetHypoSize() << " "; } return out; @@ -47,21 +47,21 @@ std::ostream& operator<<(std::ostream &out, const Stacks &obj) void Stacks::Add(const Hypothesis *hypo, Recycler &hypoRecycle) { - size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered(); - //cerr << "numWordsCovered=" << numWordsCovered << endl; - NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered]; - stack.Add(hypo, hypoRecycle); + size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered(); + //cerr << "numWordsCovered=" << numWordsCovered << endl; + NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered]; + stack.Add(hypo, hypoRecycle); } NSCubePruningMiniStack::MiniStack &Stacks::GetMiniStack(const Bitmap &newBitmap, const Range &pathRange) { - size_t numWordsCovered = newBitmap.GetNumWordsCovered(); - //cerr << "numWordsCovered=" << numWordsCovered << endl; - NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered]; + size_t numWordsCovered = 
newBitmap.GetNumWordsCovered(); + //cerr << "numWordsCovered=" << numWordsCovered << endl; + NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered]; - NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos()); - stack.GetMiniStack(key); + NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos()); + stack.GetMiniStack(key); } diff --git a/moses2/defer/CubePruningPerMiniStack/Stacks.h b/moses2/defer/CubePruningPerMiniStack/Stacks.h index 94ebe4618..74469b767 100644 --- a/moses2/defer/CubePruningPerMiniStack/Stacks.h +++ b/moses2/defer/CubePruningPerMiniStack/Stacks.h @@ -18,29 +18,33 @@ class Manager; namespace NSCubePruningPerMiniStack { -class Stacks { - friend std::ostream& operator<<(std::ostream &, const Stacks &); +class Stacks +{ + friend std::ostream& operator<<(std::ostream &, const Stacks &); public: - Stacks(const Manager &mgr); - virtual ~Stacks(); + Stacks(const Manager &mgr); + virtual ~Stacks(); - void Init(size_t numStacks); + void Init(size_t numStacks); - size_t GetSize() const - { return m_stacks.size(); } + size_t GetSize() const { + return m_stacks.size(); + } - const NSCubePruningMiniStack::Stack &Back() const - { return *m_stacks.back(); } + const NSCubePruningMiniStack::Stack &Back() const { + return *m_stacks.back(); + } - NSCubePruningMiniStack::Stack &operator[](size_t ind) - { return *m_stacks[ind]; } + NSCubePruningMiniStack::Stack &operator[](size_t ind) { + return *m_stacks[ind]; + } - void Add(const Hypothesis *hypo, Recycler &hypoRecycle); - NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange); + void Add(const Hypothesis *hypo, Recycler &hypoRecycle); + NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange); protected: - const Manager &m_mgr; - std::vector m_stacks; + const Manager &m_mgr; + std::vector m_stacks; }; diff --git a/moses2/legacy/Bitmap.cpp b/moses2/legacy/Bitmap.cpp index 
a8dc7db4d..ed5ccd750 100644 --- a/moses2/legacy/Bitmap.cpp +++ b/moses2/legacy/Bitmap.cpp @@ -26,7 +26,7 @@ namespace Moses2 { Bitmap::Bitmap(MemPool &pool, size_t size) : - m_bitmap(pool, size) + m_bitmap(pool, size) { } @@ -47,9 +47,9 @@ void Bitmap::Init(const std::vector& initializer) // Find the first gap, and cache it. Array::const_iterator first_gap = std::find(m_bitmap.begin(), - m_bitmap.end(), false); + m_bitmap.end(), false); m_firstGap = ((first_gap == m_bitmap.end()) ? - NOT_FOUND: first_gap - m_bitmap.begin()); + NOT_FOUND: first_gap - m_bitmap.begin()); } void Bitmap::Init(const Bitmap ©, const Range &range) diff --git a/moses2/legacy/Bitmap.h b/moses2/legacy/Bitmap.h index e6a0f7948..3ceb9b01d 100644 --- a/moses2/legacy/Bitmap.h +++ b/moses2/legacy/Bitmap.h @@ -60,8 +60,7 @@ private: Bitmap& operator=(const Bitmap& other); /** Update the first gap, when bits are flipped */ - void UpdateFirstGap(size_t startPos, size_t endPos, bool value) - { + void UpdateFirstGap(size_t startPos, size_t endPos, bool value) { if (value) { //may remove gap if (startPos <= m_firstGap && m_firstGap <= endPos) { @@ -74,171 +73,168 @@ private: } } - } - else { + } else { //setting positions to false, may add new gap - if (startPos < m_firstGap) { - m_firstGap = startPos; - } - } - } - - //! set value between 2 positions, inclusive - void - SetValueNonOverlap(Range const& range) { - size_t startPos = range.GetStartPos(); - size_t endPos = range.GetEndPos(); - - for(size_t pos = startPos; pos <= endPos; pos++) { - m_bitmap[pos] = true; - } - - m_numWordsCovered += range.GetNumWordsCovered(); - UpdateFirstGap(startPos, endPos, true); - } - - public: - //! Create Bitmap of length size, and initialise with vector. - explicit Bitmap(MemPool &pool, size_t size); - - void Init(const std::vector& initializer); - void Init(const Bitmap ©, const Range &range); - - //! Count of words translated. - size_t GetNumWordsCovered() const { - return m_numWordsCovered; - } - - //! 
position of 1st word not yet translated, or NOT_FOUND if everything already translated - size_t GetFirstGapPos() const { - return m_firstGap; - } - - //! position of last word not yet translated, or NOT_FOUND if everything already translated - size_t GetLastGapPos() const { - for (int pos = int(m_bitmap.size()) - 1; pos >= 0; pos--) { - if (!m_bitmap[pos]) { - return pos; - } - } - // no starting pos - return NOT_FOUND; - } - - //! position of last translated word - size_t GetLastPos() const { - for (int pos = int(m_bitmap.size()) - 1; pos >= 0; pos--) { - if (m_bitmap[pos]) { - return pos; - } - } - // no starting pos - return NOT_FOUND; - } - - //! whether a word has been translated at a particular position - bool GetValue(size_t pos) const { - return bool(m_bitmap[pos]); - } - //! set value at a particular position - void SetValue( size_t pos, bool value ) { - bool origValue = m_bitmap[pos]; - if (origValue == value) { - // do nothing - } - else { - m_bitmap[pos] = value; - UpdateFirstGap(pos, pos, value); - if (value) { - ++m_numWordsCovered; - } - else { - --m_numWordsCovered; - } - } - } - - //! whether every word has been translated - bool IsComplete() const { - return GetSize() == GetNumWordsCovered(); - } - //! whether the wordrange overlaps with any translated word in this bitmap - bool Overlap(const Range &compare) const { - for (size_t pos = compare.GetStartPos(); pos <= compare.GetEndPos(); pos++) { - if (m_bitmap[pos]) - return true; - } - return false; - } - //! number of elements - size_t GetSize() const { - return m_bitmap.size(); - } - - inline size_t GetEdgeToTheLeftOf(size_t l) const { - if (l == 0) return l; - while (l && !m_bitmap[l-1]) { - --l; - } - return l; - } - - inline size_t GetEdgeToTheRightOf(size_t r) const { - if (r+1 == m_bitmap.size()) return r; - return ( - std::find(m_bitmap.begin() + r + 1, m_bitmap.end(), true) - - m_bitmap.begin() - ) - 1; - } - - //! 
converts bitmap into an integer ID: it consists of two parts: the first 16 bit are the pattern between the first gap and the last word-1, the second 16 bit are the number of filled positions. enforces a sentence length limit of 65535 and a max distortion of 16 - WordsBitmapID GetID() const { - assert(m_bitmap.size() < (1<<16)); - - size_t start = GetFirstGapPos(); - if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left - - size_t end = GetLastPos(); - if (end == NOT_FOUND) end = 0;// nothing translated yet - - assert(end < start || end-start <= 16); - WordsBitmapID id = 0; - for(size_t pos = end; pos > start; pos--) { - id = id*2 + (int) GetValue(pos); - } - return id + (1<<16) * start; - } - - //! converts bitmap into an integer ID, with an additional span covered - WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const { - assert(m_bitmap.size() < (1<<16)); - - size_t start = GetFirstGapPos(); - if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left - - size_t end = GetLastPos(); - if (end == NOT_FOUND) end = 0;// nothing translated yet - - if (start == startPos) start = endPos+1; - if (end < endPos) end = endPos; - - assert(end < start || end-start <= 16); - WordsBitmapID id = 0; - for(size_t pos = end; pos > start; pos--) { - id = id*2; - if (GetValue(pos) || (startPos<=pos && pos<=endPos)) - id++; - } - return id + (1<<16) * start; - } - - // for unordered_set in stack - size_t hash() const; - bool operator==(const Bitmap& other) const; - bool operator!=(const Bitmap& other) const { - return !(*this == other); - } - - }; - + if (startPos < m_firstGap) { + m_firstGap = startPos; + } } + } + + //! 
set value between 2 positions, inclusive + void + SetValueNonOverlap(Range const& range) { + size_t startPos = range.GetStartPos(); + size_t endPos = range.GetEndPos(); + + for(size_t pos = startPos; pos <= endPos; pos++) { + m_bitmap[pos] = true; + } + + m_numWordsCovered += range.GetNumWordsCovered(); + UpdateFirstGap(startPos, endPos, true); + } + +public: + //! Create Bitmap of length size, and initialise with vector. + explicit Bitmap(MemPool &pool, size_t size); + + void Init(const std::vector& initializer); + void Init(const Bitmap ©, const Range &range); + + //! Count of words translated. + size_t GetNumWordsCovered() const { + return m_numWordsCovered; + } + + //! position of 1st word not yet translated, or NOT_FOUND if everything already translated + size_t GetFirstGapPos() const { + return m_firstGap; + } + + //! position of last word not yet translated, or NOT_FOUND if everything already translated + size_t GetLastGapPos() const { + for (int pos = int(m_bitmap.size()) - 1; pos >= 0; pos--) { + if (!m_bitmap[pos]) { + return pos; + } + } + // no starting pos + return NOT_FOUND; + } + + //! position of last translated word + size_t GetLastPos() const { + for (int pos = int(m_bitmap.size()) - 1; pos >= 0; pos--) { + if (m_bitmap[pos]) { + return pos; + } + } + // no starting pos + return NOT_FOUND; + } + + //! whether a word has been translated at a particular position + bool GetValue(size_t pos) const { + return bool(m_bitmap[pos]); + } + //! set value at a particular position + void SetValue( size_t pos, bool value ) { + bool origValue = m_bitmap[pos]; + if (origValue == value) { + // do nothing + } else { + m_bitmap[pos] = value; + UpdateFirstGap(pos, pos, value); + if (value) { + ++m_numWordsCovered; + } else { + --m_numWordsCovered; + } + } + } + + //! whether every word has been translated + bool IsComplete() const { + return GetSize() == GetNumWordsCovered(); + } + //! 
whether the wordrange overlaps with any translated word in this bitmap + bool Overlap(const Range &compare) const { + for (size_t pos = compare.GetStartPos(); pos <= compare.GetEndPos(); pos++) { + if (m_bitmap[pos]) + return true; + } + return false; + } + //! number of elements + size_t GetSize() const { + return m_bitmap.size(); + } + + inline size_t GetEdgeToTheLeftOf(size_t l) const { + if (l == 0) return l; + while (l && !m_bitmap[l-1]) { + --l; + } + return l; + } + + inline size_t GetEdgeToTheRightOf(size_t r) const { + if (r+1 == m_bitmap.size()) return r; + return ( + std::find(m_bitmap.begin() + r + 1, m_bitmap.end(), true) - + m_bitmap.begin() + ) - 1; + } + + //! converts bitmap into an integer ID: it consists of two parts: the first 16 bit are the pattern between the first gap and the last word-1, the second 16 bit are the number of filled positions. enforces a sentence length limit of 65535 and a max distortion of 16 + WordsBitmapID GetID() const { + assert(m_bitmap.size() < (1<<16)); + + size_t start = GetFirstGapPos(); + if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left + + size_t end = GetLastPos(); + if (end == NOT_FOUND) end = 0;// nothing translated yet + + assert(end < start || end-start <= 16); + WordsBitmapID id = 0; + for(size_t pos = end; pos > start; pos--) { + id = id*2 + (int) GetValue(pos); + } + return id + (1<<16) * start; + } + + //! 
converts bitmap into an integer ID, with an additional span covered + WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const { + assert(m_bitmap.size() < (1<<16)); + + size_t start = GetFirstGapPos(); + if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left + + size_t end = GetLastPos(); + if (end == NOT_FOUND) end = 0;// nothing translated yet + + if (start == startPos) start = endPos+1; + if (end < endPos) end = endPos; + + assert(end < start || end-start <= 16); + WordsBitmapID id = 0; + for(size_t pos = end; pos > start; pos--) { + id = id*2; + if (GetValue(pos) || (startPos<=pos && pos<=endPos)) + id++; + } + return id + (1<<16) * start; + } + + // for unordered_set in stack + size_t hash() const; + bool operator==(const Bitmap& other) const; + bool operator!=(const Bitmap& other) const { + return !(*this == other); + } + +}; + +} diff --git a/moses2/legacy/Bitmaps.cpp b/moses2/legacy/Bitmaps.cpp index 879ad9d71..b1fee5ea6 100644 --- a/moses2/legacy/Bitmaps.cpp +++ b/moses2/legacy/Bitmaps.cpp @@ -8,7 +8,7 @@ namespace Moses2 { Bitmaps::Bitmaps(MemPool &pool) : - m_pool(pool) + m_pool(pool) { } @@ -17,7 +17,7 @@ Bitmaps::~Bitmaps() } void Bitmaps::Init(size_t inputSize, - const std::vector &initSourceCompleted) + const std::vector &initSourceCompleted) { m_initBitmap = new (m_pool.Allocate()) Bitmap(m_pool, inputSize); m_initBitmap->Init(initSourceCompleted); @@ -29,8 +29,7 @@ const Bitmap &Bitmaps::GetNextBitmap(const Bitmap &bm, const Range &range) Bitmap *newBM; if (m_recycler.empty()) { newBM = new (m_pool.Allocate()) Bitmap(m_pool, bm.GetSize()); - } - else { + } else { newBM = m_recycler.top(); m_recycler.pop(); } @@ -41,8 +40,7 @@ const Bitmap &Bitmaps::GetNextBitmap(const Bitmap &bm, const Range &range) if (iter == m_coll.end()) { m_coll[newBM] = NextBitmaps(); return *newBM; - } - else { + } else { m_recycler.push(newBM); return *iter->first; @@ -61,8 +59,7 @@ const Bitmap &Bitmaps::GetBitmap(const Bitmap &bm, const Range &range) // 
not seen the link yet. newBM = &GetNextBitmap(bm, range); next[&range] = newBM; - } - else { + } else { // link exist //std::cerr << "link exists" << endl; newBM = iterNext->second; diff --git a/moses2/legacy/Bitmaps.h b/moses2/legacy/Bitmaps.h index d8207b59e..aa0ea8f82 100644 --- a/moses2/legacy/Bitmaps.h +++ b/moses2/legacy/Bitmaps.h @@ -15,7 +15,7 @@ class Bitmaps { typedef boost::unordered_map NextBitmaps; typedef boost::unordered_map, UnorderedComparer > Coll; + UnorderedComparer, UnorderedComparer > Coll; //typedef std::set > Coll; Coll m_coll; Bitmap *m_initBitmap; @@ -29,8 +29,7 @@ public: virtual ~Bitmaps(); void Init(size_t inputSize, const std::vector &initSourceCompleted); - const Bitmap &GetInitialBitmap() const - { + const Bitmap &GetInitialBitmap() const { return *m_initBitmap; } const Bitmap &GetBitmap(const Bitmap &bm, const Range &range); diff --git a/moses2/legacy/Factor.h b/moses2/legacy/Factor.h index 99d53f4f0..541f2364a 100644 --- a/moses2/legacy/Factor.h +++ b/moses2/legacy/Factor.h @@ -49,14 +49,12 @@ class Factor size_t m_id; //! protected constructor. only friend class, FactorCollection, is allowed to create Factor objects - Factor() - { + Factor() { } // Needed for STL containers. They'll delegate through FactorFriend, which is never exposed publicly. Factor(const Factor &factor) : - m_string(factor.m_string), m_id(factor.m_id) - { + m_string(factor.m_string), m_id(factor.m_id) { } // Not implemented. Shouldn't be called. @@ -64,13 +62,11 @@ class Factor public: //! original string representation of the factor - StringPiece GetString() const - { + StringPiece GetString() const { return m_string; } //! contiguous ID - inline size_t GetId() const - { + inline size_t GetId() const { return m_id; } @@ -79,21 +75,18 @@ public: * +1 = more than * 0 = same */ - inline int Compare(const Factor &compare) const - { + inline int Compare(const Factor &compare) const { if (this < &compare) return -1; if (this > &compare) return 1; return 0; } //! 
transitive comparison used for adding objects into FactorCollection - inline bool operator<(const Factor &compare) const - { + inline bool operator<(const Factor &compare) const { return this < &compare; } // quick equality comparison. Not used - inline bool operator==(const Factor &compare) const - { + inline bool operator==(const Factor &compare) const { return this == &compare; } }; diff --git a/moses2/legacy/FactorCollection.cpp b/moses2/legacy/FactorCollection.cpp index f8beb9b40..80081bab9 100644 --- a/moses2/legacy/FactorCollection.cpp +++ b/moses2/legacy/FactorCollection.cpp @@ -55,14 +55,13 @@ const Factor *FactorCollection::AddFactor(const StringPiece &factorString, std::pair ret(set.insert(to_ins)); if (ret.second) { ret.first->in.m_string.set( - memcpy(m_string_backing.Allocate(factorString.size()), - factorString.data(), factorString.size()), factorString.size()); + memcpy(m_string_backing.Allocate(factorString.size()), + factorString.data(), factorString.size()), factorString.size()); if (isNonTerminal) { m_factorIdNonTerminal++; UTIL_THROW_IF2(m_factorIdNonTerminal >= moses_MaxNumNonterminals, - "Number of non-terminals exceeds maximum size reserved. Adjust parameter moses_MaxNumNonterminals, then recompile"); - } - else { + "Number of non-terminals exceeds maximum size reserved. 
Adjust parameter moses_MaxNumNonterminals, then recompile"); + } else { m_factorId++; } } @@ -101,7 +100,7 @@ ostream& operator<<(ostream& out, const FactorCollection& factorCollection) boost::shared_lock lock(factorCollection.m_accessLock); #endif for (FactorCollection::Set::const_iterator i = factorCollection.m_set.begin(); - i != factorCollection.m_set.end(); ++i) { + i != factorCollection.m_set.end(); ++i) { out << i->in; } return out; diff --git a/moses2/legacy/FactorCollection.h b/moses2/legacy/FactorCollection.h index 0430e5cde..1b29dee69 100644 --- a/moses2/legacy/FactorCollection.h +++ b/moses2/legacy/FactorCollection.h @@ -52,8 +52,7 @@ class System; * FactorFriend's public copy constructor and everybody else sees Factor's * private copy constructor. */ -struct FactorFriend -{ +struct FactorFriend { Factor in; }; @@ -71,19 +70,15 @@ class FactorCollection friend class System; struct HashFactor: public std::unary_function - { - std::size_t operator()(const FactorFriend &factor) const - { + std::size_t> { + std::size_t operator()(const FactorFriend &factor) const { return util::MurmurHashNative(factor.in.m_string.data(), - factor.in.m_string.size()); + factor.in.m_string.size()); } }; struct EqualsFactor: public std::binary_function - { - bool operator()(const FactorFriend &left, const FactorFriend &right) const - { + const FactorFriend &, bool> { + bool operator()(const FactorFriend &left, const FactorFriend &right) const { return left.in.GetString() == right.in.GetString(); } }; @@ -103,8 +98,7 @@ class FactorCollection //! constructor. 
only the 1 static variable can be created FactorCollection() : - m_factorIdNonTerminal(0), m_factorId(moses_MaxNumNonterminals) - { + m_factorIdNonTerminal(0), m_factorId(moses_MaxNumNonterminals) { } public: @@ -114,15 +108,14 @@ public: * If a factor already exist in the collection, return the existing factor, if not create a new 1 */ const Factor *AddFactor(const StringPiece &factorString, const System &system, - bool isNonTerminal); + bool isNonTerminal); - size_t GetNumNonTerminals() - { + size_t GetNumNonTerminals() { return m_factorIdNonTerminal; } const Factor *GetFactor(const StringPiece &factorString, bool isNonTerminal = - false); + false); }; diff --git a/moses2/legacy/InputFileStream.cpp b/moses2/legacy/InputFileStream.cpp index a68ea53ef..25bb156fe 100644 --- a/moses2/legacy/InputFileStream.cpp +++ b/moses2/legacy/InputFileStream.cpp @@ -29,12 +29,11 @@ namespace Moses2 { InputFileStream::InputFileStream(const std::string &filePath) : - std::istream(NULL), m_streambuf(NULL) + std::istream(NULL), m_streambuf(NULL) { if (filePath.size() > 3 && filePath.substr(filePath.size() - 3, 3) == ".gz") { m_streambuf = new gzfilebuf(filePath.c_str()); - } - else { + } else { std::filebuf* fb = new std::filebuf(); fb = fb->open(filePath.c_str(), std::ios::in); if (!fb) { diff --git a/moses2/legacy/Matrix.h b/moses2/legacy/Matrix.h index 6c498b53d..ddfde9027 100644 --- a/moses2/legacy/Matrix.h +++ b/moses2/legacy/Matrix.h @@ -39,16 +39,14 @@ protected: public: Matrix(MemPool &pool, size_t rows, size_t cols) : - m_rows(rows), m_cols(cols) - { + m_rows(rows), m_cols(cols) { m_array = pool.Allocate(rows * cols); } ~Matrix(); // not implemented // set upper triangle - void InitTriangle(const T &val) - { + void InitTriangle(const T &val) { assert(m_rows == m_cols); for (size_t row = 0; row < m_rows; row++) { for (size_t col = row; col < m_cols; col++) { @@ -58,8 +56,7 @@ public: } // everything - void Init(const T &val) - { + void Init(const T &val) { for (size_t row = 
0; row < m_rows; row++) { for (size_t col = 0; col < m_cols; col++) { SetValue(row, col, val); @@ -68,36 +65,30 @@ public: } /** Returns length of the square: typically the sentence length */ - inline size_t GetSize() const - { + inline size_t GetSize() const { assert(m_rows == m_cols); return m_rows; } - inline size_t GetRows() const - { + inline size_t GetRows() const { return m_rows; } - inline size_t GetCols() const - { + inline size_t GetCols() const { return m_cols; } /** Get a future cost score for a span */ - inline const T &GetValue(size_t row, size_t col) const - { + inline const T &GetValue(size_t row, size_t col) const { return m_array[row * m_cols + col]; } - inline T &GetValue(size_t row, size_t col) - { + inline T &GetValue(size_t row, size_t col) { return m_array[row * m_cols + col]; } /** Set a future cost score for a span */ - inline void SetValue(size_t row, size_t col, const T &value) - { + inline void SetValue(size_t row, size_t col, const T &value) { m_array[row * m_cols + col] = value; } }; diff --git a/moses2/legacy/OutputCollector.h b/moses2/legacy/OutputCollector.h index 5504d9add..fdd54c5a2 100644 --- a/moses2/legacy/OutputCollector.h +++ b/moses2/legacy/OutputCollector.h @@ -43,28 +43,24 @@ class OutputCollector { public: OutputCollector(std::ostream* outStream = &std::cout, - std::ostream* debugStream = &std::cerr) : - m_nextOutput(0), m_outStream(outStream), m_debugStream(debugStream), m_isHoldingOutputStream( - false), m_isHoldingDebugStream(false) - { + std::ostream* debugStream = &std::cerr) : + m_nextOutput(0), m_outStream(outStream), m_debugStream(debugStream), m_isHoldingOutputStream( + false), m_isHoldingDebugStream(false) { } OutputCollector(std::string xout, std::string xerr = "") : - m_nextOutput(0) - { + m_nextOutput(0) { // TO DO open magic streams instead of regular ofstreams! 
[UG] if (xout == "/dev/stderr") { m_outStream = &std::cerr; m_isHoldingOutputStream = false; - } - else if (xout.size() && xout != "/dev/stdout" && xout != "-") { + } else if (xout.size() && xout != "/dev/stdout" && xout != "-") { m_outStream = new std::ofstream(xout.c_str()); UTIL_THROW_IF2(!m_outStream->good(), - "Failed to open output file" << xout); + "Failed to open output file" << xout); m_isHoldingOutputStream = true; - } - else { + } else { m_outStream = &std::cout; m_isHoldingOutputStream = false; } @@ -72,37 +68,31 @@ public: if (xerr == "/dev/stdout") { m_debugStream = &std::cout; m_isHoldingDebugStream = false; - } - else if (xerr.size() && xerr != "/dev/stderr") { + } else if (xerr.size() && xerr != "/dev/stderr") { m_debugStream = new std::ofstream(xerr.c_str()); UTIL_THROW_IF2(!m_debugStream->good(), - "Failed to open debug stream" << xerr); + "Failed to open debug stream" << xerr); m_isHoldingDebugStream = true; - } - else { + } else { m_debugStream = &std::cerr; m_isHoldingDebugStream = false; } } - ~OutputCollector() - { + ~OutputCollector() { if (m_isHoldingOutputStream) delete m_outStream; if (m_isHoldingDebugStream) delete m_debugStream; } - void HoldOutputStream() - { + void HoldOutputStream() { m_isHoldingOutputStream = true; } - void HoldDebugStream() - { + void HoldDebugStream() { m_isHoldingDebugStream = true; } - bool OutputIsCout() const - { + bool OutputIsCout() const { return (m_outStream == &std::cout); } @@ -110,8 +100,7 @@ public: * Write or cache the output, as appropriate. 
**/ void Write(int sourceId, const std::string& output, const std::string& debug = - "") - { + "") { #ifdef WITH_THREADS boost::mutex::scoped_lock lock(m_mutex); #endif @@ -126,15 +115,14 @@ public: *m_outStream << iter->second << std::flush; ++m_nextOutput; std::map::iterator debugIter = m_debugs.find( - iter->first); + iter->first); m_outputs.erase(iter); if (debugIter != m_debugs.end()) { *m_debugStream << debugIter->second << std::flush; m_debugs.erase(debugIter); } } - } - else { + } else { //save for later m_outputs[sourceId] = output; m_debugs[sourceId] = debug; @@ -154,8 +142,7 @@ private: #endif public: - void SetOutputStream(std::ostream* outStream) - { + void SetOutputStream(std::ostream* outStream) { m_outStream = outStream; } diff --git a/moses2/legacy/OutputFileStream.cpp b/moses2/legacy/OutputFileStream.cpp index ad46f3a0c..81047ffe1 100644 --- a/moses2/legacy/OutputFileStream.cpp +++ b/moses2/legacy/OutputFileStream.cpp @@ -31,12 +31,12 @@ using namespace boost::algorithm; namespace Moses2 { OutputFileStream::OutputFileStream() : - boost::iostreams::filtering_ostream(), m_outFile(NULL), m_open(false) + boost::iostreams::filtering_ostream(), m_outFile(NULL), m_open(false) { } OutputFileStream::OutputFileStream(const std::string &filePath) : - m_outFile(NULL), m_open(false) + m_outFile(NULL), m_open(false) { Open(filePath); } @@ -52,10 +52,9 @@ bool OutputFileStream::Open(const std::string &filePath) if (filePath == std::string("-")) { // Write to standard output. Leave m_outFile null. 
this->push(std::cout); - } - else { + } else { m_outFile = new ofstream(filePath.c_str(), - ios_base::out | ios_base::binary); + ios_base::out | ios_base::binary); if (m_outFile->fail()) { return false; } diff --git a/moses2/legacy/Parameter.cpp b/moses2/legacy/Parameter.cpp index 5cb88645e..6ba30f651 100644 --- a/moses2/legacy/Parameter.cpp +++ b/moses2/legacy/Parameter.cpp @@ -50,7 +50,7 @@ Parameter::Parameter() po::options_description main_opts("Main Options"); AddParam(main_opts, "config", "f", "location of the configuration file"); AddParam(main_opts, "input-file", "i", - "location of the input file to be translated"); + "location of the input file to be translated"); AddParam(main_opts, "verbose", "v", "verbosity level of the logging"); AddParam(main_opts, "show-weights", "print feature weights and exit"); @@ -65,7 +65,7 @@ Parameter::Parameter() // one should be able to specify different factor delimiters for intput and output AddParam(factor_opts, "mapping", "description of decoding steps"); // whatever that means ... AddParam(factor_opts, "placeholder-factor", - "Which source factor to use to store the original text for placeholders. The factor must not be used by a translation or gen model"); + "Which source factor to use to store the original text for placeholders. The factor must not be used by a translation or gen model"); /////////////////////////////////////////////////////////////////////////////////////// // general search options @@ -82,11 +82,11 @@ Parameter::Parameter() desc += "9=forest-to-string"; AddParam(search_opts, "search-algorithm", desc); AddParam(search_opts, "beam-threshold", "b", - "threshold for threshold pruning"); + "threshold for threshold pruning"); //AddParam(search_opts, "early-discarding-threshold", "edt", // "threshold for constructing hypotheses based on estimate cost"); AddParam(search_opts, "stack", "s", - "maximum stack size for histogram pruning. 0 = unlimited stack size"); + "maximum stack size for histogram pruning. 
0 = unlimited stack size"); //AddParam(search_opts, "stack-diversity", "sd", // "minimum number of hypothesis of each coverage in stack (default 0)"); @@ -94,18 +94,18 @@ Parameter::Parameter() //AddParam(search_opts, "weight-file", "wf", // "feature weights file. Do *not* put weights for 'core' features in here - they go in moses.ini"); AddParam(search_opts, "weight", - "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated"); + "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated"); AddParam(search_opts, "feature-overwrite", - "Override arguments in a particular feature function with a particular key. Format: -feature-overwrite \"FeatureName key=value\""); + "Override arguments in a particular feature function with a particular key. Format: -feature-overwrite \"FeatureName key=value\""); po::options_description tune_opts("Options used in tuning."); AddParam(tune_opts, "weight-overwrite", - "special parameter for mert. All on 1 line. Overrides weights specified in 'weights' argument"); + "special parameter for mert. All on 1 line. Overrides weights specified in 'weights' argument"); AddParam(tune_opts, "feature-add", - "Add a feature function on the command line. Used by mira to add BLEU feature"); + "Add a feature function on the command line. Used by mira to add BLEU feature"); AddParam(tune_opts, "weight-add", - "Add weight for FF if it doesn't exist, i.e weights here are added 1st, and can be override by the ini file or on the command line. Used to specify initial weights for FF that was also specified on the copmmand line"); + "Add weight for FF if it doesn't exist, i.e weights here are added 1st, and can be override by the ini file or on the command line. 
Used to specify initial weights for FF that was also specified on the copmmand line"); // phrase table limitations: //AddParam(search_opts, "max-partial-trans-opt", @@ -113,7 +113,7 @@ Parameter::Parameter() //AddParam(search_opts, "max-trans-opt-per-coverage", // "maximum number of translation options per input span (after applying mapping steps)"); AddParam(search_opts, "max-phrase-length", - "maximum phrase length (default 20)"); + "maximum phrase length (default 20)"); //AddParam(search_opts, "translation-option-threshold", "tot", // "threshold for translation options relative to best for input phrase"); @@ -123,14 +123,14 @@ Parameter::Parameter() //AddParam(search_opts, "phrase-drop-allowed", "da", // "if present, allow dropping of source words"); //da = drop any (word); see -du for comparison AddParam(search_opts, "threads", "th", - "number of threads to use in decoding (defaults to single-threaded)"); + "number of threads to use in decoding (defaults to single-threaded)"); // distortion options po::options_description disto_opts("Distortion options"); AddParam(disto_opts, "distortion-limit", "dl", - "distortion (reordering) limit in maximum number of words (0 = monotone, -1 = unlimited)"); + "distortion (reordering) limit in maximum number of words (0 = monotone, -1 = unlimited)"); AddParam(disto_opts, "monotone-at-punctuation", "mp", - "do not reorder over punctuation"); + "do not reorder over punctuation"); //AddParam(disto_opts, "early-distortion-cost", "edc", // "include estimate of distortion cost yet to be incurred in the score [Moore & Quirk 2007]. Default is no"); //AddParam(disto_opts, "distortion", @@ -139,18 +139,18 @@ Parameter::Parameter() // cube pruning po::options_description cube_opts("Cube pruning options."); AddParam(cube_opts, "cube-pruning-pop-limit", "cbp", - "How many hypotheses should be popped for each stack. (default = 1000)"); + "How many hypotheses should be popped for each stack. 
(default = 1000)"); AddParam(cube_opts, "cube-pruning-diversity", "cbd", - "How many hypotheses should be created for each coverage. (default = 0)"); + "How many hypotheses should be created for each coverage. (default = 0)"); AddParam(cube_opts, "cube-pruning-lazy-scoring", "cbls", - "Don't fully score a hypothesis until it is popped"); + "Don't fully score a hypothesis until it is popped"); //AddParam(cube_opts, "cube-pruning-deterministic-search", "cbds", // "Break ties deterministically during search"); /////////////////////////////////////////////////////////////////////////////////////// // minimum bayes risk decoding po::options_description mbr_opts( - "Minimum Bayes Risk (MBR), Lattice MBR, and Consensus decoding"); + "Minimum Bayes Risk (MBR), Lattice MBR, and Consensus decoding"); //AddParam(mbr_opts, "minimum-bayes-risk", "mbr", // "use miminum Bayes risk to determine best translation"); @@ -179,12 +179,12 @@ Parameter::Parameter() // OOV handling options po::options_description oov_opts("OOV Handling Options"); AddParam(oov_opts, "drop-unknown", "du", - "drop unknown words instead of copying them"); + "drop unknown words instead of copying them"); AddParam(oov_opts, "mark-unknown", "mu", "mark unknown words in output"); AddParam(oov_opts, "unknown-word-prefix", - "prefix to unknwon word when marked (default: 'UNK')"); + "prefix to unknwon word when marked (default: 'UNK')"); AddParam(oov_opts, "unknown-word-suffix", - "suffix to unknwon word when marked (default: '')"); + "suffix to unknwon word when marked (default: '')"); //AddParam(oov_opts, "lmodel-oov-feature", // "add language model oov feature, one per model"); //AddParam(oov_opts, "output-unknowns", @@ -197,9 +197,9 @@ Parameter::Parameter() po::options_description input_opts("Input Format Options"); AddParam(input_opts, "input-factors", "list of factors in the input"); AddParam(input_opts, "inputtype", - "text (0), confusion network (1), word lattice (2), tree (3) (default = 0)"); + "text (0), 
confusion network (1), word lattice (2), tree (3) (default = 0)"); AddParam(input_opts, "xml-input", "xi", - "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'constraint', 'ignore'"); + "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'constraint', 'ignore'"); //AddParam(input_opts, "xml-brackets", "xb", // "specify strings to be used as xml tags opening and closing, e.g. \"{{ }}\" (default \"< >\"). Avoid square brackets because of configuration file format. Valid only with text input mode"); //AddParam(input_opts, "start-translation-id", "Id of 1st input. Default = 0"); @@ -221,10 +221,10 @@ Parameter::Parameter() //AddParam(output_opts, "print-all-derivations", // "to print all derivations in search graph"); AddParam(output_opts, "translation-details", "T", - "for each best hypothesis, report translation details to the given file"); + "for each best hypothesis, report translation details to the given file"); AddParam(output_opts, "output-hypo-score", - "Output the hypo score to stdout with the output string. For search error analysis. Default is false"); + "Output the hypo score to stdout with the output string. For search error analysis. Default is false"); //AddParam(output_opts, "output-word-graph", "owg", // "Output stack info as word graph. Takes filename, 0=only hypos in stack, 1=stack + nbest hypos"); //AddParam(output_opts, "tree-translation-details", "Ttree", @@ -236,9 +236,9 @@ Parameter::Parameter() //AddParam(output_opts, "sort-word-alignment", // "Sort word alignments for more consistent display. 
0=no sort (default), 1=target order"); AddParam(output_opts, "report-segmentation", "t", - "report phrase segmentation in the output"); + "report phrase segmentation in the output"); AddParam(output_opts, "report-segmentation-enriched", "tt", - "report phrase segmentation in the output with additional information"); + "report phrase segmentation in the output with additional information"); // translation-all-details was introduced in the context of DIMwid: Decoder Inspection for Moses (using Widgets) // see here: https://ufal.mff.cuni.cz/pbml/100/art-kurtz-seemann-braune-maletti.pdf @@ -266,7 +266,7 @@ Parameter::Parameter() // nbest-options po::options_description nbest_opts("N-best Options"); AddParam(nbest_opts, "n-best-list", - "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT"); + "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT"); // AddParam(nbest_opts,"n-best-list-file", "file of n-best-list to be generated; specify - as the file in order to write to STDOUT"); // AddParam(nbest_opts,"n-best-list-size", "size of n-best-list to be generated; specify - as the file in order to write to STDOUT"); //AddParam(nbest_opts, "labeled-n-best-list", @@ -274,7 +274,7 @@ Parameter::Parameter() //AddParam(nbest_opts, "n-best-trees", // "Write n-best target-side trees to n-best-list"); AddParam(nbest_opts, "n-best-factor", - "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0"); + "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0"); //AddParam(nbest_opts, "report-all-factors-in-n-best", // "Report all factors in n-best-lists. Default is false"); //AddParam(nbest_opts, "lattice-samples", @@ -296,7 +296,7 @@ Parameter::Parameter() // string("Max. 
number of sessions cached.") // + "Least recently used session is dumped first."); AddParam(server_opts, "serial", - "Run server in serial mode, processing only one request at a time."); + "Run server in serial mode, processing only one request at a time."); AddParam(server_opts,"server-maxconn", "Max. No of simultaneous HTTP transactions allowed by the server."); @@ -315,9 +315,9 @@ Parameter::Parameter() po::options_description chart_opts("Chart Decoding Options"); AddParam(chart_opts, "max-chart-span", - "maximum num. of source word chart rules can consume (default 10)"); + "maximum num. of source word chart rules can consume (default 10)"); AddParam(chart_opts, "non-terminals", - "list of non-term symbols, space separated"); + "list of non-term symbols, space separated"); //AddParam(chart_opts, "rule-limit", // "a little like table limit. But for chart decoding rules. Default is DEFAULT_MAX_TRANS_OPT_SIZE"); //AddParam(chart_opts, "source-label-overlap", @@ -338,7 +338,7 @@ Parameter::Parameter() //AddParam(o,"continue-partial-translation", "cpt", "start from nonempty hypothesis"); AddParam(misc_opts, "decoding-graph-backoff", "dpb", - "only use subsequent decoding paths for unknown spans of given length"); + "only use subsequent decoding paths for unknown spans of given length"); //AddParam(misc_opts, "references", // "Reference file(s) - used for bleu score feature"); //AddParam(misc_opts, "recover-input-path", "r", @@ -357,11 +357,11 @@ Parameter::Parameter() // "Context window (in words) for context-sensitive translation: {+|-|+-}."); AddParam(misc_opts, "cpu-affinity-offset", "CPU Affinity. Default = -1 (no affinity)"); AddParam(misc_opts, "cpu-affinity-increment", - "Set to 1 (default) to put each thread on different cores. 0 to run all threads on one core"); + "Set to 1 (default) to put each thread on different cores. 0 to run all threads on one core"); // Compact phrase table and reordering table. 
po::options_description cpt_opts( - "Options when using compact phrase and reordering tables."); + "Options when using compact phrase and reordering tables."); //AddParam(cpt_opts, "minphr-memory", // "Load phrase table in minphr format into memory"); //AddParam(cpt_opts, "minlexr-memory", @@ -376,7 +376,7 @@ Parameter::Parameter() // DEPRECATED options po::options_description deprec_opts("Deprecated Options"); AddParam(deprec_opts, "text-type", - "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features"); + "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features"); /* AddParam(deprec_opts, "link-param-count", @@ -431,11 +431,11 @@ Parameter::Parameter() "DEPRECATED. DO NOT USE. location and properties of the language models"); AddParam(deprec_opts, "lmodel-dub", "DEPRECATED. DO NOT USE. dictionary upper bounds of language models"); -#ifdef HAVE_SYNLM + #ifdef HAVE_SYNLM AddParam(deprec_opts,"slmodel-file", "DEPRECATED. DO NOT USE. location of the syntactic language model file(s)"); AddParam(deprec_opts,"slmodel-factor", "DEPRECATED. DO NOT USE. factor to use with syntactic language model"); AddParam(deprec_opts,"slmodel-beam", "DEPRECATED. DO NOT USE. beam width to use with syntactic language model's parser"); -#endif + #endif AddParam(deprec_opts, "ttable-file", "DEPRECATED. DO NOT USE. 
location and properties of the translation tables"); AddParam(deprec_opts, "phrase-pair-feature", @@ -494,8 +494,7 @@ const PARAM_VEC *Parameter::GetParam(const std::string ¶mName) const PARAM_MAP::const_iterator iter = m_setting.find(paramName); if (iter == m_setting.end()) { return NULL; - } - else { + } else { return &iter->second; } @@ -503,7 +502,7 @@ const PARAM_VEC *Parameter::GetParam(const std::string ¶mName) const /** initialize a parameter, sub of constructor */ void Parameter::AddParam(po::options_description& optgroup, - string const& paramName, string const& description) + string const& paramName, string const& description) { m_valid[paramName] = true; m_description[paramName] = description; @@ -512,8 +511,8 @@ void Parameter::AddParam(po::options_description& optgroup, /** initialize a parameter (including abbreviation), sub of constructor */ void Parameter::AddParam(po::options_description& optgroup, - string const& paramName, string const& abbrevName, - string const& description) + string const& paramName, string const& abbrevName, + string const& description) { m_valid[paramName] = true; m_valid[abbrevName] = true; @@ -581,7 +580,7 @@ bool Parameter::LoadParam(int argc, char* xargv[]) // config file (-f) arg mandatory string configPath; if ((configPath = FindParam("-f", argc, argv)) == "" && (configPath = - FindParam("-config", argc, argv)) == "") { + FindParam("-config", argc, argv)) == "") { PrintCredit(); Explain(); FeatureRegistry::Instance().PrintFF(); @@ -590,8 +589,7 @@ bool Parameter::LoadParam(int argc, char* xargv[]) cerr << "No configuration file was specified. 
Use -config or -f"; cerr << endl; return false; - } - else { + } else { if (!ReadConfigFile(configPath)) { std::cerr << "Could not read " << configPath; return false; @@ -600,14 +598,14 @@ bool Parameter::LoadParam(int argc, char* xargv[]) // overwrite parameters with values from switches for (PARAM_STRING::const_iterator iterParam = m_description.begin(); - iterParam != m_description.end(); iterParam++) { + iterParam != m_description.end(); iterParam++) { const string paramName = iterParam->first; OverwriteParam("-" + paramName, paramName, argc, argv); } // ... also shortcuts for (PARAM_STRING::const_iterator iterParam = m_abbreviation.begin(); - iterParam != m_abbreviation.end(); iterParam++) { + iterParam != m_abbreviation.end(); iterParam++) { const string paramName = iterParam->first; const string paramShortName = iterParam->second; OverwriteParam("-" + paramShortName, paramName, argc, argv); @@ -619,11 +617,11 @@ bool Parameter::LoadParam(int argc, char* xargv[]) int verbose = 1; if (m_setting.find("verbose") != m_setting.end() && m_setting["verbose"].size() > 0) verbose = Scan( - m_setting["verbose"][0]); + m_setting["verbose"][0]); if (verbose >= 1) { // only if verbose cerr << "Defined parameters (per moses.ini or switch):" << endl; for (PARAM_MAP::const_iterator iterParam = m_setting.begin(); - iterParam != m_setting.end(); iterParam++) { + iterParam != m_setting.end(); iterParam++) { cerr << "\t" << iterParam->first << ": "; for (size_t i = 0; i < iterParam->second.size(); i++) cerr << iterParam->second[i] << " "; @@ -717,7 +715,7 @@ void Parameter::SetWeight(const std::string &name, size_t ind, float weight) } void Parameter::SetWeight(const std::string &name, size_t ind, - const vector &weights) + const vector &weights) { PARAM_VEC &newWeights = m_setting["weight"]; string line = name + SPrint(ind) + "="; @@ -729,7 +727,7 @@ void Parameter::SetWeight(const std::string &name, size_t ind, } void Parameter::AddWeight(const std::string &name, size_t ind, - 
const std::vector &weights) + const std::vector &weights) { PARAM_VEC &newWeights = m_setting["weight"]; @@ -777,13 +775,12 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName) PARAM_VEC &numInputScores = m_setting["input-scores"]; if (inputWeights.size() == 1) { UTIL_THROW_IF2(numInputScores.size() != 0, - "No [input-scores] section allowed"); + "No [input-scores] section allowed"); numInputScores.push_back("1"); numInputScores.push_back("0"); - } - else if (inputWeights.size() == 2) { + } else if (inputWeights.size() == 2) { UTIL_THROW_IF2(numInputScores.size() != 0, - "No [input-scores] section allowed"); + "No [input-scores] section allowed"); numInputScores.push_back("1"); numInputScores.push_back("1"); } @@ -821,15 +818,14 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName) if (maxTargetPhrase.size() == 1 && translationVector.size() > 1) { cerr << "Using uniform ttable-limit of " << maxTargetPhrase[0] - << " for all translation tables." << endl; + << " for all translation tables." 
<< endl; for (size_t i = 1; i < translationVector.size(); i++) maxTargetPhrase.push_back(maxTargetPhrase[0]); - } - else if (maxTargetPhrase.size() != 1 - && maxTargetPhrase.size() < translationVector.size()) { + } else if (maxTargetPhrase.size() != 1 + && maxTargetPhrase.size() < translationVector.size()) { std::cerr << "You specified " << translationVector.size() - << " translation tables, but only " << maxTargetPhrase.size() - << " ttable-limits."; + << " translation tables, but only " << maxTargetPhrase.size() + << " ttable-limits."; return; } @@ -848,7 +844,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName) return; } UTIL_THROW_IF2(token.size() < 5, - "Phrase table must have at least 5 scores"); + "Phrase table must have at least 5 scores"); int implementation = Scan(token[0]); @@ -886,8 +882,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName) if (ptIndices.find(ptType) == ptIndices.end()) { ptIndices[ptType] = 0; ptInd = 0; - } - else { + } else { ptInd = ++ptIndices[ptType]; } @@ -898,7 +893,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName) vector weights(numFF); for (size_t currFF = 0; currFF < numFF; ++currFF) { UTIL_THROW_IF2(currOldInd >= oldWeights.size(), - "Errors converting old phrase-table weights to new weights"); + "Errors converting old phrase-table weights to new weights"); float weight = Scan(oldWeights[currOldInd]); weights[currFF] = weight; @@ -918,7 +913,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName) //characteristics of the phrase table vector input = Tokenize(token[1], ","), output = - Tokenize(token[2], ","); + Tokenize(token[2], ","); size_t numScoreComponent = Scan(token[3]); string filePath = token[4]; @@ -978,7 +973,7 @@ void Parameter::ConvertWeightArgsDistortion() const PARAM_VEC *lextable = GetParam(oldLexReordingName); for (size_t indTable = 0; lextable && indTable < lextable->size(); - ++indTable) { + ++indTable) { 
const string &line = lextable->at(indTable); vector toks = Tokenize(line); @@ -987,7 +982,7 @@ void Parameter::ConvertWeightArgsDistortion() vector weights(numFF); for (size_t currFF = 0; currFF < numFF; ++currFF) { UTIL_THROW_IF2(oldWeights && currOldInd >= oldWeights->size(), - "Errors converting old distortion weights to new weights"); + "Errors converting old distortion weights to new weights"); float weight = Scan(oldWeights->at(currOldInd)); weights[currFF] = weight; @@ -1000,9 +995,9 @@ void Parameter::ConvertWeightArgsDistortion() vector factors = Tokenize(toks[0], "-"); UTIL_THROW_IF2(factors.size() != 2, - "Error in old factor specification for lexicalized reordering model: " << toks[0]); + "Error in old factor specification for lexicalized reordering model: " << toks[0]); strme << "input-factor=" << factors[0] << " output-factor=" << factors[1] - << " "; + << " "; strme << "num-features=" << toks[2] << " "; strme << "path=" << toks[3]; @@ -1074,7 +1069,7 @@ void Parameter::ConvertWeightArgsLM() vector weightsLM(numFF); for (size_t currFF = 0; currFF < numFF; ++currFF) { UTIL_THROW_IF2(currOldInd >= weights.size(), - "Errors converting old LM weights to new weights"); + "Errors converting old LM weights to new weights"); weightsLM[currFF] = Scan(weights[currOldInd]); if (isChartDecoding) { weightsLM[currFF] = UntransformLMScore(weightsLM[currFF]); @@ -1086,12 +1081,11 @@ void Parameter::ConvertWeightArgsLM() SetWeight(newFeatureName, lmIndex, weightsLM); string featureLine = newFeatureName + " " + "factor=" + modelToks[1] + " " // factor - + "order=" + modelToks[2] + " " // order - + "num-features=" + SPrint(numFF) + " "; + + "order=" + modelToks[2] + " " // order + + "num-features=" + SPrint(numFF) + " "; if (lmType == 9) { featureLine += "lazyken=1 "; - } - else if (lmType == 8) { + } else if (lmType == 8) { featureLine += "lazyken=0 "; } @@ -1127,7 +1121,7 @@ void Parameter::ConvertWeightArgsGeneration(const std::string &oldWeightName, vector 
weights(numFF); for (size_t currFF = 0; currFF < numFF; ++currFF) { UTIL_THROW_IF2(currOldInd >= oldWeights.size(), - "Errors converting old generation weights to new weights"); + "Errors converting old generation weights to new weights"); float weight = Scan(oldWeights[currOldInd]); weights[currFF] = weight; @@ -1137,8 +1131,8 @@ void Parameter::ConvertWeightArgsGeneration(const std::string &oldWeightName, util::StringStream strme; strme << "Generation " << "input-factor=" << modelToks[0] << " " - << "output-factor=" << modelToks[1] << " " << "num-features=" - << modelToks[2] << " " << "path=" << modelToks[3]; + << "output-factor=" << modelToks[1] << " " << "num-features=" + << modelToks[2] << " " << "path=" << modelToks[3]; AddFeature(strme.str()); } } @@ -1184,7 +1178,7 @@ void Parameter::ConvertPhrasePenalty() const PARAM_VEC *params = GetParam(oldWeightName); if (params) { UTIL_THROW_IF2(params->size() != 1, - "There should be only 1 phrase-penalty weight"); + "There should be only 1 phrase-penalty weight"); float weight = Scan(params->at(0)); AddFeature("PhrasePenalty"); SetWeight("PhrasePenalty", 0, weight); @@ -1197,7 +1191,7 @@ void Parameter::ConvertWeightArgs() { // can't handle discr LM. must do it manually 'cos of bigram/n-gram split UTIL_THROW_IF2(m_setting.count("weight-dlm") != 0, - "Can't handle discr LM. must do it manually 'cos of bigram/n-gram split"); + "Can't handle discr LM. 
must do it manually 'cos of bigram/n-gram split"); // check that old & new format aren't mixed if (m_setting.count("weight") @@ -1289,23 +1283,20 @@ void Parameter::WeightOverwrite() name = tok.substr(0, tok.size() - 1); std::map >::const_iterator found = - m_weights.find(name); + m_weights.find(name); if (found != m_weights.end()) { oldWeights = &(found->second); - } - else { + } else { oldWeights = NULL; } cnt = 0; - } - else { + } else { // a weight for curr ff if (toks[i] == "x") { UTIL_THROW_IF2(!oldWeights || cnt >= oldWeights->size(), - "Keeping previous weight failed in weight-overwrite"); + "Keeping previous weight failed in weight-overwrite"); weights.push_back(oldWeights->at(cnt)); - } - else { + } else { float weight = Scan(toks[i]); weights.push_back(weight); } @@ -1326,7 +1317,7 @@ bool Parameter::Validate() PARAM_MAP::const_iterator iterParams; for (iterParams = m_setting.begin(); iterParams != m_setting.end(); - ++iterParams) { + ++iterParams) { const std::string &key = iterParams->first; if (m_valid.find(key) == m_valid.end()) { @@ -1338,10 +1329,10 @@ bool Parameter::Validate() if (m_setting["lmodel-dub"].size() > 0) { if (m_setting["lmodel-file"].size() != m_setting["lmodel-dub"].size()) { std::cerr << "Config and parameters specify " - << static_cast(m_setting["lmodel-file"].size()) - << " language model files (lmodel-file), but " - << static_cast(m_setting["lmodel-dub"].size()) - << " LM upperbounds (lmodel-dub)" << endl; + << static_cast(m_setting["lmodel-file"].size()) + << " language model files (lmodel-file), but " + << static_cast(m_setting["lmodel-dub"].size()) + << " LM upperbounds (lmodel-dub)" << endl; noErrorFlag = false; } } @@ -1353,7 +1344,7 @@ bool Parameter::Validate() noErrorFlag = FileExists(m_setting["input-file"][0]); if (!noErrorFlag) { std::cerr << endl << "Input file " << m_setting["input-file"][0] - << " does not exist"; + << " does not exist"; } } // generation tables @@ -1381,7 +1372,7 @@ bool Parameter::Validate() /** 
check whether a file exists */ bool Parameter::FilesExist(const string ¶mName, int fieldNo, - std::vector const& extensions) + std::vector const& extensions) { typedef std::vector StringVec; StringVec::const_iterator iter; @@ -1401,8 +1392,8 @@ bool Parameter::FilesExist(const string ¶mName, int fieldNo, if (tokenizeIndex >= vec.size()) { std::cerr << "Expected at least " << (tokenizeIndex + 1) - << " tokens per entry in '" << paramName << "', but only found " - << vec.size(); + << " tokens per entry in '" << paramName << "', but only found " + << vec.size(); return false; } const string &pathStr = vec[tokenizeIndex]; @@ -1428,8 +1419,7 @@ string Parameter::FindParam(const string ¶mSwitch, int argc, char* argv[]) if (string(argv[i]) == paramSwitch) { if (i + 1 < argc) { return argv[i + 1]; - } - else { + } else { std::cerr << "Option " << paramSwitch << " requires a parameter!"; // TODO return some sort of error, not the empty string } @@ -1444,7 +1434,7 @@ string Parameter::FindParam(const string ¶mSwitch, int argc, char* argv[]) * \param argc number of arguments on command line * \param argv values of paramters on command line */ void Parameter::OverwriteParam(const string ¶mSwitch, - const string ¶mName, int argc, char* argv[]) + const string ¶mName, int argc, char* argv[]) { int startPos = -1; for (int i = 0; i < argc; i++) { @@ -1480,8 +1470,7 @@ bool Parameter::ReadConfigFile(const string &filePath) if (line.size() == 0) { // blank line. do nothing. 
- } - else if (line[0] == '[') { + } else if (line[0] == '[') { // new parameter for (size_t currPos = 0; currPos < line.size(); currPos++) { if (line[currPos] == ']') { @@ -1489,8 +1478,7 @@ bool Parameter::ReadConfigFile(const string &filePath) break; } } - } - else { + } else { // add value to parameter m_setting[paramName].push_back(line); } @@ -1498,14 +1486,12 @@ bool Parameter::ReadConfigFile(const string &filePath) return true; } -struct Credit -{ +struct Credit { string name, contact, currentPursuits, areaResponsibility; int sortId; Credit(string name, string contact, string currentPursuits, - string areaResponsibility) - { + string areaResponsibility) { this->name = name; this->contact = contact; this->currentPursuits = currentPursuits; @@ -1513,8 +1499,7 @@ struct Credit this->sortId = util::rand_excl(1000); } - bool operator<(const Credit &other) const - { + bool operator<(const Credit &other) const { /* if (areaResponsibility.size() != 0 && other.areaResponsibility.size() ==0) return true; @@ -1534,7 +1519,7 @@ std::ostream& operator<<(std::ostream &os, const Credit &credit) if (credit.contact != "") os << "\t contact: " << credit.contact; if (credit.currentPursuits != "") os << " " << credit.currentPursuits; if (credit.areaResponsibility != "") os << " I'll answer question on: " - << credit.areaResponsibility; + << credit.areaResponsibility; return os; } @@ -1544,38 +1529,38 @@ void Parameter::PrintCredit() srand(time(NULL)); everyone.push_back( - Credit("Nicola Bertoldi", "911", "", "scripts & other stuff")); + Credit("Nicola Bertoldi", "911", "", "scripts & other stuff")); everyone.push_back(Credit("Ondrej Bojar", "", "czech this out!", "")); everyone.push_back( - Credit("Chris Callison-Burch", "anytime, anywhere", - "international playboy", "")); + Credit("Chris Callison-Burch", "anytime, anywhere", + "international playboy", "")); everyone.push_back(Credit("Alexandra Constantin", "", "eu sunt varza", "")); everyone.push_back( - Credit("Brooke 
Cowan", "brooke@csail.mit.edu", - "if you're going to san francisco, be sure to wear a flower in your hair", - "")); + Credit("Brooke Cowan", "brooke@csail.mit.edu", + "if you're going to san francisco, be sure to wear a flower in your hair", + "")); everyone.push_back( - Credit("Chris Dyer", "can't. i'll be out driving my mustang", - "driving my mustang", "")); + Credit("Chris Dyer", "can't. i'll be out driving my mustang", + "driving my mustang", "")); everyone.push_back( - Credit("Marcello Federico", "federico at itc at it", - "Researcher at ITC-irst, Trento, Italy", "IRST language model")); + Credit("Marcello Federico", "federico at itc at it", + "Researcher at ITC-irst, Trento, Italy", "IRST language model")); everyone.push_back( - Credit("Evan Herbst", "Small college in upstate New York", "", "")); + Credit("Evan Herbst", "Small college in upstate New York", "", "")); everyone.push_back( - Credit("Philipp Koehn", "only between 2 and 4am", "", - "Nothing fazes this dude")); + Credit("Philipp Koehn", "only between 2 and 4am", "", + "Nothing fazes this dude")); everyone.push_back( - Credit("Christine Moran", "weird building at MIT", "", "")); + Credit("Christine Moran", "weird building at MIT", "", "")); everyone.push_back( - Credit("Wade Shen", "via morse code", "buying another laptop", "")); + Credit("Wade Shen", "via morse code", "buying another laptop", "")); everyone.push_back( - Credit("Richard Zens", "richard at aachen dot de", "", - "ambiguous source input, confusion networks, confusing source code")); + Credit("Richard Zens", "richard at aachen dot de", "", + "ambiguous source input, confusion networks, confusing source code")); everyone.push_back( - Credit("Hieu Hoang", "http://www.hoang.co.uk/hieu/", - "phd student at Edinburgh Uni. Original Moses developer", - "general queries/ flames on Moses.")); + Credit("Hieu Hoang", "http://www.hoang.co.uk/hieu/", + "phd student at Edinburgh Uni. 
Original Moses developer", + "general queries/ flames on Moses.")); sort(everyone.begin(), everyone.end()); @@ -1622,18 +1607,17 @@ void Parameter::OverwriteParam(const string ¶mName, PARAM_VEC values) m_setting[paramName]; // defines the parameter, important for boolean switches if (m_setting[paramName].size() > 1) { cerr << " (the parameter had " << m_setting[paramName].size() - << " previous values)"; + << " previous values)"; UTIL_THROW_IF2(m_setting[paramName].size() != values.size(), - "Number of weight override for " << paramName << " is not the same as the original number of weights"); - } - else { + "Number of weight override for " << paramName << " is not the same as the original number of weights"); + } else { cerr << " (the parameter does not have previous values)"; m_setting[paramName].resize(values.size()); } cerr << " with the following values:"; int i = 0; for (PARAM_VEC::iterator iter = values.begin(); iter != values.end(); - iter++, i++) { + iter++, i++) { m_setting[paramName][i] = *iter; cerr << " " << *iter; } @@ -1658,7 +1642,7 @@ void Parameter::Save(const std::string path) PARAM_MAP::const_iterator iterOuter; for (iterOuter = m_setting.begin(); iterOuter != m_setting.end(); - ++iterOuter) { + ++iterOuter) { const std::string §ionName = iterOuter->first; file << "[" << sectionName << "]" << endl; @@ -1678,7 +1662,7 @@ void Parameter::Save(const std::string path) template<> void Parameter::SetParameter(bool ¶meter, - std::string const& parameterName, bool const& defaultValue) const + std::string const& parameterName, bool const& defaultValue) const { const PARAM_VEC *params = GetParam(parameterName); diff --git a/moses2/legacy/Parameter.h b/moses2/legacy/Parameter.h index f43ce98a4..501f35e99 100644 --- a/moses2/legacy/Parameter.h +++ b/moses2/legacy/Parameter.h @@ -58,46 +58,46 @@ protected: std::string FindParam(const std::string ¶mSwitch, int argc, char* argv[]); void OverwriteParam(const std::string ¶mSwitch, - const std::string ¶mName, int 
argc, char* argv[]); + const std::string ¶mName, int argc, char* argv[]); bool ReadConfigFile(const std::string &filePath); bool FilesExist(const std::string ¶mName, int fieldNo, - std::vector const& fileExtension = std::vector( - 1, "")); + std::vector const& fileExtension = std::vector( + 1, "")); bool isOption(const char* token); bool Validate(); void AddParam(options_description& optgroup, value_semantic const* optvalue, - std::string const& paramName, std::string const& description); + std::string const& paramName, std::string const& description); void AddParam(options_description& optgroup, std::string const ¶mName, - std::string const &description); + std::string const &description); void AddParam(options_description& optgroup, value_semantic const* optvalue, - std::string const& paramName, std::string const& abbrevName, - std::string const& description); + std::string const& paramName, std::string const& abbrevName, + std::string const& description); void AddParam(options_description& optgroup, std::string const& paramName, - std::string const& abbrevName, std::string const& description); + std::string const& abbrevName, std::string const& description); void PrintCredit(); void SetWeight(const std::string &name, size_t ind, float weight); void SetWeight(const std::string &name, size_t ind, - const std::vector &weights); + const std::vector &weights); void AddWeight(const std::string &name, size_t ind, - const std::vector &weights); + const std::vector &weights); void ConvertWeightArgs(); void ConvertWeightArgsSingleWeight(const std::string &oldWeightName, - const std::string &newWeightName); + const std::string &newWeightName); void ConvertWeightArgsPhraseModel(const std::string &oldWeightName); void ConvertWeightArgsLM(); void ConvertWeightArgsDistortion(); void ConvertWeightArgsGeneration(const std::string &oldWeightName, - const std::string &newWeightName); + const std::string &newWeightName); void ConvertWeightArgsPhrasePenalty(); void 
ConvertWeightArgsWordPenalty(); void ConvertPhrasePenalty(); @@ -118,22 +118,19 @@ public: const PARAM_VEC *GetParam(const std::string ¶mName) const; /** check if parameter is defined (either in moses.ini or as switch) */ - bool isParamSpecified(const std::string ¶mName) const - { + bool isParamSpecified(const std::string ¶mName) const { return m_setting.find(paramName) != m_setting.end(); } void OverwriteParam(const std::string ¶mName, PARAM_VEC values); std::vector GetWeights(const std::string &name); - const std::map > &GetAllWeights() const - { + const std::map > &GetAllWeights() const { return m_weights; } std::set GetWeightNames() const; - const PARAM_MAP &GetParams() const - { + const PARAM_MAP &GetParams() const { return m_setting; } @@ -141,21 +138,18 @@ public: template void SetParameter(T &var, const std::string &name, - const T &defaultValue) const - { + const T &defaultValue) const { const PARAM_VEC *params = GetParam(name); if (params && params->size()) { var = Scan(params->at(0)); - } - else { + } else { var = defaultValue; } } void SetParameter(bool& var, std::string const& name); - bool SetBooleanSwitch(bool& val, std::string const name) - { + bool SetBooleanSwitch(bool& val, std::string const name) { // issues a warning if format is wrong const PARAM_VEC *params = GetParam(name); val = (params && params->size()); @@ -170,7 +164,7 @@ public: template<> void Parameter::SetParameter(bool &var, const std::string &name, - const bool &defaultValue) const; + const bool &defaultValue) const; } diff --git a/moses2/legacy/Range.h b/moses2/legacy/Range.h index 76d720bed..9acfba45d 100644 --- a/moses2/legacy/Range.h +++ b/moses2/legacy/Range.h @@ -44,48 +44,40 @@ class Range // m_endPos is inclusive size_t m_startPos, m_endPos; public: - inline explicit Range() - { + inline explicit Range() { } inline Range(size_t startPos, size_t endPos) : - m_startPos(startPos), m_endPos(endPos) - { + m_startPos(startPos), m_endPos(endPos) { } inline Range(const Range ©) : 
- m_startPos(copy.GetStartPos()), m_endPos(copy.GetEndPos()) - { + m_startPos(copy.GetStartPos()), m_endPos(copy.GetEndPos()) { } - inline size_t GetStartPos() const - { + inline size_t GetStartPos() const { return m_startPos; } - inline size_t GetEndPos() const - { + inline size_t GetEndPos() const { return m_endPos; } - inline void SetStartPos(size_t val) - { + inline void SetStartPos(size_t val) { m_startPos = val; } - inline void SetEndPos(size_t val) - { + inline void SetEndPos(size_t val) { m_endPos = val; } //! count of words translated - inline size_t GetNumWordsCovered() const - { + inline size_t GetNumWordsCovered() const { assert( - (m_startPos == NOT_FOUND && m_endPos == NOT_FOUND) || (m_startPos != NOT_FOUND && m_endPos != NOT_FOUND)); + (m_startPos == NOT_FOUND && m_endPos == NOT_FOUND) || (m_startPos != NOT_FOUND && m_endPos != NOT_FOUND)); return (m_startPos == NOT_FOUND) ? 0 : m_endPos - m_startPos + 1; } //! transitive comparison inline bool operator<(const Range& x) const { return (m_startPos= 0) { @@ -104,8 +104,7 @@ void ThreadPool::Execute() task->Run(); } m_threadAvailable.notify_all(); - } - while (!m_stopped); + } while (!m_stopped); } void ThreadPool::Submit(boost::shared_ptr task) diff --git a/moses2/legacy/ThreadPool.h b/moses2/legacy/ThreadPool.h index 62a8f43ad..e2cfac4a8 100644 --- a/moses2/legacy/ThreadPool.h +++ b/moses2/legacy/ThreadPool.h @@ -51,12 +51,10 @@ class Task { public: virtual void Run() = 0; - virtual bool DeleteAfterExecution() - { + virtual bool DeleteAfterExecution() { return true; } - virtual ~Task() - { + virtual ~Task() { } }; @@ -67,10 +65,9 @@ public: * Construct a thread pool of a fixed size. 
**/ explicit ThreadPool(size_t numThreads, int cpuAffinityOffset = -1, - int cpuAffinityIncr = 1); + int cpuAffinityIncr = 1); - ~ThreadPool() - { + ~ThreadPool() { Stop(); } @@ -88,8 +85,7 @@ public: /** * Set maximum number of queued threads (otherwise Submit blocks) **/ - void SetQueueLimit(size_t limit) - { + void SetQueueLimit(size_t limit) { m_queueLimit = limit; } @@ -113,12 +109,10 @@ class TestTask: public Task { public: TestTask(int id) : - m_id(id) - { + m_id(id) { } - virtual void Run() - { + virtual void Run() { #ifdef BOOST_HAS_PTHREADS pthread_t tid = pthread_self(); #else @@ -128,8 +122,7 @@ public: std::cerr << "Executing " << m_id << " in thread id " << tid << std::endl; } - virtual ~TestTask() - { + virtual ~TestTask() { } private: diff --git a/moses2/legacy/Timer.cpp b/moses2/legacy/Timer.cpp index b1857ee0d..81858e2fc 100644 --- a/moses2/legacy/Timer.cpp +++ b/moses2/legacy/Timer.cpp @@ -8,7 +8,7 @@ namespace Moses2 { Timer::Timer() : - running(false), stopped(false) + running(false), stopped(false) { start_time = 0; } @@ -46,8 +46,7 @@ void Timer::start(const char* msg) if (stopped) { start_time = util::WallTime() - (stop_time - start_time); stopped = false; - } - else { + } else { start_time = util::WallTime(); running = true; } diff --git a/moses2/legacy/Util2.cpp b/moses2/legacy/Util2.cpp index ffc348090..9b4ff217c 100644 --- a/moses2/legacy/Util2.cpp +++ b/moses2/legacy/Util2.cpp @@ -15,14 +15,15 @@ bool Scan(const std::string &input) if (lc == "yes" || lc == "y" || lc == "true" || lc == "1") return true; if (lc == "no" || lc == "n" || lc == "false" || lc == "0") return false; UTIL_THROW(BoolValueException, - "Could not interpret " << input << " as a boolean. After lowercasing, valid values are yes, y, true, 1, no, n, false, and 0."); + "Could not interpret " << input << " as a boolean. 
After lowercasing, valid values are yes, y, true, 1, no, n, false, and 0."); } const std::string ToLower(const std::string& str) { std::string lc(str); - std::transform(lc.begin(), lc.end(), lc.begin(), (int (*)(int))std::tolower);return -lc ; + std::transform(lc.begin(), lc.end(), lc.begin(), (int (*)(int))std::tolower); + return + lc ; } } diff --git a/moses2/legacy/Util2.h b/moses2/legacy/Util2.h index eef638f93..47b4a08ec 100644 --- a/moses2/legacy/Util2.h +++ b/moses2/legacy/Util2.h @@ -21,23 +21,19 @@ template class UnorderedComparer { public: - size_t operator()(const T& obj) const - { + size_t operator()(const T& obj) const { return obj.hash(); } - bool operator()(const T& a, const T& b) const - { + bool operator()(const T& a, const T& b) const { return a == b; } - size_t operator()(const T* obj) const - { + size_t operator()(const T* obj) const { return obj->hash(); } - bool operator()(const T* a, const T* b) const - { + bool operator()(const T* a, const T* b) const { return (*a) == (*b); } @@ -53,7 +49,7 @@ void Init(T arr[], size_t size, const T &val) //! 
delete white spaces at beginning and end of string inline std::string Trim(const std::string& str, const std::string dropChars = - " \t\n\r") + " \t\n\r") { std::string res = str; res.erase(str.find_last_not_of(dropChars) + 1); @@ -107,32 +103,32 @@ inline SearchAlgorithm Scan(const std::string &input) } template<> - inline XmlInputType Scan(const std::string &input) - { - XmlInputType ret; - if (input=="exclusive") ret = XmlExclusive; - else if (input=="inclusive") ret = XmlInclusive; - else if (input=="constraint") ret = XmlConstraint; - else if (input=="ignore") ret = XmlIgnore; - else if (input=="pass-through") ret = XmlPassThrough; - else { - UTIL_THROW2("Unknown XML input type"); - } - - return ret; +inline XmlInputType Scan(const std::string &input) +{ + XmlInputType ret; + if (input=="exclusive") ret = XmlExclusive; + else if (input=="inclusive") ret = XmlInclusive; + else if (input=="constraint") ret = XmlConstraint; + else if (input=="ignore") ret = XmlIgnore; + else if (input=="pass-through") ret = XmlPassThrough; + else { + UTIL_THROW2("Unknown XML input type"); } + return ret; +} + template<> - inline InputTypeEnum Scan(const std::string &input) - { - return (InputTypeEnum) Scan(input); - } +inline InputTypeEnum Scan(const std::string &input) +{ + return (InputTypeEnum) Scan(input); +} template<> - inline WordAlignmentSort Scan(const std::string &input) - { - return (WordAlignmentSort) Scan(input); - } +inline WordAlignmentSort Scan(const std::string &input) +{ + return (WordAlignmentSort) Scan(input); +} //! convert vectors of string to vectors of type T variables template @@ -182,7 +178,7 @@ inline std::vector Tokenize(const std::string& str, //! 
tokenise input string to vector of type T template inline std::vector Tokenize(const std::string &input, - const std::string& delimiters = " \t") + const std::string& delimiters = " \t") { std::vector stringVector = Tokenize(input, delimiters); return Scan(stringVector); @@ -201,8 +197,7 @@ inline std::vector TokenizeFirstOnly(const std::string& str, // Found a token, add it to the vector. tokens.push_back(str.substr(0, pos)); tokens.push_back(str.substr(pos + 1, str.size() - pos - 1)); - } - else { + } else { tokens.push_back(str); } @@ -210,7 +205,7 @@ inline std::vector TokenizeFirstOnly(const std::string& str, } inline std::vector TokenizeMultiCharSeparator( - const std::string& str, const std::string& separator) + const std::string& str, const std::string& separator) { std::vector tokens; @@ -233,7 +228,7 @@ inline std::vector TokenizeMultiCharSeparator( // speeded up version of above inline void TokenizeMultiCharSeparator(std::vector &output, - const std::string& str, const std::string& separator) + const std::string& str, const std::string& separator) { size_t pos = 0; // Find first "non-delimiter". 
@@ -296,7 +291,7 @@ template void RemoveAllInColl(COLL &coll) { for (typename COLL::const_iterator iter = coll.begin(); iter != coll.end(); - ++iter) { + ++iter) { delete (*iter); } coll.clear(); @@ -328,10 +323,8 @@ T &GetThreadSpecificObj(boost::thread_specific_ptr &coll) template S& Container(std::priority_queue& q) { - struct HackedQueue: private std::priority_queue - { - static S& Container(std::priority_queue& q) - { + struct HackedQueue: private std::priority_queue { + static S& Container(std::priority_queue& q) { return q.*&HackedQueue::c; } }; diff --git a/moses2/legacy/gzfilebuf.h b/moses2/legacy/gzfilebuf.h index ea7021757..db5998095 100644 --- a/moses2/legacy/gzfilebuf.h +++ b/moses2/legacy/gzfilebuf.h @@ -15,40 +15,34 @@ namespace Moses2 class gzfilebuf: public std::streambuf { public: - gzfilebuf(const char *filename) - { + gzfilebuf(const char *filename) { _gzf = gzopen(filename, "rb"); if (!_gzf) throw std::runtime_error( "Could not open " + std::string(filename) + "."); setg(_buff + sizeof(int), // beginning of putback area - _buff + sizeof(int), // read position - _buff + sizeof(int)); // end position + _buff + sizeof(int), // read position + _buff + sizeof(int)); // end position } - ~gzfilebuf() - { + ~gzfilebuf() { gzclose(_gzf); } protected: - virtual int_type overflow(int_type /* c */) - { + virtual int_type overflow(int_type /* c */) { throw; } // write multiple characters - virtual std::streamsize xsputn(const char* /* s */, std::streamsize /* num */) - { + virtual std::streamsize xsputn(const char* /* s */, std::streamsize /* num */) { throw; } virtual std::streampos seekpos(std::streampos /* sp */, - std::ios_base::openmode /* which = std::ios_base::in | std::ios_base::out */) - { + std::ios_base::openmode /* which = std::ios_base::in | std::ios_base::out */) { throw; } //read one character - virtual int_type underflow() - { + virtual int_type underflow() { // is read position before end of _buff? 
if (gptr() < egptr()) { return traits_type::to_int_type(*gptr()); @@ -67,7 +61,7 @@ protected: * the putback _buff (area of first four characters) */ std::memmove(_buff + (sizeof(int) - numPutback), gptr() - numPutback, - numPutback); + numPutback); // read new characters int num = gzread(_gzf, _buff + sizeof(int), _buffsize - sizeof(int)); @@ -78,15 +72,14 @@ protected: // reset _buff pointers setg(_buff + (sizeof(int) - numPutback), // beginning of putback area - _buff + sizeof(int), // read position - _buff + sizeof(int) + num); // end of buffer + _buff + sizeof(int), // read position + _buff + sizeof(int) + num); // end of buffer // return next character return traits_type::to_int_type(*gptr()); } - std::streamsize xsgetn(char* s, std::streamsize num) - { + std::streamsize xsgetn(char* s, std::streamsize num) { return gzread(_gzf, s, num); } diff --git a/moses2/parameters/AllOptions.cpp b/moses2/parameters/AllOptions.cpp index c4171d807..85b54e4b4 100644 --- a/moses2/parameters/AllOptions.cpp +++ b/moses2/parameters/AllOptions.cpp @@ -4,120 +4,115 @@ namespace Moses2 { - AllOptions:: - AllOptions() - : mira(false) - , use_legacy_pt(false) - { } +AllOptions:: +AllOptions() + : mira(false) + , use_legacy_pt(false) +{ } - AllOptions:: - AllOptions(Parameter const& param) - { - init(param); +AllOptions:: +AllOptions(Parameter const& param) +{ + init(param); +} + +bool +AllOptions:: +init(Parameter const& param) +{ + if (!search.init(param)) return false; + if (!cube.init(param)) return false; + if (!nbest.init(param)) return false; + if (!reordering.init(param)) return false; + if (!context.init(param)) return false; + if (!input.init(param)) return false; + if (!mbr.init(param)) return false; + if (!lmbr.init(param)) return false; + if (!output.init(param)) return false; + if (!unk.init(param)) return false; + if (!server.init(param)) return false; + if (!syntax.init(param)) return false; + + param.SetParameter(mira, "mira", false); + + return sanity_check(); +} + 
+bool +AllOptions:: +sanity_check() +{ + using namespace std; + if (lmbr.enabled) { + if (mbr.enabled) { + cerr << "Error: Cannot use both n-best mbr and lattice mbr together" << endl; + return false; + } + mbr.enabled = true; + } + if (search.consensus) { + if (mbr.enabled) { + cerr << "Error: Cannot use consensus decoding together with mbr" + << endl; + return false; + } + mbr.enabled = true; } - bool - AllOptions:: - init(Parameter const& param) - { - if (!search.init(param)) return false; - if (!cube.init(param)) return false; - if (!nbest.init(param)) return false; - if (!reordering.init(param)) return false; - if (!context.init(param)) return false; - if (!input.init(param)) return false; - if (!mbr.init(param)) return false; - if (!lmbr.init(param)) return false; - if (!output.init(param)) return false; - if (!unk.init(param)) return false; - if (!server.init(param)) return false; - if (!syntax.init(param)) return false; - - param.SetParameter(mira, "mira", false); - - return sanity_check(); + // RecoverPath should only be used with confusion net or word lattice input + if (output.RecoverPath && input.input_type == SentenceInput) { + TRACE_ERR("--recover-input-path should only be used with " + <<"confusion net or word lattice input!\n"); + output.RecoverPath = false; } - bool - AllOptions:: - sanity_check() - { - using namespace std; - if (lmbr.enabled) - { - if (mbr.enabled) - { - cerr << "Error: Cannot use both n-best mbr and lattice mbr together" << endl; - return false; - } - mbr.enabled = true; - } - if (search.consensus) - { - if (mbr.enabled) - { - cerr << "Error: Cannot use consensus decoding together with mbr" - << endl; - return false; - } - mbr.enabled = true; - } + // set m_nbest_options.enabled = true if necessary: + nbest.enabled = (nbest.enabled || mira || search.consensus + || nbest.nbest_size > 0 + || mbr.enabled || lmbr.enabled + || !output.SearchGraph.empty() + || !output.SearchGraphExtended.empty() + || !output.SearchGraphSLF.empty() + || 
!output.SearchGraphHG.empty() + || !output.SearchGraphPB.empty() + || output.lattice_sample_size != 0); - // RecoverPath should only be used with confusion net or word lattice input - if (output.RecoverPath && input.input_type == SentenceInput) - { - TRACE_ERR("--recover-input-path should only be used with " - <<"confusion net or word lattice input!\n"); - output.RecoverPath = false; - } - - // set m_nbest_options.enabled = true if necessary: - nbest.enabled = (nbest.enabled || mira || search.consensus - || nbest.nbest_size > 0 - || mbr.enabled || lmbr.enabled - || !output.SearchGraph.empty() - || !output.SearchGraphExtended.empty() - || !output.SearchGraphSLF.empty() - || !output.SearchGraphHG.empty() - || !output.SearchGraphPB.empty() - || output.lattice_sample_size != 0); - - return true; - } + return true; +} #ifdef HAVE_XMLRPC_C - bool - AllOptions:: - update(std::mapconst& param) - { - if (!search.update(param)) return false; - if (!cube.update(param)) return false; - if (!nbest.update(param)) return false; - if (!reordering.update(param)) return false; - if (!context.update(param)) return false; - if (!input.update(param)) return false; - if (!mbr.update(param)) return false; - if (!lmbr.update(param)) return false; - if (!output.update(param)) return false; - if (!unk.update(param)) return false; - if (!server.update(param)) return false; - //if (!syntax.update(param)) return false; - return sanity_check(); - } +bool +AllOptions:: +update(std::mapconst& param) +{ + if (!search.update(param)) return false; + if (!cube.update(param)) return false; + if (!nbest.update(param)) return false; + if (!reordering.update(param)) return false; + if (!context.update(param)) return false; + if (!input.update(param)) return false; + if (!mbr.update(param)) return false; + if (!lmbr.update(param)) return false; + if (!output.update(param)) return false; + if (!unk.update(param)) return false; + if (!server.update(param)) return false; + //if (!syntax.update(param)) return 
false; + return sanity_check(); +} #endif - bool - AllOptions:: - NBestDistinct() const - { - return (nbest.only_distinct - || mbr.enabled || lmbr.enabled - || output.lattice_sample_size - || !output.SearchGraph.empty() - || !output.SearchGraphExtended.empty() - || !output.SearchGraphSLF.empty() - || !output.SearchGraphHG.empty()); - } - - +bool +AllOptions:: +NBestDistinct() const +{ + return (nbest.only_distinct + || mbr.enabled || lmbr.enabled + || output.lattice_sample_size + || !output.SearchGraph.empty() + || !output.SearchGraphExtended.empty() + || !output.SearchGraphSLF.empty() + || !output.SearchGraphHG.empty()); +} + + } diff --git a/moses2/parameters/AllOptions.h b/moses2/parameters/AllOptions.h index 694a8a347..2f09cd385 100644 --- a/moses2/parameters/AllOptions.h +++ b/moses2/parameters/AllOptions.h @@ -18,34 +18,33 @@ namespace Moses2 { - struct - AllOptions : public OptionsBaseClass - { - typedef boost::shared_ptr ptr; - SearchOptions search; - CubePruningOptions cube; - NBestOptions nbest; - ReorderingOptions reordering; - ContextParameters context; - InputOptions input; - MBR_Options mbr; - LMBR_Options lmbr; - ReportingOptions output; - OOVHandlingOptions unk; - ServerOptions server; - SyntaxOptions syntax; - bool mira; - bool use_legacy_pt; - // StackOptions stack; - // BeamSearchOptions beam; - bool init(Parameter const& param); - bool sanity_check(); - AllOptions(); - AllOptions(Parameter const& param); +struct + AllOptions : public OptionsBaseClass { + typedef boost::shared_ptr ptr; + SearchOptions search; + CubePruningOptions cube; + NBestOptions nbest; + ReorderingOptions reordering; + ContextParameters context; + InputOptions input; + MBR_Options mbr; + LMBR_Options lmbr; + ReportingOptions output; + OOVHandlingOptions unk; + ServerOptions server; + SyntaxOptions syntax; + bool mira; + bool use_legacy_pt; + // StackOptions stack; + // BeamSearchOptions beam; + bool init(Parameter const& param); + bool sanity_check(); + AllOptions(); + 
AllOptions(Parameter const& param); - bool update(std::mapconst& param); - bool NBestDistinct() const; - - }; + bool update(std::mapconst& param); + bool NBestDistinct() const; + +}; } diff --git a/moses2/parameters/BeamSearchOptions.h b/moses2/parameters/BeamSearchOptions.h index d67c43438..590c7a53f 100644 --- a/moses2/parameters/BeamSearchOptions.h +++ b/moses2/parameters/BeamSearchOptions.h @@ -5,11 +5,10 @@ namespace Moses2 { - struct - BeamSearchOptions : public OptionsBaseClass - { - bool init(Parameter const& param); - BeamSearchOptions(Parameter const& param); - }; +struct + BeamSearchOptions : public OptionsBaseClass { + bool init(Parameter const& param); + BeamSearchOptions(Parameter const& param); +}; } diff --git a/moses2/parameters/BookkeepingOptions.h b/moses2/parameters/BookkeepingOptions.h index ad7c78301..75a04a2a0 100644 --- a/moses2/parameters/BookkeepingOptions.h +++ b/moses2/parameters/BookkeepingOptions.h @@ -6,13 +6,12 @@ namespace Moses2 { class Parameter; - struct BookkeepingOptions : public OptionsBaseClass - { - bool need_alignment_info; - bool init(Parameter const& param); - BookkeepingOptions(); - }; - +struct BookkeepingOptions : public OptionsBaseClass { + bool need_alignment_info; + bool init(Parameter const& param); + BookkeepingOptions(); +}; + } diff --git a/moses2/parameters/ContextParameters.cpp b/moses2/parameters/ContextParameters.cpp index 144692399..64a3307a8 100644 --- a/moses2/parameters/ContextParameters.cpp +++ b/moses2/parameters/ContextParameters.cpp @@ -7,7 +7,7 @@ namespace Moses2 ContextParameters:: ContextParameters() - : look_ahead(0), look_back(0) + : look_ahead(0), look_back(0) { } bool @@ -21,18 +21,17 @@ init(Parameter const& params) if (context_window == "") return true; - - if (context_window.substr(0,3) == "all") - { - look_back = look_ahead = std::numeric_limits::max(); - return true; - } - + + if (context_window.substr(0,3) == "all") { + look_back = look_ahead = std::numeric_limits::max(); + return true; 
+ } + size_t p = context_window.find_first_of("0123456789"); if (p == 0) look_back = look_ahead = atoi(context_window.c_str()); - - if (p == 1) { + + if (p == 1) { if (context_window[0] == '-') look_back = atoi(context_window.substr(1).c_str()); else if (context_window[0] == '+') diff --git a/moses2/parameters/CubePruningOptions.cpp b/moses2/parameters/CubePruningOptions.cpp index 35663e61d..0772eaddb 100644 --- a/moses2/parameters/CubePruningOptions.cpp +++ b/moses2/parameters/CubePruningOptions.cpp @@ -6,74 +6,70 @@ namespace Moses2 { - CubePruningOptions:: - CubePruningOptions() - : pop_limit(DEFAULT_CUBE_PRUNING_POP_LIMIT) - , diversity(DEFAULT_CUBE_PRUNING_DIVERSITY) - , lazy_scoring(false) - , deterministic_search(false) - {} +CubePruningOptions:: +CubePruningOptions() + : pop_limit(DEFAULT_CUBE_PRUNING_POP_LIMIT) + , diversity(DEFAULT_CUBE_PRUNING_DIVERSITY) + , lazy_scoring(false) + , deterministic_search(false) +{} - bool - CubePruningOptions:: - init(Parameter const& param) - { - param.SetParameter(pop_limit, "cube-pruning-pop-limit", - DEFAULT_CUBE_PRUNING_POP_LIMIT); - param.SetParameter(diversity, "cube-pruning-diversity", - DEFAULT_CUBE_PRUNING_DIVERSITY); - param.SetParameter(lazy_scoring, "cube-pruning-lazy-scoring", false); - //param.SetParameter(deterministic_search, "cube-pruning-deterministic-search", false); - return true; - } +bool +CubePruningOptions:: +init(Parameter const& param) +{ + param.SetParameter(pop_limit, "cube-pruning-pop-limit", + DEFAULT_CUBE_PRUNING_POP_LIMIT); + param.SetParameter(diversity, "cube-pruning-diversity", + DEFAULT_CUBE_PRUNING_DIVERSITY); + param.SetParameter(lazy_scoring, "cube-pruning-lazy-scoring", false); + //param.SetParameter(deterministic_search, "cube-pruning-deterministic-search", false); + return true; +} #ifdef HAVE_XMLRPC_C - bool - CubePruningOptions:: - update(std::mapconst& params) - { - typedef std::map params_t; +bool +CubePruningOptions:: +update(std::mapconst& params) +{ + typedef std::map 
params_t; - params_t::const_iterator si = params.find("cube-pruning-pop-limit"); - if (si != params.end()) pop_limit = xmlrpc_c::value_int(si->second); - - si = params.find("cube-pruning-diversity"); - if (si != params.end()) diversity = xmlrpc_c::value_int(si->second); - - si = params.find("cube-pruning-lazy-scoring"); - if (si != params.end()) - { - std::string spec = xmlrpc_c::value_string(si->second); - if (spec == "true" or spec == "on" or spec == "1") - lazy_scoring = true; - else if (spec == "false" or spec == "off" or spec == "0") - lazy_scoring = false; - else - { - char const* msg - = "Error parsing specification for cube-pruning-lazy-scoring"; - xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE); - } - } + params_t::const_iterator si = params.find("cube-pruning-pop-limit"); + if (si != params.end()) pop_limit = xmlrpc_c::value_int(si->second); - si = params.find("cube-pruning-deterministic-search"); - if (si != params.end()) - { - std::string spec = xmlrpc_c::value_string(si->second); - if (spec == "true" or spec == "on" or spec == "1") - deterministic_search = true; - else if (spec == "false" or spec == "off" or spec == "0") - deterministic_search = false; - else - { - char const* msg - = "Error parsing specification for cube-pruning-deterministic-search"; - xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE); - } - } + si = params.find("cube-pruning-diversity"); + if (si != params.end()) diversity = xmlrpc_c::value_int(si->second); - return true; + si = params.find("cube-pruning-lazy-scoring"); + if (si != params.end()) { + std::string spec = xmlrpc_c::value_string(si->second); + if (spec == "true" or spec == "on" or spec == "1") + lazy_scoring = true; + else if (spec == "false" or spec == "off" or spec == "0") + lazy_scoring = false; + else { + char const* msg + = "Error parsing specification for cube-pruning-lazy-scoring"; + xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE); } + } + + si = params.find("cube-pruning-deterministic-search"); + if (si != 
params.end()) { + std::string spec = xmlrpc_c::value_string(si->second); + if (spec == "true" or spec == "on" or spec == "1") + deterministic_search = true; + else if (spec == "false" or spec == "off" or spec == "0") + deterministic_search = false; + else { + char const* msg + = "Error parsing specification for cube-pruning-deterministic-search"; + xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE); + } + } + + return true; +} #endif diff --git a/moses2/parameters/CubePruningOptions.h b/moses2/parameters/CubePruningOptions.h index 2e9c898dc..6fa43b7ec 100644 --- a/moses2/parameters/CubePruningOptions.h +++ b/moses2/parameters/CubePruningOptions.h @@ -6,20 +6,19 @@ namespace Moses2 { - struct - CubePruningOptions : public OptionsBaseClass - { - size_t pop_limit; - size_t diversity; - bool lazy_scoring; - bool deterministic_search; +struct + CubePruningOptions : public OptionsBaseClass { + size_t pop_limit; + size_t diversity; + bool lazy_scoring; + bool deterministic_search; - bool init(Parameter const& param); - CubePruningOptions(Parameter const& param); - CubePruningOptions(); + bool init(Parameter const& param); + CubePruningOptions(Parameter const& param); + CubePruningOptions(); - bool - update(std::mapconst& params); - }; + bool + update(std::mapconst& params); +}; } diff --git a/moses2/parameters/InputOptions.cpp b/moses2/parameters/InputOptions.cpp index c008e98c4..435e70e96 100644 --- a/moses2/parameters/InputOptions.cpp +++ b/moses2/parameters/InputOptions.cpp @@ -9,94 +9,93 @@ namespace Moses2 { - InputOptions:: - InputOptions() - : continue_partial_translation(false) - , input_type(SentenceInput) - , xml_policy(XmlPassThrough) - , placeholder_factor(NOT_FOUND) - { - xml_brackets.first = "<"; - xml_brackets.second = ">"; - factor_order.assign(1,0); - factor_delimiter = "|"; +InputOptions:: +InputOptions() + : continue_partial_translation(false) + , input_type(SentenceInput) + , xml_policy(XmlPassThrough) + , placeholder_factor(NOT_FOUND) +{ + 
xml_brackets.first = "<"; + xml_brackets.second = ">"; + factor_order.assign(1,0); + factor_delimiter = "|"; +} + +bool +InputOptions:: +init(Parameter const& param) +{ + param.SetParameter(input_type, "inputtype", SentenceInput); +#if 0 + if (input_type == SentenceInput) { + VERBOSE(2, "input type is: text input"); + } else if (input_type == ConfusionNetworkInput) { + VERBOSE(2, "input type is: confusion net"); + } else if (input_type == WordLatticeInput) { + VERBOSE(2, "input type is: word lattice"); + } else if (input_type == TreeInputType) { + VERBOSE(2, "input type is: tree"); + } else if (input_type == TabbedSentenceInput) { + VERBOSE(2, "input type is: tabbed sentence"); + } else if (input_type == ForestInputType) { + VERBOSE(2, "input type is: forest"); + } +#endif + + + param.SetParameter(continue_partial_translation, + "continue-partial-translation", false); + + param.SetParameter(xml_policy, "xml-input", XmlPassThrough); + + // specify XML tags opening and closing brackets for XML option + // Do we really want this to be configurable???? 
UG + const PARAM_VEC *pspec; + pspec = param.GetParam("xml-brackets"); + if (pspec && pspec->size()) { + std::vector brackets = Tokenize(pspec->at(0)); + if(brackets.size()!=2) { + std::cerr << "invalid xml-brackets value, " + << "must specify exactly 2 blank-delimited strings " + << "for XML tags opening and closing brackets" + << std::endl; + exit(1); + } + + xml_brackets.first= brackets[0]; + xml_brackets.second=brackets[1]; + +#if 0 + VERBOSE(1,"XML tags opening and closing brackets for XML input are: " + << xml_brackets.first << " and " + << xml_brackets.second << std::endl); +#endif } - bool - InputOptions:: - init(Parameter const& param) - { - param.SetParameter(input_type, "inputtype", SentenceInput); -#if 0 - if (input_type == SentenceInput) - { VERBOSE(2, "input type is: text input"); } - else if (input_type == ConfusionNetworkInput) - { VERBOSE(2, "input type is: confusion net"); } - else if (input_type == WordLatticeInput) - { VERBOSE(2, "input type is: word lattice"); } - else if (input_type == TreeInputType) - { VERBOSE(2, "input type is: tree"); } - else if (input_type == TabbedSentenceInput) - { VERBOSE(2, "input type is: tabbed sentence"); } - else if (input_type == ForestInputType) - { VERBOSE(2, "input type is: forest"); } -#endif - + pspec = param.GetParam("input-factors"); + if (pspec) factor_order = Scan(*pspec); + if (factor_order.empty()) factor_order.assign(1,0); + param.SetParameter(placeholder_factor, "placeholder-factor", NOT_FOUND); - param.SetParameter(continue_partial_translation, - "continue-partial-translation", false); - - param.SetParameter(xml_policy, "xml-input", XmlPassThrough); - - // specify XML tags opening and closing brackets for XML option - // Do we really want this to be configurable???? 
UG - const PARAM_VEC *pspec; - pspec = param.GetParam("xml-brackets"); - if (pspec && pspec->size()) - { - std::vector brackets = Tokenize(pspec->at(0)); - if(brackets.size()!=2) - { - std::cerr << "invalid xml-brackets value, " - << "must specify exactly 2 blank-delimited strings " - << "for XML tags opening and closing brackets" - << std::endl; - exit(1); - } - - xml_brackets.first= brackets[0]; - xml_brackets.second=brackets[1]; - -#if 0 - VERBOSE(1,"XML tags opening and closing brackets for XML input are: " - << xml_brackets.first << " and " - << xml_brackets.second << std::endl); -#endif - } + param.SetParameter(factor_delimiter, "factor-delimiter", "|"); + param.SetParameter(input_file_path,"input-file",""); - pspec = param.GetParam("input-factors"); - if (pspec) factor_order = Scan(*pspec); - if (factor_order.empty()) factor_order.assign(1,0); - param.SetParameter(placeholder_factor, "placeholder-factor", NOT_FOUND); - - param.SetParameter(factor_delimiter, "factor-delimiter", "|"); - param.SetParameter(input_file_path,"input-file",""); - - return true; - } + return true; +} #ifdef HAVE_XMLRPC_C - bool - InputOptions:: - update(std::mapconst& param) - { - typedef std::map params_t; - params_t::const_iterator si = param.find("xml-input"); - if (si != param.end()) - xml_policy = Scan(xmlrpc_c::value_string(si->second)); - return true; - } +bool +InputOptions:: +update(std::mapconst& param) +{ + typedef std::map params_t; + params_t::const_iterator si = param.find("xml-input"); + if (si != param.end()) + xml_policy = Scan(xmlrpc_c::value_string(si->second)); + return true; +} #endif } diff --git a/moses2/parameters/InputOptions.h b/moses2/parameters/InputOptions.h index dd3be80e1..6e70e1e1e 100644 --- a/moses2/parameters/InputOptions.h +++ b/moses2/parameters/InputOptions.h @@ -7,26 +7,25 @@ namespace Moses2 { - struct - InputOptions : public OptionsBaseClass - { - bool continue_partial_translation; - InputTypeEnum input_type; - XmlInputType xml_policy; // pass 
through, ignore, exclusive, inclusive - std::vector factor_order; // input factor order - std::string factor_delimiter; - FactorType placeholder_factor; // where to store original text for placeholders - std::string input_file_path; - std::pair xml_brackets; - // strings to use as XML tags' opening and closing brackets. - // Default are "<" and ">" +struct + InputOptions : public OptionsBaseClass { + bool continue_partial_translation; + InputTypeEnum input_type; + XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive + std::vector factor_order; // input factor order + std::string factor_delimiter; + FactorType placeholder_factor; // where to store original text for placeholders + std::string input_file_path; + std::pair xml_brackets; + // strings to use as XML tags' opening and closing brackets. + // Default are "<" and ">" - InputOptions(); + InputOptions(); - bool init(Parameter const& param); - bool update(std::mapconst& param); + bool init(Parameter const& param); + bool update(std::mapconst& param); - }; +}; } diff --git a/moses2/parameters/LMBR_Options.cpp b/moses2/parameters/LMBR_Options.cpp index 25febd616..a65c071b9 100644 --- a/moses2/parameters/LMBR_Options.cpp +++ b/moses2/parameters/LMBR_Options.cpp @@ -5,33 +5,33 @@ namespace Moses2 { - LMBR_Options:: - LMBR_Options() - : enabled(false) - , use_lattice_hyp_set(false) - , precision(0.8f) - , ratio(0.6f) - , map_weight(0.8f) - , pruning_factor(30) - { } +LMBR_Options:: +LMBR_Options() + : enabled(false) + , use_lattice_hyp_set(false) + , precision(0.8f) + , ratio(0.6f) + , map_weight(0.8f) + , pruning_factor(30) +{ } - bool - LMBR_Options:: - init(Parameter const& param) - { - param.SetParameter(enabled, "lminimum-bayes-risk", false); - - param.SetParameter(ratio, "lmbr-r", 0.6f); - param.SetParameter(precision, "lmbr-p", 0.8f); - param.SetParameter(map_weight, "lmbr-map-weight", 0.0f); - param.SetParameter(pruning_factor, "lmbr-pruning-factor", size_t(30)); - 
param.SetParameter(use_lattice_hyp_set, "lattice-hypo-set", false); - - PARAM_VEC const* params = param.GetParam("lmbr-thetas"); - if (params) theta = Scan(*params); - - return true; - } +bool +LMBR_Options:: +init(Parameter const& param) +{ + param.SetParameter(enabled, "lminimum-bayes-risk", false); + + param.SetParameter(ratio, "lmbr-r", 0.6f); + param.SetParameter(precision, "lmbr-p", 0.8f); + param.SetParameter(map_weight, "lmbr-map-weight", 0.0f); + param.SetParameter(pruning_factor, "lmbr-pruning-factor", size_t(30)); + param.SetParameter(use_lattice_hyp_set, "lattice-hypo-set", false); + + PARAM_VEC const* params = param.GetParam("lmbr-thetas"); + if (params) theta = Scan(*params); + + return true; +} diff --git a/moses2/parameters/LMBR_Options.h b/moses2/parameters/LMBR_Options.h index c084f04b9..84e5fd759 100644 --- a/moses2/parameters/LMBR_Options.h +++ b/moses2/parameters/LMBR_Options.h @@ -7,20 +7,19 @@ namespace Moses2 { - // Options for mimum bayes risk decoding - struct - LMBR_Options : public OptionsBaseClass - { - bool enabled; - bool use_lattice_hyp_set; //! to use nbest as hypothesis set during lattice MBR - float precision; //! unigram precision theta - see Tromble et al 08 for more details - float ratio; //! decaying factor for ngram thetas - see Tromble et al 08 - float map_weight; //! Weight given to the map solution. See Kumar et al 09 - size_t pruning_factor; //! average number of nodes per word wanted in pruned lattice - std::vector theta; //! theta(s) for lattice mbr calculation - bool init(Parameter const& param); - LMBR_Options(); - }; +// Options for mimum bayes risk decoding +struct + LMBR_Options : public OptionsBaseClass { + bool enabled; + bool use_lattice_hyp_set; //! to use nbest as hypothesis set during lattice MBR + float precision; //! unigram precision theta - see Tromble et al 08 for more details + float ratio; //! decaying factor for ngram thetas - see Tromble et al 08 + float map_weight; //! 
Weight given to the map solution. See Kumar et al 09 + size_t pruning_factor; //! average number of nodes per word wanted in pruned lattice + std::vector theta; //! theta(s) for lattice mbr calculation + bool init(Parameter const& param); + LMBR_Options(); +}; } diff --git a/moses2/parameters/LookupOptions.h b/moses2/parameters/LookupOptions.h index 3728d97d1..58aa733cf 100644 --- a/moses2/parameters/LookupOptions.h +++ b/moses2/parameters/LookupOptions.h @@ -6,12 +6,11 @@ namespace Moses2 { - struct - LookupOptions : public OptionsBaseClass - { - bool init(Parameter const& param); - LookupOptions() {} - }; +struct + LookupOptions : public OptionsBaseClass { + bool init(Parameter const& param); + LookupOptions() {} +}; } diff --git a/moses2/parameters/MBR_Options.cpp b/moses2/parameters/MBR_Options.cpp index 669ee94cc..0e7abd384 100644 --- a/moses2/parameters/MBR_Options.cpp +++ b/moses2/parameters/MBR_Options.cpp @@ -5,22 +5,22 @@ namespace Moses2 { - MBR_Options:: - MBR_Options() - : enabled(false) - , size(200) - , scale(1.0f) - {} +MBR_Options:: +MBR_Options() + : enabled(false) + , size(200) + , scale(1.0f) +{} - bool - MBR_Options:: - init(Parameter const& param) - { - param.SetParameter(enabled, "minimum-bayes-risk", false); - param.SetParameter(size, "mbr-size", 200); - param.SetParameter(scale, "mbr-scale", 1.0f); - return true; - } +bool +MBR_Options:: +init(Parameter const& param) +{ + param.SetParameter(enabled, "minimum-bayes-risk", false); + param.SetParameter(size, "mbr-size", 200); + param.SetParameter(scale, "mbr-scale", 1.0f); + return true; +} } diff --git a/moses2/parameters/MBR_Options.h b/moses2/parameters/MBR_Options.h index 47ff45551..0f8068ca0 100644 --- a/moses2/parameters/MBR_Options.h +++ b/moses2/parameters/MBR_Options.h @@ -5,17 +5,16 @@ namespace Moses2 { - // Options for mimum bayes risk decoding - struct - MBR_Options : public OptionsBaseClass - { - bool enabled; - size_t size; //! 
number of translation candidates considered - float scale; /*! scaling factor for computing marginal probability +// Options for mimum bayes risk decoding +struct + MBR_Options : public OptionsBaseClass { + bool enabled; + size_t size; //! number of translation candidates considered + float scale; /*! scaling factor for computing marginal probability * of candidate translation */ - bool init(Parameter const& param); - MBR_Options(); - }; + bool init(Parameter const& param); + MBR_Options(); +}; } diff --git a/moses2/parameters/NBestOptions.cpp b/moses2/parameters/NBestOptions.cpp index 0536793b8..d72c155e2 100644 --- a/moses2/parameters/NBestOptions.cpp +++ b/moses2/parameters/NBestOptions.cpp @@ -5,19 +5,19 @@ namespace Moses2 { - NBestOptions:: - NBestOptions() - : nbest_size(0) - , factor(20) - , enabled(false) - , print_trees(false) - , only_distinct(false) - , include_alignment_info(false) - , include_feature_labels(true) - , include_segmentation(false) - , include_passthrough(false) - , include_all_factors(false) - {} +NBestOptions:: +NBestOptions() + : nbest_size(0) + , factor(20) + , enabled(false) + , print_trees(false) + , only_distinct(false) + , include_alignment_info(false) + , include_feature_labels(true) + , include_segmentation(false) + , include_passthrough(false) + , include_all_factors(false) +{} bool @@ -48,9 +48,9 @@ init(Parameter const& P) enabled = output_file_path.size(); return true; } - + #ifdef HAVE_XMLRPC_C -bool +bool NBestOptions:: update(std::mapconst& param) { diff --git a/moses2/parameters/NBestOptions.h b/moses2/parameters/NBestOptions.h index f2e478b84..d3caed425 100644 --- a/moses2/parameters/NBestOptions.h +++ b/moses2/parameters/NBestOptions.h @@ -5,8 +5,7 @@ namespace Moses2 { -struct NBestOptions : public OptionsBaseClass -{ +struct NBestOptions : public OptionsBaseClass { size_t nbest_size; size_t factor; bool enabled; diff --git a/moses2/parameters/OOVHandlingOptions.cpp b/moses2/parameters/OOVHandlingOptions.cpp index 
65f79584e..305526386 100644 --- a/moses2/parameters/OOVHandlingOptions.cpp +++ b/moses2/parameters/OOVHandlingOptions.cpp @@ -9,42 +9,42 @@ namespace Moses2 { - OOVHandlingOptions:: - OOVHandlingOptions() - { - drop = false; - mark = false; - prefix = "UNK"; - suffix = ""; - word_deletion_enabled = false; - always_create_direct_transopt = false; - } +OOVHandlingOptions:: +OOVHandlingOptions() +{ + drop = false; + mark = false; + prefix = "UNK"; + suffix = ""; + word_deletion_enabled = false; + always_create_direct_transopt = false; +} - bool - OOVHandlingOptions:: - init(Parameter const& param) - { - param.SetParameter(drop,"drop-unknown",false); - param.SetParameter(mark,"mark-unknown",false); - param.SetParameter(word_deletion_enabled, "phrase-drop-allowed", false); - param.SetParameter(always_create_direct_transopt, "always-create-direct-transopt", false); - param.SetParameter(prefix,"unknown-word-prefix","UNK"); - param.SetParameter(suffix,"unknown-word-suffix",""); - return true; - } +bool +OOVHandlingOptions:: +init(Parameter const& param) +{ + param.SetParameter(drop,"drop-unknown",false); + param.SetParameter(mark,"mark-unknown",false); + param.SetParameter(word_deletion_enabled, "phrase-drop-allowed", false); + param.SetParameter(always_create_direct_transopt, "always-create-direct-transopt", false); + param.SetParameter(prefix,"unknown-word-prefix","UNK"); + param.SetParameter(suffix,"unknown-word-suffix",""); + return true; +} #ifdef HAVE_XMLRPC_C - bool - OOVHandlingOptions:: - update(std::mapconst& param) - { - typedef std::map params_t; - // params_t::const_iterator si = param.find("xml-input"); - // if (si != param.end()) - // xml_policy = Scan(xmlrpc_c::value_string(si->second)); - return true; - } +bool +OOVHandlingOptions:: +update(std::mapconst& param) +{ + typedef std::map params_t; + // params_t::const_iterator si = param.find("xml-input"); + // if (si != param.end()) + // xml_policy = Scan(xmlrpc_c::value_string(si->second)); + return true; +} 
#endif } diff --git a/moses2/parameters/OOVHandlingOptions.h b/moses2/parameters/OOVHandlingOptions.h index d11284220..1b56d6d93 100644 --- a/moses2/parameters/OOVHandlingOptions.h +++ b/moses2/parameters/OOVHandlingOptions.h @@ -6,22 +6,21 @@ namespace Moses2 { - struct - OOVHandlingOptions : public OptionsBaseClass - { - bool drop; - bool mark; - std::string prefix; - std::string suffix; - - bool word_deletion_enabled; - bool always_create_direct_transopt; - OOVHandlingOptions(); +struct + OOVHandlingOptions : public OptionsBaseClass { + bool drop; + bool mark; + std::string prefix; + std::string suffix; - bool init(Parameter const& param); - bool update(std::mapconst& param); + bool word_deletion_enabled; + bool always_create_direct_transopt; + OOVHandlingOptions(); - }; + bool init(Parameter const& param); + bool update(std::mapconst& param); + +}; } diff --git a/moses2/parameters/OptionsBaseClass.cpp b/moses2/parameters/OptionsBaseClass.cpp index c523a1333..cab6b3cf5 100644 --- a/moses2/parameters/OptionsBaseClass.cpp +++ b/moses2/parameters/OptionsBaseClass.cpp @@ -7,24 +7,24 @@ namespace Moses2 { #ifdef HAVE_XMLRPC_C - bool - OptionsBaseClass:: - update(std::mapconst& params) - { - return true; - } +bool +OptionsBaseClass:: +update(std::mapconst& params) +{ + return true; +} #endif #ifdef HAVE_XMLRPC_C - bool - OptionsBaseClass:: - check(std::map const& param, - std::string const key, bool dfltval) - { - std::map::const_iterator m; - m = param.find(key); - if (m == param.end()) return dfltval; - return Scan(xmlrpc_c::value_string(m->second)); - } +bool +OptionsBaseClass:: +check(std::map const& param, + std::string const key, bool dfltval) +{ + std::map::const_iterator m; + m = param.find(key); + if (m == param.end()) return dfltval; + return Scan(xmlrpc_c::value_string(m->second)); +} #endif } diff --git a/moses2/parameters/OptionsBaseClass.h b/moses2/parameters/OptionsBaseClass.h index 05914ed82..088a05490 100644 --- a/moses2/parameters/OptionsBaseClass.h 
+++ b/moses2/parameters/OptionsBaseClass.h @@ -7,14 +7,13 @@ namespace Moses2 { class Parameter; - struct OptionsBaseClass - { +struct OptionsBaseClass { #ifdef HAVE_XMLRPC_C - virtual bool - update(std::mapconst& params); + virtual bool + update(std::mapconst& params); #endif - bool - check(std::map const& param, - std::string const key, bool dfltval); - }; + bool + check(std::map const& param, + std::string const key, bool dfltval); +}; } diff --git a/moses2/parameters/ReorderingOptions.cpp b/moses2/parameters/ReorderingOptions.cpp index 5fef5c54a..64e777de7 100644 --- a/moses2/parameters/ReorderingOptions.cpp +++ b/moses2/parameters/ReorderingOptions.cpp @@ -5,27 +5,27 @@ namespace Moses2 { - ReorderingOptions:: - ReorderingOptions() - : max_distortion(-1) - , monotone_at_punct(false) - , use_early_distortion_cost(false) - {} +ReorderingOptions:: +ReorderingOptions() + : max_distortion(-1) + , monotone_at_punct(false) + , use_early_distortion_cost(false) +{} - ReorderingOptions:: - ReorderingOptions(Parameter const& param) - { - init(param); - } - - bool - ReorderingOptions:: - init(Parameter const& param) - { - param.SetParameter(max_distortion, "distortion-limit", -1); - param.SetParameter(monotone_at_punct, "monotone-at-punctuation", false); - param.SetParameter(use_early_distortion_cost, "early-distortion-cost", false); - return true; - } +ReorderingOptions:: +ReorderingOptions(Parameter const& param) +{ + init(param); +} + +bool +ReorderingOptions:: +init(Parameter const& param) +{ + param.SetParameter(max_distortion, "distortion-limit", -1); + param.SetParameter(monotone_at_punct, "monotone-at-punctuation", false); + param.SetParameter(use_early_distortion_cost, "early-distortion-cost", false); + return true; +} } diff --git a/moses2/parameters/ReorderingOptions.h b/moses2/parameters/ReorderingOptions.h index 6bdc1f043..319124e83 100644 --- a/moses2/parameters/ReorderingOptions.h +++ b/moses2/parameters/ReorderingOptions.h @@ -5,16 +5,15 @@ namespace 
Moses2 { - struct - ReorderingOptions : public OptionsBaseClass - { - int max_distortion; - bool monotone_at_punct; - bool use_early_distortion_cost; - bool init(Parameter const& param); - ReorderingOptions(Parameter const& param); - ReorderingOptions(); - }; +struct + ReorderingOptions : public OptionsBaseClass { + int max_distortion; + bool monotone_at_punct; + bool use_early_distortion_cost; + bool init(Parameter const& param); + ReorderingOptions(Parameter const& param); + ReorderingOptions(); +}; } diff --git a/moses2/parameters/ReportingOptions.cpp b/moses2/parameters/ReportingOptions.cpp index adc27baf2..428cc0566 100644 --- a/moses2/parameters/ReportingOptions.cpp +++ b/moses2/parameters/ReportingOptions.cpp @@ -4,149 +4,149 @@ namespace Moses2 { - using namespace std; +using namespace std; - ReportingOptions:: - ReportingOptions() - : start_translation_id(0) - , ReportAllFactors(false) - , ReportSegmentation(0) - , PrintAlignmentInfo(false) - , PrintAllDerivations(false) - , PrintTranslationOptions(false) - , WA_SortOrder(NoSort) - , WordGraph(false) - , DontPruneSearchGraph(false) - , RecoverPath(false) - , ReportHypoScore(false) - , PrintID(false) - , PrintPassThrough(false) - , include_lhs_in_search_graph(false) - , lattice_sample_size(0) - { - factor_order.assign(1,0); - factor_delimiter = "|"; - } - - bool - ReportingOptions:: - init(Parameter const& param) - { - param.SetParameter(start_translation_id, "start-translation-id", 0); +ReportingOptions:: +ReportingOptions() + : start_translation_id(0) + , ReportAllFactors(false) + , ReportSegmentation(0) + , PrintAlignmentInfo(false) + , PrintAllDerivations(false) + , PrintTranslationOptions(false) + , WA_SortOrder(NoSort) + , WordGraph(false) + , DontPruneSearchGraph(false) + , RecoverPath(false) + , ReportHypoScore(false) + , PrintID(false) + , PrintPassThrough(false) + , include_lhs_in_search_graph(false) + , lattice_sample_size(0) +{ + factor_order.assign(1,0); + factor_delimiter = "|"; +} - // 
including factors in the output - param.SetParameter(ReportAllFactors, "report-all-factors", false); - - // segmentation reporting - ReportSegmentation = (param.GetParam("report-segmentation-enriched") - ? 2 : param.GetParam("report-segmentation") - ? 1 : 0); - - // word alignment reporting - param.SetParameter(PrintAlignmentInfo, "print-alignment-info", false); - param.SetParameter(WA_SortOrder, "sort-word-alignment", NoSort); - std::string e; // hack to save us param.SetParameter(...) - param.SetParameter(AlignmentOutputFile,"alignment-output-file", e); - - - param.SetParameter(PrintAllDerivations, "print-all-derivations", false); - param.SetParameter(PrintTranslationOptions, "print-translation-option", false); - - // output a word graph - PARAM_VEC const* params; - params = param.GetParam("output-word-graph"); - WordGraph = (params && params->size() == 2); // what are the two options? - - // dump the search graph - param.SetParameter(SearchGraph, "output-search-graph", e); - param.SetParameter(SearchGraphExtended, "output-search-graph-extended", e); - param.SetParameter(SearchGraphSLF,"output-search-graph-slf", e); - param.SetParameter(SearchGraphHG, "output-search-graph-hypergraph", e); +bool +ReportingOptions:: +init(Parameter const& param) +{ + param.SetParameter(start_translation_id, "start-translation-id", 0); + + // including factors in the output + param.SetParameter(ReportAllFactors, "report-all-factors", false); + + // segmentation reporting + ReportSegmentation = (param.GetParam("report-segmentation-enriched") + ? 2 : param.GetParam("report-segmentation") + ? 1 : 0); + + // word alignment reporting + param.SetParameter(PrintAlignmentInfo, "print-alignment-info", false); + param.SetParameter(WA_SortOrder, "sort-word-alignment", NoSort); + std::string e; // hack to save us param.SetParameter(...) 
+ param.SetParameter(AlignmentOutputFile,"alignment-output-file", e); + + + param.SetParameter(PrintAllDerivations, "print-all-derivations", false); + param.SetParameter(PrintTranslationOptions, "print-translation-option", false); + + // output a word graph + PARAM_VEC const* params; + params = param.GetParam("output-word-graph"); + WordGraph = (params && params->size() == 2); // what are the two options? + + // dump the search graph + param.SetParameter(SearchGraph, "output-search-graph", e); + param.SetParameter(SearchGraphExtended, "output-search-graph-extended", e); + param.SetParameter(SearchGraphSLF,"output-search-graph-slf", e); + param.SetParameter(SearchGraphHG, "output-search-graph-hypergraph", e); #ifdef HAVE_PROTOBUF - param.SetParameter(SearchGraphPB, "output-search-graph-pb", e); + param.SetParameter(SearchGraphPB, "output-search-graph-pb", e); #endif - - param.SetParameter(DontPruneSearchGraph, "unpruned-search-graph", false); - param.SetParameter(include_lhs_in_search_graph, - "include-lhs-in-search-graph", false ); - - // miscellaneous - param.SetParameter(RecoverPath, "recover-input-path",false); - param.SetParameter(ReportHypoScore, "output-hypo-score",false); - param.SetParameter(PrintID, "print-id",false); - param.SetParameter(PrintPassThrough, "print-passthrough",false); - param.SetParameter(detailed_all_transrep_filepath, - "translation-all-details", e); - param.SetParameter(detailed_transrep_filepath, "translation-details", e); - param.SetParameter(detailed_tree_transrep_filepath, - "tree-translation-details", e); + param.SetParameter(DontPruneSearchGraph, "unpruned-search-graph", false); + param.SetParameter(include_lhs_in_search_graph, + "include-lhs-in-search-graph", false ); - params = param.GetParam("lattice-samples"); - if (params) { - if (params->size() ==2 ) { - lattice_sample_filepath = params->at(0); - lattice_sample_size = Scan(params->at(1)); - } else { - std::cerr <<"wrong format for switch -lattice-samples file size"; - return 
false; - } - } - - if (ReportAllFactors) { - factor_order.clear(); - for (size_t i = 0; i < MAX_NUM_FACTORS; ++i) - factor_order.push_back(i); + // miscellaneous + param.SetParameter(RecoverPath, "recover-input-path",false); + param.SetParameter(ReportHypoScore, "output-hypo-score",false); + param.SetParameter(PrintID, "print-id",false); + param.SetParameter(PrintPassThrough, "print-passthrough",false); + param.SetParameter(detailed_all_transrep_filepath, + "translation-all-details", e); + param.SetParameter(detailed_transrep_filepath, "translation-details", e); + param.SetParameter(detailed_tree_transrep_filepath, + "tree-translation-details", e); + + params = param.GetParam("lattice-samples"); + if (params) { + if (params->size() ==2 ) { + lattice_sample_filepath = params->at(0); + lattice_sample_size = Scan(params->at(1)); } else { - params= param.GetParam("output-factors"); - if (params) factor_order = Scan(*params); - if (factor_order.empty()) factor_order.assign(1,0); + std::cerr <<"wrong format for switch -lattice-samples file size"; + return false; } - - param.SetParameter(factor_delimiter, "factor-delimiter", std::string("|")); - param.SetParameter(factor_delimiter, "output-factor-delimiter", factor_delimiter); - - return true; } - + + + if (ReportAllFactors) { + factor_order.clear(); + for (size_t i = 0; i < MAX_NUM_FACTORS; ++i) + factor_order.push_back(i); + } else { + params= param.GetParam("output-factors"); + if (params) factor_order = Scan(*params); + if (factor_order.empty()) factor_order.assign(1,0); + } + + param.SetParameter(factor_delimiter, "factor-delimiter", std::string("|")); + param.SetParameter(factor_delimiter, "output-factor-delimiter", factor_delimiter); + + return true; +} + #ifdef HAVE_XMLRPC_C - bool - ReportingOptions:: - update(std::mapconst& param) - { - ReportAllFactors = check(param, "report-all-factors", ReportAllFactors); - - - std::map::const_iterator m; - m = param.find("output-factors"); - if (m != param.end()) { - 
factor_order=Tokenize(xmlrpc_c::value_string(m->second),","); - } +bool +ReportingOptions:: +update(std::mapconst& param) +{ + ReportAllFactors = check(param, "report-all-factors", ReportAllFactors); - if (ReportAllFactors) { - factor_order.clear(); - for (size_t i = 0; i < MAX_NUM_FACTORS; ++i) - factor_order.push_back(i); - } - - m = param.find("align"); - if (m != param.end() && Scan(xmlrpc_c::value_string(m->second))) - ReportSegmentation = 1; - - PrintAlignmentInfo = check(param,"word-align",PrintAlignmentInfo); - m = param.find("factor-delimiter"); - if (m != param.end()) { - factor_delimiter = Trim(xmlrpc_c::value_string(m->second)); - } - - m = param.find("output-factor-delimiter"); - if (m != param.end()) { - factor_delimiter = Trim(xmlrpc_c::value_string(m->second)); - } - - return true; + std::map::const_iterator m; + m = param.find("output-factors"); + if (m != param.end()) { + factor_order=Tokenize(xmlrpc_c::value_string(m->second),","); } + + if (ReportAllFactors) { + factor_order.clear(); + for (size_t i = 0; i < MAX_NUM_FACTORS; ++i) + factor_order.push_back(i); + } + + m = param.find("align"); + if (m != param.end() && Scan(xmlrpc_c::value_string(m->second))) + ReportSegmentation = 1; + + PrintAlignmentInfo = check(param,"word-align",PrintAlignmentInfo); + + m = param.find("factor-delimiter"); + if (m != param.end()) { + factor_delimiter = Trim(xmlrpc_c::value_string(m->second)); + } + + m = param.find("output-factor-delimiter"); + if (m != param.end()) { + factor_delimiter = Trim(xmlrpc_c::value_string(m->second)); + } + + return true; +} #endif } diff --git a/moses2/parameters/ReportingOptions.h b/moses2/parameters/ReportingOptions.h index c96809aec..6b491f3b6 100644 --- a/moses2/parameters/ReportingOptions.h +++ b/moses2/parameters/ReportingOptions.h @@ -8,63 +8,62 @@ namespace Moses2 { - struct - ReportingOptions : public OptionsBaseClass - { - long start_translation_id; +struct + ReportingOptions : public OptionsBaseClass { + long 
start_translation_id; - std::vector factor_order; - std::string factor_delimiter; - - bool ReportAllFactors; // m_reportAllFactors; - int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched + std::vector factor_order; + std::string factor_delimiter; - bool PrintAlignmentInfo; // m_PrintAlignmentInfo - bool PrintAllDerivations; - bool PrintTranslationOptions; + bool ReportAllFactors; // m_reportAllFactors; + int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched - WordAlignmentSort WA_SortOrder; // 0: no, 1: target order - std::string AlignmentOutputFile; - - bool WordGraph; + bool PrintAlignmentInfo; // m_PrintAlignmentInfo + bool PrintAllDerivations; + bool PrintTranslationOptions; - std::string SearchGraph; - std::string SearchGraphExtended; - std::string SearchGraphSLF; - std::string SearchGraphHG; - std::string SearchGraphPB; - bool DontPruneSearchGraph; + WordAlignmentSort WA_SortOrder; // 0: no, 1: target order + std::string AlignmentOutputFile; - bool RecoverPath; // recover input path? - bool ReportHypoScore; + bool WordGraph; - bool PrintID; - bool PrintPassThrough; + std::string SearchGraph; + std::string SearchGraphExtended; + std::string SearchGraphSLF; + std::string SearchGraphHG; + std::string SearchGraphPB; + bool DontPruneSearchGraph; - // transrep = translation reporting - std::string detailed_transrep_filepath; - std::string detailed_tree_transrep_filepath; - std::string detailed_all_transrep_filepath; - bool include_lhs_in_search_graph; + bool RecoverPath; // recover input path? + bool ReportHypoScore; - - std::string lattice_sample_filepath; - size_t lattice_sample_size; + bool PrintID; + bool PrintPassThrough; - bool init(Parameter const& param); + // transrep = translation reporting + std::string detailed_transrep_filepath; + std::string detailed_tree_transrep_filepath; + std::string detailed_all_transrep_filepath; + bool include_lhs_in_search_graph; - /// do we need to keep the search graph from decoding? 
- bool NeedSearchGraph() const { - return !(SearchGraph.empty() && SearchGraphExtended.empty()); - } + + std::string lattice_sample_filepath; + size_t lattice_sample_size; + + bool init(Parameter const& param); + + /// do we need to keep the search graph from decoding? + bool NeedSearchGraph() const { + return !(SearchGraph.empty() && SearchGraphExtended.empty()); + } #ifdef HAVE_XMLRPC_C - bool update(std::mapconst& param); + bool update(std::mapconst& param); #endif - - ReportingOptions(); - }; - + + ReportingOptions(); +}; + } diff --git a/moses2/parameters/SearchOptions.cpp b/moses2/parameters/SearchOptions.cpp index 98c1789ea..b3a468896 100644 --- a/moses2/parameters/SearchOptions.cpp +++ b/moses2/parameters/SearchOptions.cpp @@ -5,103 +5,102 @@ namespace Moses2 { - SearchOptions:: - SearchOptions() - : algo(Normal) - , stack_size(DEFAULT_MAX_HYPOSTACK_SIZE) - , stack_diversity(0) - , disable_discarding(false) - , max_phrase_length(DEFAULT_MAX_PHRASE_LENGTH) - , max_trans_opt_per_cov(DEFAULT_MAX_TRANS_OPT_SIZE) - , max_partial_trans_opt(DEFAULT_MAX_PART_TRANS_OPT_SIZE) - , beam_width(DEFAULT_BEAM_WIDTH) - , timeout(0) - , consensus(false) - , early_discarding_threshold(DEFAULT_EARLY_DISCARDING_THRESHOLD) - , trans_opt_threshold(DEFAULT_TRANSLATION_OPTION_THRESHOLD) - { } +SearchOptions:: +SearchOptions() + : algo(Normal) + , stack_size(DEFAULT_MAX_HYPOSTACK_SIZE) + , stack_diversity(0) + , disable_discarding(false) + , max_phrase_length(DEFAULT_MAX_PHRASE_LENGTH) + , max_trans_opt_per_cov(DEFAULT_MAX_TRANS_OPT_SIZE) + , max_partial_trans_opt(DEFAULT_MAX_PART_TRANS_OPT_SIZE) + , beam_width(DEFAULT_BEAM_WIDTH) + , timeout(0) + , consensus(false) + , early_discarding_threshold(DEFAULT_EARLY_DISCARDING_THRESHOLD) + , trans_opt_threshold(DEFAULT_TRANSLATION_OPTION_THRESHOLD) +{ } - SearchOptions:: - SearchOptions(Parameter const& param) - : stack_diversity(0) - { - init(param); - } +SearchOptions:: +SearchOptions(Parameter const& param) + : stack_diversity(0) +{ + 
init(param); +} - bool - SearchOptions:: - init(Parameter const& param) - { - param.SetParameter(algo, "search-algorithm", Normal); - param.SetParameter(stack_size, "stack", DEFAULT_MAX_HYPOSTACK_SIZE); - param.SetParameter(stack_diversity, "stack-diversity", size_t(0)); - param.SetParameter(beam_width, "beam-threshold", DEFAULT_BEAM_WIDTH); - param.SetParameter(early_discarding_threshold, "early-discarding-threshold", - DEFAULT_EARLY_DISCARDING_THRESHOLD); - param.SetParameter(timeout, "time-out", 0); - param.SetParameter(max_phrase_length, "max-phrase-length", - DEFAULT_MAX_PHRASE_LENGTH); - param.SetParameter(trans_opt_threshold, "translation-option-threshold", - DEFAULT_TRANSLATION_OPTION_THRESHOLD); - param.SetParameter(max_trans_opt_per_cov, "max-trans-opt-per-coverage", - DEFAULT_MAX_TRANS_OPT_SIZE); - param.SetParameter(max_partial_trans_opt, "max-partial-trans-opt", - DEFAULT_MAX_PART_TRANS_OPT_SIZE); +bool +SearchOptions:: +init(Parameter const& param) +{ + param.SetParameter(algo, "search-algorithm", Normal); + param.SetParameter(stack_size, "stack", DEFAULT_MAX_HYPOSTACK_SIZE); + param.SetParameter(stack_diversity, "stack-diversity", size_t(0)); + param.SetParameter(beam_width, "beam-threshold", DEFAULT_BEAM_WIDTH); + param.SetParameter(early_discarding_threshold, "early-discarding-threshold", + DEFAULT_EARLY_DISCARDING_THRESHOLD); + param.SetParameter(timeout, "time-out", 0); + param.SetParameter(max_phrase_length, "max-phrase-length", + DEFAULT_MAX_PHRASE_LENGTH); + param.SetParameter(trans_opt_threshold, "translation-option-threshold", + DEFAULT_TRANSLATION_OPTION_THRESHOLD); + param.SetParameter(max_trans_opt_per_cov, "max-trans-opt-per-coverage", + DEFAULT_MAX_TRANS_OPT_SIZE); + param.SetParameter(max_partial_trans_opt, "max-partial-trans-opt", + DEFAULT_MAX_PART_TRANS_OPT_SIZE); - param.SetParameter(consensus, "consensus-decoding", false); - param.SetParameter(disable_discarding, "disable-discarding", false); - - // transformation to log of a few 
scores - beam_width = TransformScore(beam_width); - trans_opt_threshold = TransformScore(trans_opt_threshold); - early_discarding_threshold = TransformScore(early_discarding_threshold); + param.SetParameter(consensus, "consensus-decoding", false); + param.SetParameter(disable_discarding, "disable-discarding", false); - return true; - } + // transformation to log of a few scores + beam_width = TransformScore(beam_width); + trans_opt_threshold = TransformScore(trans_opt_threshold); + early_discarding_threshold = TransformScore(early_discarding_threshold); - bool - is_syntax(SearchAlgorithm algo) - { - return (algo == CYKPlus || algo == ChartIncremental || - algo == SyntaxS2T || algo == SyntaxT2S || - algo == SyntaxF2S || algo == SyntaxT2S_SCFG); - } + return true; +} + +bool +is_syntax(SearchAlgorithm algo) +{ + return (algo == CYKPlus || algo == ChartIncremental || + algo == SyntaxS2T || algo == SyntaxT2S || + algo == SyntaxF2S || algo == SyntaxT2S_SCFG); +} #ifdef HAVE_XMLRPC_C - bool - SearchOptions:: - update(std::mapconst& params) - { - typedef std::map params_t; +bool +SearchOptions:: +update(std::mapconst& params) +{ + typedef std::map params_t; - params_t::const_iterator si = params.find("search-algorithm"); - if (si != params.end()) - { - // use named parameters - std::string spec = xmlrpc_c::value_string(si->second); - if (spec == "normal" || spec == "0") algo = Normal; - else if (spec == "cube" || spec == "1") algo = CubePruning; - else throw xmlrpc_c::fault("Unsupported search algorithm", - xmlrpc_c::fault::CODE_PARSE); - } + params_t::const_iterator si = params.find("search-algorithm"); + if (si != params.end()) { + // use named parameters + std::string spec = xmlrpc_c::value_string(si->second); + if (spec == "normal" || spec == "0") algo = Normal; + else if (spec == "cube" || spec == "1") algo = CubePruning; + else throw xmlrpc_c::fault("Unsupported search algorithm", + xmlrpc_c::fault::CODE_PARSE); + } - si = params.find("stack"); - if (si != 
params.end()) stack_size = xmlrpc_c::value_int(si->second); + si = params.find("stack"); + if (si != params.end()) stack_size = xmlrpc_c::value_int(si->second); - si = params.find("stack-diversity"); - if (si != params.end()) stack_diversity = xmlrpc_c::value_int(si->second); + si = params.find("stack-diversity"); + if (si != params.end()) stack_diversity = xmlrpc_c::value_int(si->second); - si = params.find("beam-threshold"); - if (si != params.end()) beam_width = xmlrpc_c::value_double(si->second); + si = params.find("beam-threshold"); + if (si != params.end()) beam_width = xmlrpc_c::value_double(si->second); - si = params.find("time-out"); - if (si != params.end()) timeout = xmlrpc_c::value_int(si->second); - - si = params.find("max-phrase-length"); - if (si != params.end()) max_phrase_length = xmlrpc_c::value_int(si->second); - - return true; - } + si = params.find("time-out"); + if (si != params.end()) timeout = xmlrpc_c::value_int(si->second); + + si = params.find("max-phrase-length"); + if (si != params.end()) max_phrase_length = xmlrpc_c::value_int(si->second); + + return true; +} #endif } diff --git a/moses2/parameters/SearchOptions.h b/moses2/parameters/SearchOptions.h index 3de0a979a..31e364d14 100644 --- a/moses2/parameters/SearchOptions.h +++ b/moses2/parameters/SearchOptions.h @@ -8,47 +8,46 @@ namespace Moses2 { - bool is_syntax(SearchAlgorithm algo); - - struct - SearchOptions : public OptionsBaseClass - { - SearchAlgorithm algo; - - // stack decoding - size_t stack_size; // maxHypoStackSize; - size_t stack_diversity; // minHypoStackDiversity; - bool disable_discarding; - // Disable discarding of bad hypotheses from HypothesisStackNormal - size_t max_phrase_length; - size_t max_trans_opt_per_cov; - size_t max_partial_trans_opt; - // beam search - float beam_width; +bool is_syntax(SearchAlgorithm algo); - int timeout; +struct + SearchOptions : public OptionsBaseClass { + SearchAlgorithm algo; - bool consensus; //! 
Use Consensus decoding (DeNero et al 2009) - - // reordering options - // bool reorderingConstraint; //! use additional reordering constraints - // bool useEarlyDistortionCost; + // stack decoding + size_t stack_size; // maxHypoStackSize; + size_t stack_diversity; // minHypoStackDiversity; + bool disable_discarding; + // Disable discarding of bad hypotheses from HypothesisStackNormal + size_t max_phrase_length; + size_t max_trans_opt_per_cov; + size_t max_partial_trans_opt; + // beam search + float beam_width; - float early_discarding_threshold; - float trans_opt_threshold; + int timeout; - bool init(Parameter const& param); - SearchOptions(Parameter const& param); - SearchOptions(); + bool consensus; //! Use Consensus decoding (DeNero et al 2009) - bool - UseEarlyDiscarding() const { - return early_discarding_threshold != -std::numeric_limits::infinity(); - } + // reordering options + // bool reorderingConstraint; //! use additional reordering constraints + // bool useEarlyDistortionCost; - bool - update(std::mapconst& params); + float early_discarding_threshold; + float trans_opt_threshold; - }; + bool init(Parameter const& param); + SearchOptions(Parameter const& param); + SearchOptions(); + + bool + UseEarlyDiscarding() const { + return early_discarding_threshold != -std::numeric_limits::infinity(); + } + + bool + update(std::mapconst& params); + +}; } diff --git a/moses2/parameters/ServerOptions.cpp b/moses2/parameters/ServerOptions.cpp index d8942c5c7..3a21c1891 100644 --- a/moses2/parameters/ServerOptions.cpp +++ b/moses2/parameters/ServerOptions.cpp @@ -14,27 +14,23 @@ namespace Moses2 // If none of 'dhms' is given, it is assumed that it's seconds. // Specs can be combined, e.g. 2h30m, although it's probably nonsense // to be so specific. 
-size_t +size_t parse_timespec(std::string const& spec) { size_t t = 0, timeout = 0; - BOOST_FOREACH(char const& c, spec) - { - if (c >= '0' && c <= '9') - { - t = t * 10 + c - '0'; - } - else - { - if (c == 'd') timeout = t * 24 * 3600; - else if (c == 'h') timeout += t * 3600; - else if (c == 'm') timeout += t * 60; - else if (c == 's') timeout += t; - else UTIL_THROW2("Can't parse specification '" << spec - << " at " << HERE); - t = 0; - } + BOOST_FOREACH(char const& c, spec) { + if (c >= '0' && c <= '9') { + t = t * 10 + c - '0'; + } else { + if (c == 'd') timeout = t * 24 * 3600; + else if (c == 'h') timeout += t * 3600; + else if (c == 'm') timeout += t * 60; + else if (c == 's') timeout += t; + else UTIL_THROW2("Can't parse specification '" << spec + << " at " << HERE); + t = 0; } + } return timeout; } @@ -54,7 +50,7 @@ ServerOptions() ServerOptions:: ServerOptions(Parameter const& P) -{ +{ init(P); } diff --git a/moses2/parameters/ServerOptions.h b/moses2/parameters/ServerOptions.h index d662d1499..377b4d31b 100644 --- a/moses2/parameters/ServerOptions.h +++ b/moses2/parameters/ServerOptions.h @@ -11,33 +11,31 @@ namespace Moses2 { class Parameter; - struct - ServerOptions - { - bool is_serial; - uint32_t numThreads; // might not be used any more, actually - - size_t sessionTimeout; // this is related to Moses translation sessions - size_t sessionCacheSize; // this is related to Moses translation sessions +struct + ServerOptions { + bool is_serial; + uint32_t numThreads; // might not be used any more, actually - int port; // this is for the abyss server - std::string logfile; // this is for the abyss server - int maxConn; // this is for the abyss server - int maxConnBacklog; // this is for the abyss server - int keepaliveTimeout; // this is for the abyss server - int keepaliveMaxConn; // this is for the abyss server - int timeout; // this is for the abyss server - - bool init(Parameter const& param); - ServerOptions(Parameter const& param); - 
ServerOptions(); + size_t sessionTimeout; // this is related to Moses translation sessions + size_t sessionCacheSize; // this is related to Moses translation sessions - bool - update(std::mapconst& params) - { - return true; - } + int port; // this is for the abyss server + std::string logfile; // this is for the abyss server + int maxConn; // this is for the abyss server + int maxConnBacklog; // this is for the abyss server + int keepaliveTimeout; // this is for the abyss server + int keepaliveMaxConn; // this is for the abyss server + int timeout; // this is for the abyss server - }; + bool init(Parameter const& param); + ServerOptions(Parameter const& param); + ServerOptions(); + + bool + update(std::mapconst& params) { + return true; + } + +}; } diff --git a/moses2/parameters/SyntaxOptions.cpp b/moses2/parameters/SyntaxOptions.cpp index 0d4b0c7eb..4c6730615 100644 --- a/moses2/parameters/SyntaxOptions.cpp +++ b/moses2/parameters/SyntaxOptions.cpp @@ -11,10 +11,10 @@ namespace Moses2 { SyntaxOptions::SyntaxOptions() -: s2t_parsing_algo(RecursiveCYKPlus) -, default_non_term_only_for_empty_range(false) -, source_label_overlap(SourceLabelOverlapAdd) -, rule_limit(DEFAULT_MAX_TRANS_OPT_SIZE) + : s2t_parsing_algo(RecursiveCYKPlus) + , default_non_term_only_for_empty_range(false) + , source_label_overlap(SourceLabelOverlapAdd) + , rule_limit(DEFAULT_MAX_TRANS_OPT_SIZE) {} bool SyntaxOptions::init(Parameter const& param) diff --git a/moses2/parameters/SyntaxOptions.h b/moses2/parameters/SyntaxOptions.h index 133a1d9ed..c32a0c85c 100644 --- a/moses2/parameters/SyntaxOptions.h +++ b/moses2/parameters/SyntaxOptions.h @@ -19,8 +19,7 @@ typedef std::pair UnknownLHSEntry; typedef std::vector UnknownLHSList; struct -SyntaxOptions : public OptionsBaseClass -{ + SyntaxOptions : public OptionsBaseClass { S2TParsingAlgorithm s2t_parsing_algo; SCFG::Word input_default_non_terminal; SCFG::Word output_default_non_terminal; diff --git a/moses2/pugixml.cpp b/moses2/pugixml.cpp index 
737733e64..38f657cb0 100644 --- a/moses2/pugixml.cpp +++ b/moses2/pugixml.cpp @@ -82,7 +82,7 @@ #elif defined(__GNUC__) # define PUGI__NO_INLINE __attribute__((noinline)) #else -# define PUGI__NO_INLINE +# define PUGI__NO_INLINE #endif // Branch weight controls @@ -138,12242 +138,11254 @@ using std::memset; namespace pugi { # ifndef _UINTPTR_T_DEFINED - typedef size_t uintptr_t; +typedef size_t uintptr_t; # endif - typedef unsigned __int8 uint8_t; - typedef unsigned __int16 uint16_t; - typedef unsigned __int32 uint32_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; } #endif // Memory allocation PUGI__NS_BEGIN - PUGI__FN void* default_allocate(size_t size) - { - return malloc(size); - } +PUGI__FN void* default_allocate(size_t size) +{ + return malloc(size); +} - PUGI__FN void default_deallocate(void* ptr) - { - free(ptr); - } +PUGI__FN void default_deallocate(void* ptr) +{ + free(ptr); +} - template - struct xml_memory_management_function_storage - { - static allocation_function allocate; - static deallocation_function deallocate; - }; +template +struct xml_memory_management_function_storage { + static allocation_function allocate; + static deallocation_function deallocate; +}; - // Global allocation functions are stored in class statics so that in header mode linker deduplicates them - // Without a template<> we'll get multiple definitions of the same static - template allocation_function xml_memory_management_function_storage::allocate = default_allocate; - template deallocation_function xml_memory_management_function_storage::deallocate = default_deallocate; +// Global allocation functions are stored in class statics so that in header mode linker deduplicates them +// Without a template<> we'll get multiple definitions of the same static +template allocation_function xml_memory_management_function_storage::allocate = default_allocate; +template deallocation_function 
xml_memory_management_function_storage::deallocate = default_deallocate; - typedef xml_memory_management_function_storage xml_memory; +typedef xml_memory_management_function_storage xml_memory; PUGI__NS_END // String utilities PUGI__NS_BEGIN - // Get string length - PUGI__FN size_t strlength(const char_t* s) - { - assert(s); +// Get string length +PUGI__FN size_t strlength(const char_t* s) +{ + assert(s); - #ifdef PUGIXML_WCHAR_MODE - return wcslen(s); - #else - return strlen(s); - #endif - } +#ifdef PUGIXML_WCHAR_MODE + return wcslen(s); +#else + return strlen(s); +#endif +} - // Compare two strings - PUGI__FN bool strequal(const char_t* src, const char_t* dst) - { - assert(src && dst); +// Compare two strings +PUGI__FN bool strequal(const char_t* src, const char_t* dst) +{ + assert(src && dst); - #ifdef PUGIXML_WCHAR_MODE - return wcscmp(src, dst) == 0; - #else - return strcmp(src, dst) == 0; - #endif - } +#ifdef PUGIXML_WCHAR_MODE + return wcscmp(src, dst) == 0; +#else + return strcmp(src, dst) == 0; +#endif +} - // Compare lhs with [rhs_begin, rhs_end) - PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) - { - for (size_t i = 0; i < count; ++i) - if (lhs[i] != rhs[i]) - return false; - - return lhs[count] == 0; - } +// Compare lhs with [rhs_begin, rhs_end) +PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) +{ + for (size_t i = 0; i < count; ++i) + if (lhs[i] != rhs[i]) + return false; - // Get length of wide string, even if CRT lacks wide character support - PUGI__FN size_t strlength_wide(const wchar_t* s) - { - assert(s); + return lhs[count] == 0; +} - #ifdef PUGIXML_WCHAR_MODE - return wcslen(s); - #else - const wchar_t* end = s; - while (*end) end++; - return static_cast(end - s); - #endif - } +// Get length of wide string, even if CRT lacks wide character support +PUGI__FN size_t strlength_wide(const wchar_t* s) +{ + assert(s); + +#ifdef PUGIXML_WCHAR_MODE + return wcslen(s); +#else + const wchar_t* 
end = s; + while (*end) end++; + return static_cast(end - s); +#endif +} PUGI__NS_END // auto_ptr-like object for exception recovery PUGI__NS_BEGIN - template struct auto_deleter - { - T* data; - D deleter; +template struct auto_deleter { + T* data; + D deleter; - auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_) - { - } + auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_) { + } - ~auto_deleter() - { - if (data) deleter(data); - } + ~auto_deleter() { + if (data) deleter(data); + } - T* release() - { - T* result = data; - data = 0; - return result; - } - }; + T* release() { + T* result = data; + data = 0; + return result; + } +}; PUGI__NS_END #ifdef PUGIXML_COMPACT PUGI__NS_BEGIN - class compact_hash_table - { - public: - compact_hash_table(): _items(0), _capacity(0), _count(0) - { - } +class compact_hash_table +{ +public: + compact_hash_table(): _items(0), _capacity(0), _count(0) { + } - void clear() - { - if (_items) - { - xml_memory::deallocate(_items); - _items = 0; - _capacity = 0; - _count = 0; - } - } + void clear() { + if (_items) { + xml_memory::deallocate(_items); + _items = 0; + _capacity = 0; + _count = 0; + } + } - void** find(const void* key) - { - assert(key); + void** find(const void* key) { + assert(key); - if (_capacity == 0) return 0; + if (_capacity == 0) return 0; - size_t hashmod = _capacity - 1; - size_t bucket = hash(key) & hashmod; + size_t hashmod = _capacity - 1; + size_t bucket = hash(key) & hashmod; - for (size_t probe = 0; probe <= hashmod; ++probe) - { - item_t& probe_item = _items[bucket]; + for (size_t probe = 0; probe <= hashmod; ++probe) { + item_t& probe_item = _items[bucket]; - if (probe_item.key == key) - return &probe_item.value; + if (probe_item.key == key) + return &probe_item.value; - if (probe_item.key == 0) - return 0; + if (probe_item.key == 0) + return 0; - // hash collision, quadratic probing - bucket = (bucket + probe + 1) & hashmod; - } + // hash collision, quadratic probing + bucket = 
(bucket + probe + 1) & hashmod; + } - assert(!"Hash table is full"); - return 0; - } + assert(!"Hash table is full"); + return 0; + } - void** insert(const void* key) - { - assert(key); - assert(_capacity != 0 && _count < _capacity - _capacity / 4); + void** insert(const void* key) { + assert(key); + assert(_capacity != 0 && _count < _capacity - _capacity / 4); - size_t hashmod = _capacity - 1; - size_t bucket = hash(key) & hashmod; + size_t hashmod = _capacity - 1; + size_t bucket = hash(key) & hashmod; - for (size_t probe = 0; probe <= hashmod; ++probe) - { - item_t& probe_item = _items[bucket]; + for (size_t probe = 0; probe <= hashmod; ++probe) { + item_t& probe_item = _items[bucket]; - if (probe_item.key == 0) - { - probe_item.key = key; - _count++; - return &probe_item.value; - } + if (probe_item.key == 0) { + probe_item.key = key; + _count++; + return &probe_item.value; + } - if (probe_item.key == key) - return &probe_item.value; + if (probe_item.key == key) + return &probe_item.value; - // hash collision, quadratic probing - bucket = (bucket + probe + 1) & hashmod; - } + // hash collision, quadratic probing + bucket = (bucket + probe + 1) & hashmod; + } - assert(!"Hash table is full"); - return 0; - } + assert(!"Hash table is full"); + return 0; + } - bool reserve() - { - if (_count + 16 >= _capacity - _capacity / 4) - return rehash(); + bool reserve() { + if (_count + 16 >= _capacity - _capacity / 4) + return rehash(); - return true; - } + return true; + } - private: - struct item_t - { - const void* key; - void* value; - }; +private: + struct item_t { + const void* key; + void* value; + }; - item_t* _items; - size_t _capacity; + item_t* _items; + size_t _capacity; - size_t _count; + size_t _count; - bool rehash(); + bool rehash(); - static unsigned int hash(const void* key) - { - unsigned int h = static_cast(reinterpret_cast(key)); + static unsigned int hash(const void* key) { + unsigned int h = static_cast(reinterpret_cast(key)); - // MurmurHash3 32-bit 
finalizer - h ^= h >> 16; - h *= 0x85ebca6bu; - h ^= h >> 13; - h *= 0xc2b2ae35u; - h ^= h >> 16; + // MurmurHash3 32-bit finalizer + h ^= h >> 16; + h *= 0x85ebca6bu; + h ^= h >> 13; + h *= 0xc2b2ae35u; + h ^= h >> 16; - return h; - } - }; + return h; + } +}; - PUGI__FN_NO_INLINE bool compact_hash_table::rehash() - { - compact_hash_table rt; - rt._capacity = (_capacity == 0) ? 32 : _capacity * 2; - rt._items = static_cast(xml_memory::allocate(sizeof(item_t) * rt._capacity)); +PUGI__FN_NO_INLINE bool compact_hash_table::rehash() +{ + compact_hash_table rt; + rt._capacity = (_capacity == 0) ? 32 : _capacity * 2; + rt._items = static_cast(xml_memory::allocate(sizeof(item_t) * rt._capacity)); - if (!rt._items) - return false; + if (!rt._items) + return false; - memset(rt._items, 0, sizeof(item_t) * rt._capacity); + memset(rt._items, 0, sizeof(item_t) * rt._capacity); - for (size_t i = 0; i < _capacity; ++i) - if (_items[i].key) - *rt.insert(_items[i].key) = _items[i].value; + for (size_t i = 0; i < _capacity; ++i) + if (_items[i].key) + *rt.insert(_items[i].key) = _items[i].value; - if (_items) - xml_memory::deallocate(_items); + if (_items) + xml_memory::deallocate(_items); - _capacity = rt._capacity; - _items = rt._items; + _capacity = rt._capacity; + _items = rt._items; - assert(_count == rt._count); + assert(_count == rt._count); - return true; - } + return true; +} PUGI__NS_END #endif PUGI__NS_BEGIN - static const size_t xml_memory_page_size = - #ifdef PUGIXML_MEMORY_PAGE_SIZE - PUGIXML_MEMORY_PAGE_SIZE - #else - 32768 - #endif - ; - -#ifdef PUGIXML_COMPACT - static const uintptr_t xml_memory_block_alignment = 4; - - static const uintptr_t xml_memory_page_alignment = sizeof(void*); +static const size_t xml_memory_page_size = +#ifdef PUGIXML_MEMORY_PAGE_SIZE + PUGIXML_MEMORY_PAGE_SIZE #else - static const uintptr_t xml_memory_block_alignment = sizeof(void*); - - static const uintptr_t xml_memory_page_alignment = 64; - static const uintptr_t 
xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); + 32768 #endif - - // extra metadata bits - static const uintptr_t xml_memory_page_contents_shared_mask = 32; - static const uintptr_t xml_memory_page_name_allocated_mask = 16; - static const uintptr_t xml_memory_page_value_allocated_mask = 8; - static const uintptr_t xml_memory_page_type_mask = 7; - - // combined masks for string uniqueness - static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask; - static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask; + ; #ifdef PUGIXML_COMPACT - #define PUGI__GETPAGE_IMPL(header) (header).get_page() +static const uintptr_t xml_memory_block_alignment = 4; + +static const uintptr_t xml_memory_page_alignment = sizeof(void*); #else - #define PUGI__GETPAGE_IMPL(header) reinterpret_cast((header) & impl::xml_memory_page_pointer_mask) +static const uintptr_t xml_memory_block_alignment = sizeof(void*); + +static const uintptr_t xml_memory_page_alignment = 64; +static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); #endif - #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header) - #define PUGI__NODETYPE(n) static_cast(((n)->header & impl::xml_memory_page_type_mask) + 1) +// extra metadata bits +static const uintptr_t xml_memory_page_contents_shared_mask = 32; +static const uintptr_t xml_memory_page_name_allocated_mask = 16; +static const uintptr_t xml_memory_page_value_allocated_mask = 8; +static const uintptr_t xml_memory_page_type_mask = 7; - struct xml_allocator; +// combined masks for string uniqueness +static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask; +static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask 
| xml_memory_page_contents_shared_mask; - struct xml_memory_page - { - static xml_memory_page* construct(void* memory) - { - xml_memory_page* result = static_cast(memory); +#ifdef PUGIXML_COMPACT +#define PUGI__GETPAGE_IMPL(header) (header).get_page() +#else +#define PUGI__GETPAGE_IMPL(header) reinterpret_cast((header) & impl::xml_memory_page_pointer_mask) +#endif - result->allocator = 0; - result->prev = 0; - result->next = 0; - result->busy_size = 0; - result->freed_size = 0; +#define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header) +#define PUGI__NODETYPE(n) static_cast(((n)->header & impl::xml_memory_page_type_mask) + 1) - #ifdef PUGIXML_COMPACT - result->compact_string_base = 0; - result->compact_shared_parent = 0; - result->compact_page_marker = 0; - #endif +struct xml_allocator; - return result; - } +struct xml_memory_page { + static xml_memory_page* construct(void* memory) { + xml_memory_page* result = static_cast(memory); - xml_allocator* allocator; + result->allocator = 0; + result->prev = 0; + result->next = 0; + result->busy_size = 0; + result->freed_size = 0; - xml_memory_page* prev; - xml_memory_page* next; +#ifdef PUGIXML_COMPACT + result->compact_string_base = 0; + result->compact_shared_parent = 0; + result->compact_page_marker = 0; +#endif - size_t busy_size; - size_t freed_size; + return result; + } - #ifdef PUGIXML_COMPACT - char_t* compact_string_base; - void* compact_shared_parent; - uint32_t* compact_page_marker; - #endif - }; - - struct xml_memory_string_header - { - uint16_t page_offset; // offset from page->data - uint16_t full_size; // 0 if string occupies whole page - }; - - struct xml_allocator - { - xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) - { - #ifdef PUGIXML_COMPACT - _hash = 0; - #endif - } - - xml_memory_page* allocate_page(size_t data_size) - { - size_t size = sizeof(xml_memory_page) + data_size; - - // allocate block with some alignment, leaving memory for worst-case padding - void* memory = 
xml_memory::allocate(size + xml_memory_page_alignment); - if (!memory) return 0; - - // align to next page boundary (note: this guarantees at least 1 usable byte before the page) - char* page_memory = reinterpret_cast((reinterpret_cast(memory) + xml_memory_page_alignment) & ~(xml_memory_page_alignment - 1)); - - // prepare page structure - xml_memory_page* page = xml_memory_page::construct(page_memory); - assert(page); - - page->allocator = _root->allocator; - - // record the offset for freeing the memory block - assert(page_memory > memory && page_memory - static_cast(memory) <= 127); - page_memory[-1] = static_cast(page_memory - static_cast(memory)); - - return page; - } - - static void deallocate_page(xml_memory_page* page) - { - char* page_memory = reinterpret_cast(page); - - xml_memory::deallocate(page_memory - page_memory[-1]); - } - - void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); - - void* allocate_memory(size_t size, xml_memory_page*& out_page) - { - if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size)) - return allocate_memory_oob(size, out_page); - - void* buf = reinterpret_cast(_root) + sizeof(xml_memory_page) + _busy_size; - - _busy_size += size; - - out_page = _root; - - return buf; - } - - #ifdef PUGIXML_COMPACT - void* allocate_object(size_t size, xml_memory_page*& out_page) - { - void* result = allocate_memory(size + sizeof(uint32_t), out_page); - if (!result) return 0; - - // adjust for marker - ptrdiff_t offset = static_cast(result) - reinterpret_cast(out_page->compact_page_marker); - - if (PUGI__UNLIKELY(static_cast(offset) >= 256 * xml_memory_block_alignment)) - { - // insert new marker - uint32_t* marker = static_cast(result); - - *marker = static_cast(reinterpret_cast(marker) - reinterpret_cast(out_page)); - out_page->compact_page_marker = marker; - - // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block - // this will make sure deallocate_memory 
correctly tracks the size - out_page->freed_size += sizeof(uint32_t); - - return marker + 1; - } - else - { - // roll back uint32_t part - _busy_size -= sizeof(uint32_t); - - return result; - } - } - #else - void* allocate_object(size_t size, xml_memory_page*& out_page) - { - return allocate_memory(size, out_page); - } - #endif - - void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) - { - if (page == _root) page->busy_size = _busy_size; - - assert(ptr >= reinterpret_cast(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast(page) + sizeof(xml_memory_page) + page->busy_size); - (void)!ptr; - - page->freed_size += size; - assert(page->freed_size <= page->busy_size); - - if (page->freed_size == page->busy_size) - { - if (page->next == 0) - { - assert(_root == page); - - // top page freed, just reset sizes - page->busy_size = 0; - page->freed_size = 0; - - #ifdef PUGIXML_COMPACT - // reset compact state to maximize efficiency - page->compact_string_base = 0; - page->compact_shared_parent = 0; - page->compact_page_marker = 0; - #endif - - _busy_size = 0; - } - else - { - assert(_root != page); - assert(page->prev); - - // remove from the list - page->prev->next = page->next; - page->next->prev = page->prev; - - // deallocate - deallocate_page(page); - } - } - } - - char_t* allocate_string(size_t length) - { - static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment; - - PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset); - - // allocate memory for string and header block - size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); - - // round size up to block alignment boundary - size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1); - - xml_memory_page* page; - xml_memory_string_header* header = static_cast(allocate_memory(full_size, page)); - - if (!header) return 0; - - // setup header - ptrdiff_t page_offset = reinterpret_cast(header) - 
reinterpret_cast(page) - sizeof(xml_memory_page); - - assert(page_offset % xml_memory_block_alignment == 0); - assert(page_offset >= 0 && static_cast(page_offset) < max_encoded_offset); - header->page_offset = static_cast(static_cast(page_offset) / xml_memory_block_alignment); - - // full_size == 0 for large strings that occupy the whole page - assert(full_size % xml_memory_block_alignment == 0); - assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0)); - header->full_size = static_cast(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0); - - // round-trip through void* to avoid 'cast increases required alignment of target type' warning - // header is guaranteed a pointer-sized alignment, which should be enough for char_t - return static_cast(static_cast(header + 1)); - } - - void deallocate_string(char_t* string) - { - // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings - // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string - - // get header - xml_memory_string_header* header = static_cast(static_cast(string)) - 1; - assert(header); - - // deallocate - size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; - xml_memory_page* page = reinterpret_cast(static_cast(reinterpret_cast(header) - page_offset)); - - // if full_size == 0 then this string occupies the whole page - size_t full_size = header->full_size == 0 ? 
page->busy_size : header->full_size * xml_memory_block_alignment; - - deallocate_memory(header, full_size, page); - } - - bool reserve() - { - #ifdef PUGIXML_COMPACT - return _hash->reserve(); - #else - return true; - #endif - } - - xml_memory_page* _root; - size_t _busy_size; - - #ifdef PUGIXML_COMPACT - compact_hash_table* _hash; - #endif - }; - - PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) - { - const size_t large_allocation_threshold = xml_memory_page_size / 4; - - xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size); - out_page = page; - - if (!page) return 0; - - if (size <= large_allocation_threshold) - { - _root->busy_size = _busy_size; - - // insert page at the end of linked list - page->prev = _root; - _root->next = page; - _root = page; - - _busy_size = size; - } - else - { - // insert page before the end of linked list, so that it is deleted as soon as possible - // the last page is not deleted even if it's empty (see deallocate_memory) - assert(_root->prev); - - page->prev = _root->prev; - page->next = _root; + xml_allocator* allocator; - _root->prev->next = page; - _root->prev = page; + xml_memory_page* prev; + xml_memory_page* next; - page->busy_size = size; - } + size_t busy_size; + size_t freed_size; - return reinterpret_cast(page) + sizeof(xml_memory_page); - } +#ifdef PUGIXML_COMPACT + char_t* compact_string_base; + void* compact_shared_parent; + uint32_t* compact_page_marker; +#endif +}; + +struct xml_memory_string_header { + uint16_t page_offset; // offset from page->data + uint16_t full_size; // 0 if string occupies whole page +}; + +struct xml_allocator { + xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) { +#ifdef PUGIXML_COMPACT + _hash = 0; +#endif + } + + xml_memory_page* allocate_page(size_t data_size) { + size_t size = sizeof(xml_memory_page) + data_size; + + // allocate block with some alignment, 
leaving memory for worst-case padding + void* memory = xml_memory::allocate(size + xml_memory_page_alignment); + if (!memory) return 0; + + // align to next page boundary (note: this guarantees at least 1 usable byte before the page) + char* page_memory = reinterpret_cast((reinterpret_cast(memory) + xml_memory_page_alignment) & ~(xml_memory_page_alignment - 1)); + + // prepare page structure + xml_memory_page* page = xml_memory_page::construct(page_memory); + assert(page); + + page->allocator = _root->allocator; + + // record the offset for freeing the memory block + assert(page_memory > memory && page_memory - static_cast(memory) <= 127); + page_memory[-1] = static_cast(page_memory - static_cast(memory)); + + return page; + } + + static void deallocate_page(xml_memory_page* page) { + char* page_memory = reinterpret_cast(page); + + xml_memory::deallocate(page_memory - page_memory[-1]); + } + + void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); + + void* allocate_memory(size_t size, xml_memory_page*& out_page) { + if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size)) + return allocate_memory_oob(size, out_page); + + void* buf = reinterpret_cast(_root) + sizeof(xml_memory_page) + _busy_size; + + _busy_size += size; + + out_page = _root; + + return buf; + } + +#ifdef PUGIXML_COMPACT + void* allocate_object(size_t size, xml_memory_page*& out_page) { + void* result = allocate_memory(size + sizeof(uint32_t), out_page); + if (!result) return 0; + + // adjust for marker + ptrdiff_t offset = static_cast(result) - reinterpret_cast(out_page->compact_page_marker); + + if (PUGI__UNLIKELY(static_cast(offset) >= 256 * xml_memory_block_alignment)) { + // insert new marker + uint32_t* marker = static_cast(result); + + *marker = static_cast(reinterpret_cast(marker) - reinterpret_cast(out_page)); + out_page->compact_page_marker = marker; + + // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block + // 
this will make sure deallocate_memory correctly tracks the size + out_page->freed_size += sizeof(uint32_t); + + return marker + 1; + } else { + // roll back uint32_t part + _busy_size -= sizeof(uint32_t); + + return result; + } + } +#else + void* allocate_object(size_t size, xml_memory_page*& out_page) { + return allocate_memory(size, out_page); + } +#endif + + void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) { + if (page == _root) page->busy_size = _busy_size; + + assert(ptr >= reinterpret_cast(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast(page) + sizeof(xml_memory_page) + page->busy_size); + (void)!ptr; + + page->freed_size += size; + assert(page->freed_size <= page->busy_size); + + if (page->freed_size == page->busy_size) { + if (page->next == 0) { + assert(_root == page); + + // top page freed, just reset sizes + page->busy_size = 0; + page->freed_size = 0; + +#ifdef PUGIXML_COMPACT + // reset compact state to maximize efficiency + page->compact_string_base = 0; + page->compact_shared_parent = 0; + page->compact_page_marker = 0; +#endif + + _busy_size = 0; + } else { + assert(_root != page); + assert(page->prev); + + // remove from the list + page->prev->next = page->next; + page->next->prev = page->prev; + + // deallocate + deallocate_page(page); + } + } + } + + char_t* allocate_string(size_t length) { + static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment; + + PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset); + + // allocate memory for string and header block + size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); + + // round size up to block alignment boundary + size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1); + + xml_memory_page* page; + xml_memory_string_header* header = static_cast(allocate_memory(full_size, page)); + + if (!header) return 0; + + // setup header + ptrdiff_t page_offset = reinterpret_cast(header) 
- reinterpret_cast(page) - sizeof(xml_memory_page); + + assert(page_offset % xml_memory_block_alignment == 0); + assert(page_offset >= 0 && static_cast(page_offset) < max_encoded_offset); + header->page_offset = static_cast(static_cast(page_offset) / xml_memory_block_alignment); + + // full_size == 0 for large strings that occupy the whole page + assert(full_size % xml_memory_block_alignment == 0); + assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0)); + header->full_size = static_cast(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0); + + // round-trip through void* to avoid 'cast increases required alignment of target type' warning + // header is guaranteed a pointer-sized alignment, which should be enough for char_t + return static_cast(static_cast(header + 1)); + } + + void deallocate_string(char_t* string) { + // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings + // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string + + // get header + xml_memory_string_header* header = static_cast(static_cast(string)) - 1; + assert(header); + + // deallocate + size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; + xml_memory_page* page = reinterpret_cast(static_cast(reinterpret_cast(header) - page_offset)); + + // if full_size == 0 then this string occupies the whole page + size_t full_size = header->full_size == 0 ? 
page->busy_size : header->full_size * xml_memory_block_alignment; + + deallocate_memory(header, full_size, page); + } + + bool reserve() { +#ifdef PUGIXML_COMPACT + return _hash->reserve(); +#else + return true; +#endif + } + + xml_memory_page* _root; + size_t _busy_size; + +#ifdef PUGIXML_COMPACT + compact_hash_table* _hash; +#endif +}; + +PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) +{ + const size_t large_allocation_threshold = xml_memory_page_size / 4; + + xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size); + out_page = page; + + if (!page) return 0; + + if (size <= large_allocation_threshold) { + _root->busy_size = _busy_size; + + // insert page at the end of linked list + page->prev = _root; + _root->next = page; + _root = page; + + _busy_size = size; + } else { + // insert page before the end of linked list, so that it is deleted as soon as possible + // the last page is not deleted even if it's empty (see deallocate_memory) + assert(_root->prev); + + page->prev = _root->prev; + page->next = _root; + + _root->prev->next = page; + _root->prev = page; + + page->busy_size = size; + } + + return reinterpret_cast(page) + sizeof(xml_memory_page); +} PUGI__NS_END #ifdef PUGIXML_COMPACT PUGI__NS_BEGIN - static const uintptr_t compact_alignment_log2 = 2; - static const uintptr_t compact_alignment = 1 << compact_alignment_log2; +static const uintptr_t compact_alignment_log2 = 2; +static const uintptr_t compact_alignment = 1 << compact_alignment_log2; - class compact_header - { - public: - compact_header(xml_memory_page* page, unsigned int flags) - { - PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment); +class compact_header +{ +public: + compact_header(xml_memory_page* page, unsigned int flags) { + PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment); - ptrdiff_t offset = (reinterpret_cast(this) - 
reinterpret_cast(page->compact_page_marker)); - assert(offset % compact_alignment == 0 && static_cast(offset) < 256 * compact_alignment); + ptrdiff_t offset = (reinterpret_cast(this) - reinterpret_cast(page->compact_page_marker)); + assert(offset % compact_alignment == 0 && static_cast(offset) < 256 * compact_alignment); - _page = static_cast(offset >> compact_alignment_log2); - _flags = static_cast(flags); - } + _page = static_cast(offset >> compact_alignment_log2); + _flags = static_cast(flags); + } - void operator&=(uintptr_t mod) - { - _flags &= static_cast(mod); - } + void operator&=(uintptr_t mod) { + _flags &= static_cast(mod); + } - void operator|=(uintptr_t mod) - { - _flags |= static_cast(mod); - } + void operator|=(uintptr_t mod) { + _flags |= static_cast(mod); + } - uintptr_t operator&(uintptr_t mod) const - { - return _flags & mod; - } + uintptr_t operator&(uintptr_t mod) const { + return _flags & mod; + } - xml_memory_page* get_page() const - { - // round-trip through void* to silence 'cast increases required alignment of target type' warnings - const char* page_marker = reinterpret_cast(this) - (_page << compact_alignment_log2); - const char* page = page_marker - *reinterpret_cast(static_cast(page_marker)); + xml_memory_page* get_page() const { + // round-trip through void* to silence 'cast increases required alignment of target type' warnings + const char* page_marker = reinterpret_cast(this) - (_page << compact_alignment_log2); + const char* page = page_marker - *reinterpret_cast(static_cast(page_marker)); - return const_cast(reinterpret_cast(static_cast(page))); - } + return const_cast(reinterpret_cast(static_cast(page))); + } - private: - unsigned char _page; - unsigned char _flags; - }; +private: + unsigned char _page; + unsigned char _flags; +}; - PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset) - { - const compact_header* header = reinterpret_cast(static_cast(object) - header_offset); +PUGI__FN xml_memory_page* 
compact_get_page(const void* object, int header_offset) +{ + const compact_header* header = reinterpret_cast(static_cast(object) - header_offset); - return header->get_page(); - } + return header->get_page(); +} - template PUGI__FN_NO_INLINE T* compact_get_value(const void* object) - { - return static_cast(*compact_get_page(object, header_offset)->allocator->_hash->find(object)); - } +template PUGI__FN_NO_INLINE T* compact_get_value(const void* object) +{ + return static_cast(*compact_get_page(object, header_offset)->allocator->_hash->find(object)); +} - template PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) - { - *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value; - } +template PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) +{ + *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value; +} - template class compact_pointer - { - public: - compact_pointer(): _data(0) - { - } +template class compact_pointer +{ +public: + compact_pointer(): _data(0) { + } - void operator=(const compact_pointer& rhs) - { - *this = rhs + 0; - } + void operator=(const compact_pointer& rhs) { + *this = rhs + 0; + } - void operator=(T* value) - { - if (value) - { - // value is guaranteed to be compact-aligned; 'this' is not - // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) - // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to - // compensate for arithmetic shift rounding for negative values - ptrdiff_t diff = reinterpret_cast(value) - reinterpret_cast(this); - ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start; + void operator=(T* value) { + if (value) { + // value is guaranteed to be compact-aligned; 'this' is not + // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) + // so for negative offsets (e.g. 
-3) we need to adjust the diff by compact_alignment - 1 to + // compensate for arithmetic shift rounding for negative values + ptrdiff_t diff = reinterpret_cast(value) - reinterpret_cast(this); + ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start; - if (static_cast(offset) <= 253) - _data = static_cast(offset + 1); - else - { - compact_set_value(this, value); + if (static_cast(offset) <= 253) + _data = static_cast(offset + 1); + else { + compact_set_value(this, value); - _data = 255; - } - } - else - _data = 0; - } + _data = 255; + } + } else + _data = 0; + } - operator T*() const - { - if (_data) - { - if (_data < 255) - { - uintptr_t base = reinterpret_cast(this) & ~(compact_alignment - 1); + operator T*() const { + if (_data) { + if (_data < 255) { + uintptr_t base = reinterpret_cast(this) & ~(compact_alignment - 1); - return reinterpret_cast(base + ((_data - 1 + start) << compact_alignment_log2)); - } - else - return compact_get_value(this); - } - else - return 0; - } + return reinterpret_cast(base + ((_data - 1 + start) << compact_alignment_log2)); + } else + return compact_get_value(this); + } else + return 0; + } - T* operator->() const - { - return *this; - } + T* operator->() const { + return *this; + } - private: - unsigned char _data; - }; +private: + unsigned char _data; +}; - template class compact_pointer_parent - { - public: - compact_pointer_parent(): _data(0) - { - } +template class compact_pointer_parent +{ +public: + compact_pointer_parent(): _data(0) { + } - void operator=(const compact_pointer_parent& rhs) - { - *this = rhs + 0; - } + void operator=(const compact_pointer_parent& rhs) { + *this = rhs + 0; + } - void operator=(T* value) - { - if (value) - { - // value is guaranteed to be compact-aligned; 'this' is not - // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) - // so for negative offsets (e.g. 
-3) we need to adjust the diff by compact_alignment - 1 to - // compensate for arithmetic shift behavior for negative values - ptrdiff_t diff = reinterpret_cast(value) - reinterpret_cast(this); - ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533; + void operator=(T* value) { + if (value) { + // value is guaranteed to be compact-aligned; 'this' is not + // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) + // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to + // compensate for arithmetic shift behavior for negative values + ptrdiff_t diff = reinterpret_cast(value) - reinterpret_cast(this); + ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533; - if (static_cast(offset) <= 65533) - { - _data = static_cast(offset + 1); - } - else - { - xml_memory_page* page = compact_get_page(this, header_offset); + if (static_cast(offset) <= 65533) { + _data = static_cast(offset + 1); + } else { + xml_memory_page* page = compact_get_page(this, header_offset); - if (PUGI__UNLIKELY(page->compact_shared_parent == 0)) - page->compact_shared_parent = value; + if (PUGI__UNLIKELY(page->compact_shared_parent == 0)) + page->compact_shared_parent = value; - if (page->compact_shared_parent == value) - { - _data = 65534; - } - else - { - compact_set_value(this, value); + if (page->compact_shared_parent == value) { + _data = 65534; + } else { + compact_set_value(this, value); - _data = 65535; - } - } - } - else - { - _data = 0; - } - } + _data = 65535; + } + } + } else { + _data = 0; + } + } - operator T*() const - { - if (_data) - { - if (_data < 65534) - { - uintptr_t base = reinterpret_cast(this) & ~(compact_alignment - 1); + operator T*() const { + if (_data) { + if (_data < 65534) { + uintptr_t base = reinterpret_cast(this) & ~(compact_alignment - 1); - return reinterpret_cast(base + ((_data - 1 - 65533) << compact_alignment_log2)); - 
} - else if (_data == 65534) - return static_cast(compact_get_page(this, header_offset)->compact_shared_parent); - else - return compact_get_value(this); - } - else - return 0; - } + return reinterpret_cast(base + ((_data - 1 - 65533) << compact_alignment_log2)); + } else if (_data == 65534) + return static_cast(compact_get_page(this, header_offset)->compact_shared_parent); + else + return compact_get_value(this); + } else + return 0; + } - T* operator->() const - { - return *this; - } + T* operator->() const { + return *this; + } - private: - uint16_t _data; - }; +private: + uint16_t _data; +}; - template class compact_string - { - public: - compact_string(): _data(0) - { - } +template class compact_string +{ +public: + compact_string(): _data(0) { + } - void operator=(const compact_string& rhs) - { - *this = rhs + 0; - } + void operator=(const compact_string& rhs) { + *this = rhs + 0; + } - void operator=(char_t* value) - { - if (value) - { - xml_memory_page* page = compact_get_page(this, header_offset); + void operator=(char_t* value) { + if (value) { + xml_memory_page* page = compact_get_page(this, header_offset); - if (PUGI__UNLIKELY(page->compact_string_base == 0)) - page->compact_string_base = value; + if (PUGI__UNLIKELY(page->compact_string_base == 0)) + page->compact_string_base = value; - ptrdiff_t offset = value - page->compact_string_base; + ptrdiff_t offset = value - page->compact_string_base; - if (static_cast(offset) < (65535 << 7)) - { - // round-trip through void* to silence 'cast increases required alignment of target type' warnings - uint16_t* base = reinterpret_cast(static_cast(reinterpret_cast(this) - base_offset)); + if (static_cast(offset) < (65535 << 7)) { + // round-trip through void* to silence 'cast increases required alignment of target type' warnings + uint16_t* base = reinterpret_cast(static_cast(reinterpret_cast(this) - base_offset)); - if (*base == 0) - { - *base = static_cast((offset >> 7) + 1); - _data = static_cast((offset & 127) 
+ 1); - } - else - { - ptrdiff_t remainder = offset - ((*base - 1) << 7); + if (*base == 0) { + *base = static_cast((offset >> 7) + 1); + _data = static_cast((offset & 127) + 1); + } else { + ptrdiff_t remainder = offset - ((*base - 1) << 7); - if (static_cast(remainder) <= 253) - { - _data = static_cast(remainder + 1); - } - else - { - compact_set_value(this, value); + if (static_cast(remainder) <= 253) { + _data = static_cast(remainder + 1); + } else { + compact_set_value(this, value); - _data = 255; - } - } - } - else - { - compact_set_value(this, value); + _data = 255; + } + } + } else { + compact_set_value(this, value); - _data = 255; - } - } - else - { - _data = 0; - } - } + _data = 255; + } + } else { + _data = 0; + } + } - operator char_t*() const - { - if (_data) - { - if (_data < 255) - { - xml_memory_page* page = compact_get_page(this, header_offset); + operator char_t*() const { + if (_data) { + if (_data < 255) { + xml_memory_page* page = compact_get_page(this, header_offset); - // round-trip through void* to silence 'cast increases required alignment of target type' warnings - const uint16_t* base = reinterpret_cast(static_cast(reinterpret_cast(this) - base_offset)); - assert(*base); + // round-trip through void* to silence 'cast increases required alignment of target type' warnings + const uint16_t* base = reinterpret_cast(static_cast(reinterpret_cast(this) - base_offset)); + assert(*base); - ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); + ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); - return page->compact_string_base + offset; - } - else - { - return compact_get_value(this); - } - } - else - return 0; - } + return page->compact_string_base + offset; + } else { + return compact_get_value(this); + } + } else + return 0; + } - private: - unsigned char _data; - }; +private: + unsigned char _data; +}; PUGI__NS_END #endif #ifdef PUGIXML_COMPACT namespace pugi { - struct xml_attribute_struct - { - xml_attribute_struct(impl::xml_memory_page* 
page): header(page, 0), namevalue_base(0) - { - PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8); - } +struct xml_attribute_struct { + xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0) { + PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8); + } - impl::compact_header header; + impl::compact_header header; - uint16_t namevalue_base; + uint16_t namevalue_base; - impl::compact_string<4, 2> name; - impl::compact_string<5, 3> value; + impl::compact_string<4, 2> name; + impl::compact_string<5, 3> value; - impl::compact_pointer prev_attribute_c; - impl::compact_pointer next_attribute; - }; + impl::compact_pointer prev_attribute_c; + impl::compact_pointer next_attribute; +}; - struct xml_node_struct - { - xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type - 1), namevalue_base(0) - { - PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12); - } +struct xml_node_struct { + xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type - 1), namevalue_base(0) { + PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12); + } - impl::compact_header header; + impl::compact_header header; - uint16_t namevalue_base; + uint16_t namevalue_base; - impl::compact_string<4, 2> name; - impl::compact_string<5, 3> value; + impl::compact_string<4, 2> name; + impl::compact_string<5, 3> value; - impl::compact_pointer_parent parent; + impl::compact_pointer_parent parent; - impl::compact_pointer first_child; + impl::compact_pointer first_child; - impl::compact_pointer prev_sibling_c; - impl::compact_pointer next_sibling; + impl::compact_pointer prev_sibling_c; + impl::compact_pointer next_sibling; - impl::compact_pointer first_attribute; - }; + impl::compact_pointer first_attribute; +}; } #else namespace pugi { - struct xml_attribute_struct - { - xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) - { - } +struct 
xml_attribute_struct { + xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) { + } - uintptr_t header; + uintptr_t header; - char_t* name; - char_t* value; + char_t* name; + char_t* value; - xml_attribute_struct* prev_attribute_c; - xml_attribute_struct* next_attribute; - }; + xml_attribute_struct* prev_attribute_c; + xml_attribute_struct* next_attribute; +}; - struct xml_node_struct - { - xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | (type - 1)), name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) - { - } +struct xml_node_struct { + xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | (type - 1)), name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) { + } - uintptr_t header; + uintptr_t header; - char_t* name; - char_t* value; + char_t* name; + char_t* value; - xml_node_struct* parent; + xml_node_struct* parent; - xml_node_struct* first_child; + xml_node_struct* first_child; - xml_node_struct* prev_sibling_c; - xml_node_struct* next_sibling; + xml_node_struct* prev_sibling_c; + xml_node_struct* next_sibling; - xml_attribute_struct* first_attribute; - }; + xml_attribute_struct* first_attribute; +}; } #endif PUGI__NS_BEGIN - struct xml_extra_buffer - { - char_t* buffer; - xml_extra_buffer* next; - }; +struct xml_extra_buffer { + char_t* buffer; + xml_extra_buffer* next; +}; - struct xml_document_struct: public xml_node_struct, public xml_allocator - { - xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) - { - #ifdef PUGIXML_COMPACT - _hash = &hash; - #endif - } +struct xml_document_struct: public xml_node_struct, public xml_allocator { + xml_document_struct(xml_memory_page* page): xml_node_struct(page, 
node_document), xml_allocator(page), buffer(0), extra_buffers(0) { +#ifdef PUGIXML_COMPACT + _hash = &hash; +#endif + } - const char_t* buffer; + const char_t* buffer; - xml_extra_buffer* extra_buffers; + xml_extra_buffer* extra_buffers; - #ifdef PUGIXML_COMPACT - compact_hash_table hash; - #endif - }; +#ifdef PUGIXML_COMPACT + compact_hash_table hash; +#endif +}; - template inline xml_allocator& get_allocator(const Object* object) - { - assert(object); +template inline xml_allocator& get_allocator(const Object* object) +{ + assert(object); - return *PUGI__GETPAGE(object)->allocator; - } + return *PUGI__GETPAGE(object)->allocator; +} - template inline xml_document_struct& get_document(const Object* object) - { - assert(object); +template inline xml_document_struct& get_document(const Object* object) +{ + assert(object); - return *static_cast(PUGI__GETPAGE(object)->allocator); - } + return *static_cast(PUGI__GETPAGE(object)->allocator); +} PUGI__NS_END // Low-level DOM operations PUGI__NS_BEGIN - inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) - { - xml_memory_page* page; - void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page); - if (!memory) return 0; +inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) +{ + xml_memory_page* page; + void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page); + if (!memory) return 0; - return new (memory) xml_attribute_struct(page); - } + return new (memory) xml_attribute_struct(page); +} - inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) - { - xml_memory_page* page; - void* memory = alloc.allocate_object(sizeof(xml_node_struct), page); - if (!memory) return 0; +inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) +{ + xml_memory_page* page; + void* memory = alloc.allocate_object(sizeof(xml_node_struct), page); + if (!memory) return 0; - return new (memory) xml_node_struct(page, type); - } + return new 
(memory) xml_node_struct(page, type); +} - inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) - { - if (a->header & impl::xml_memory_page_name_allocated_mask) - alloc.deallocate_string(a->name); +inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) +{ + if (a->header & impl::xml_memory_page_name_allocated_mask) + alloc.deallocate_string(a->name); - if (a->header & impl::xml_memory_page_value_allocated_mask) - alloc.deallocate_string(a->value); + if (a->header & impl::xml_memory_page_value_allocated_mask) + alloc.deallocate_string(a->value); - alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a)); - } + alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a)); +} - inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) - { - if (n->header & impl::xml_memory_page_name_allocated_mask) - alloc.deallocate_string(n->name); +inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) +{ + if (n->header & impl::xml_memory_page_name_allocated_mask) + alloc.deallocate_string(n->name); - if (n->header & impl::xml_memory_page_value_allocated_mask) - alloc.deallocate_string(n->value); + if (n->header & impl::xml_memory_page_value_allocated_mask) + alloc.deallocate_string(n->value); - for (xml_attribute_struct* attr = n->first_attribute; attr; ) - { - xml_attribute_struct* next = attr->next_attribute; + for (xml_attribute_struct* attr = n->first_attribute; attr; ) { + xml_attribute_struct* next = attr->next_attribute; - destroy_attribute(attr, alloc); + destroy_attribute(attr, alloc); - attr = next; - } + attr = next; + } - for (xml_node_struct* child = n->first_child; child; ) - { - xml_node_struct* next = child->next_sibling; + for (xml_node_struct* child = n->first_child; child; ) { + xml_node_struct* next = child->next_sibling; - destroy_node(child, alloc); + destroy_node(child, alloc); - child = next; - } + child = next; + } - alloc.deallocate_memory(n, 
sizeof(xml_node_struct), PUGI__GETPAGE(n)); - } + alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n)); +} - inline void append_node(xml_node_struct* child, xml_node_struct* node) - { - child->parent = node; +inline void append_node(xml_node_struct* child, xml_node_struct* node) +{ + child->parent = node; - xml_node_struct* head = node->first_child; + xml_node_struct* head = node->first_child; - if (head) - { - xml_node_struct* tail = head->prev_sibling_c; + if (head) { + xml_node_struct* tail = head->prev_sibling_c; - tail->next_sibling = child; - child->prev_sibling_c = tail; - head->prev_sibling_c = child; - } - else - { - node->first_child = child; - child->prev_sibling_c = child; - } - } + tail->next_sibling = child; + child->prev_sibling_c = tail; + head->prev_sibling_c = child; + } else { + node->first_child = child; + child->prev_sibling_c = child; + } +} - inline void prepend_node(xml_node_struct* child, xml_node_struct* node) - { - child->parent = node; +inline void prepend_node(xml_node_struct* child, xml_node_struct* node) +{ + child->parent = node; - xml_node_struct* head = node->first_child; + xml_node_struct* head = node->first_child; - if (head) - { - child->prev_sibling_c = head->prev_sibling_c; - head->prev_sibling_c = child; - } - else - child->prev_sibling_c = child; + if (head) { + child->prev_sibling_c = head->prev_sibling_c; + head->prev_sibling_c = child; + } else + child->prev_sibling_c = child; - child->next_sibling = head; - node->first_child = child; - } + child->next_sibling = head; + node->first_child = child; +} - inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) - { - xml_node_struct* parent = node->parent; +inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) +{ + xml_node_struct* parent = node->parent; - child->parent = parent; + child->parent = parent; - if (node->next_sibling) - node->next_sibling->prev_sibling_c = child; - else - 
parent->first_child->prev_sibling_c = child; + if (node->next_sibling) + node->next_sibling->prev_sibling_c = child; + else + parent->first_child->prev_sibling_c = child; - child->next_sibling = node->next_sibling; - child->prev_sibling_c = node; + child->next_sibling = node->next_sibling; + child->prev_sibling_c = node; - node->next_sibling = child; - } + node->next_sibling = child; +} - inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) - { - xml_node_struct* parent = node->parent; +inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) +{ + xml_node_struct* parent = node->parent; - child->parent = parent; + child->parent = parent; - if (node->prev_sibling_c->next_sibling) - node->prev_sibling_c->next_sibling = child; - else - parent->first_child = child; + if (node->prev_sibling_c->next_sibling) + node->prev_sibling_c->next_sibling = child; + else + parent->first_child = child; - child->prev_sibling_c = node->prev_sibling_c; - child->next_sibling = node; + child->prev_sibling_c = node->prev_sibling_c; + child->next_sibling = node; - node->prev_sibling_c = child; - } + node->prev_sibling_c = child; +} - inline void remove_node(xml_node_struct* node) - { - xml_node_struct* parent = node->parent; +inline void remove_node(xml_node_struct* node) +{ + xml_node_struct* parent = node->parent; - if (node->next_sibling) - node->next_sibling->prev_sibling_c = node->prev_sibling_c; - else - parent->first_child->prev_sibling_c = node->prev_sibling_c; + if (node->next_sibling) + node->next_sibling->prev_sibling_c = node->prev_sibling_c; + else + parent->first_child->prev_sibling_c = node->prev_sibling_c; - if (node->prev_sibling_c->next_sibling) - node->prev_sibling_c->next_sibling = node->next_sibling; - else - parent->first_child = node->next_sibling; + if (node->prev_sibling_c->next_sibling) + node->prev_sibling_c->next_sibling = node->next_sibling; + else + parent->first_child = node->next_sibling; - node->parent = 0; - 
node->prev_sibling_c = 0; - node->next_sibling = 0; - } + node->parent = 0; + node->prev_sibling_c = 0; + node->next_sibling = 0; +} - inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node) - { - xml_attribute_struct* head = node->first_attribute; +inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node) +{ + xml_attribute_struct* head = node->first_attribute; - if (head) - { - xml_attribute_struct* tail = head->prev_attribute_c; + if (head) { + xml_attribute_struct* tail = head->prev_attribute_c; - tail->next_attribute = attr; - attr->prev_attribute_c = tail; - head->prev_attribute_c = attr; - } - else - { - node->first_attribute = attr; - attr->prev_attribute_c = attr; - } - } + tail->next_attribute = attr; + attr->prev_attribute_c = tail; + head->prev_attribute_c = attr; + } else { + node->first_attribute = attr; + attr->prev_attribute_c = attr; + } +} - inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node) - { - xml_attribute_struct* head = node->first_attribute; +inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node) +{ + xml_attribute_struct* head = node->first_attribute; - if (head) - { - attr->prev_attribute_c = head->prev_attribute_c; - head->prev_attribute_c = attr; - } - else - attr->prev_attribute_c = attr; + if (head) { + attr->prev_attribute_c = head->prev_attribute_c; + head->prev_attribute_c = attr; + } else + attr->prev_attribute_c = attr; - attr->next_attribute = head; - node->first_attribute = attr; - } + attr->next_attribute = head; + node->first_attribute = attr; +} - inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) - { - if (place->next_attribute) - place->next_attribute->prev_attribute_c = attr; - else - node->first_attribute->prev_attribute_c = attr; +inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) +{ + if 
(place->next_attribute) + place->next_attribute->prev_attribute_c = attr; + else + node->first_attribute->prev_attribute_c = attr; - attr->next_attribute = place->next_attribute; - attr->prev_attribute_c = place; - place->next_attribute = attr; - } + attr->next_attribute = place->next_attribute; + attr->prev_attribute_c = place; + place->next_attribute = attr; +} - inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) - { - if (place->prev_attribute_c->next_attribute) - place->prev_attribute_c->next_attribute = attr; - else - node->first_attribute = attr; +inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) +{ + if (place->prev_attribute_c->next_attribute) + place->prev_attribute_c->next_attribute = attr; + else + node->first_attribute = attr; - attr->prev_attribute_c = place->prev_attribute_c; - attr->next_attribute = place; - place->prev_attribute_c = attr; - } + attr->prev_attribute_c = place->prev_attribute_c; + attr->next_attribute = place; + place->prev_attribute_c = attr; +} - inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node) - { - if (attr->next_attribute) - attr->next_attribute->prev_attribute_c = attr->prev_attribute_c; - else - node->first_attribute->prev_attribute_c = attr->prev_attribute_c; +inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node) +{ + if (attr->next_attribute) + attr->next_attribute->prev_attribute_c = attr->prev_attribute_c; + else + node->first_attribute->prev_attribute_c = attr->prev_attribute_c; - if (attr->prev_attribute_c->next_attribute) - attr->prev_attribute_c->next_attribute = attr->next_attribute; - else - node->first_attribute = attr->next_attribute; + if (attr->prev_attribute_c->next_attribute) + attr->prev_attribute_c->next_attribute = attr->next_attribute; + else + node->first_attribute = attr->next_attribute; - attr->prev_attribute_c = 0; - 
attr->next_attribute = 0; - } + attr->prev_attribute_c = 0; + attr->next_attribute = 0; +} - PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) - { - if (!alloc.reserve()) return 0; +PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) +{ + if (!alloc.reserve()) return 0; - xml_node_struct* child = allocate_node(alloc, type); - if (!child) return 0; + xml_node_struct* child = allocate_node(alloc, type); + if (!child) return 0; - append_node(child, node); + append_node(child, node); - return child; - } + return child; +} - PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc) - { - if (!alloc.reserve()) return 0; +PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc) +{ + if (!alloc.reserve()) return 0; - xml_attribute_struct* attr = allocate_attribute(alloc); - if (!attr) return 0; + xml_attribute_struct* attr = allocate_attribute(alloc); + if (!attr) return 0; - append_attribute(attr, node); + append_attribute(attr, node); - return attr; - } + return attr; +} PUGI__NS_END // Helper classes for code generation PUGI__NS_BEGIN - struct opt_false - { - enum { value = 0 }; - }; +struct opt_false { + enum { value = 0 }; +}; - struct opt_true - { - enum { value = 1 }; - }; +struct opt_true { + enum { value = 1 }; +}; PUGI__NS_END // Unicode utilities PUGI__NS_BEGIN - inline uint16_t endian_swap(uint16_t value) - { - return static_cast(((value & 0xff) << 8) | (value >> 8)); - } +inline uint16_t endian_swap(uint16_t value) +{ + return static_cast(((value & 0xff) << 8) | (value >> 8)); +} - inline uint32_t endian_swap(uint32_t value) - { - return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24); - } +inline uint32_t endian_swap(uint32_t value) +{ + return ((value & 
0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24); +} - struct utf8_counter - { - typedef size_t value_type; +struct utf8_counter { + typedef size_t value_type; - static value_type low(value_type result, uint32_t ch) - { - // U+0000..U+007F - if (ch < 0x80) return result + 1; - // U+0080..U+07FF - else if (ch < 0x800) return result + 2; - // U+0800..U+FFFF - else return result + 3; - } + static value_type low(value_type result, uint32_t ch) { + // U+0000..U+007F + if (ch < 0x80) return result + 1; + // U+0080..U+07FF + else if (ch < 0x800) return result + 2; + // U+0800..U+FFFF + else return result + 3; + } - static value_type high(value_type result, uint32_t) - { - // U+10000..U+10FFFF - return result + 4; - } - }; + static value_type high(value_type result, uint32_t) { + // U+10000..U+10FFFF + return result + 4; + } +}; - struct utf8_writer - { - typedef uint8_t* value_type; +struct utf8_writer { + typedef uint8_t* value_type; - static value_type low(value_type result, uint32_t ch) - { - // U+0000..U+007F - if (ch < 0x80) - { - *result = static_cast(ch); - return result + 1; - } - // U+0080..U+07FF - else if (ch < 0x800) - { - result[0] = static_cast(0xC0 | (ch >> 6)); - result[1] = static_cast(0x80 | (ch & 0x3F)); - return result + 2; - } - // U+0800..U+FFFF - else - { - result[0] = static_cast(0xE0 | (ch >> 12)); - result[1] = static_cast(0x80 | ((ch >> 6) & 0x3F)); - result[2] = static_cast(0x80 | (ch & 0x3F)); - return result + 3; - } - } + static value_type low(value_type result, uint32_t ch) { + // U+0000..U+007F + if (ch < 0x80) { + *result = static_cast(ch); + return result + 1; + } + // U+0080..U+07FF + else if (ch < 0x800) { + result[0] = static_cast(0xC0 | (ch >> 6)); + result[1] = static_cast(0x80 | (ch & 0x3F)); + return result + 2; + } + // U+0800..U+FFFF + else { + result[0] = static_cast(0xE0 | (ch >> 12)); + result[1] = static_cast(0x80 | ((ch >> 6) & 0x3F)); + result[2] = static_cast(0x80 | (ch & 0x3F)); + return 
result + 3; + } + } - static value_type high(value_type result, uint32_t ch) - { - // U+10000..U+10FFFF - result[0] = static_cast(0xF0 | (ch >> 18)); - result[1] = static_cast(0x80 | ((ch >> 12) & 0x3F)); - result[2] = static_cast(0x80 | ((ch >> 6) & 0x3F)); - result[3] = static_cast(0x80 | (ch & 0x3F)); - return result + 4; - } + static value_type high(value_type result, uint32_t ch) { + // U+10000..U+10FFFF + result[0] = static_cast(0xF0 | (ch >> 18)); + result[1] = static_cast(0x80 | ((ch >> 12) & 0x3F)); + result[2] = static_cast(0x80 | ((ch >> 6) & 0x3F)); + result[3] = static_cast(0x80 | (ch & 0x3F)); + return result + 4; + } - static value_type any(value_type result, uint32_t ch) - { - return (ch < 0x10000) ? low(result, ch) : high(result, ch); - } - }; + static value_type any(value_type result, uint32_t ch) { + return (ch < 0x10000) ? low(result, ch) : high(result, ch); + } +}; - struct utf16_counter - { - typedef size_t value_type; +struct utf16_counter { + typedef size_t value_type; - static value_type low(value_type result, uint32_t) - { - return result + 1; - } + static value_type low(value_type result, uint32_t) { + return result + 1; + } - static value_type high(value_type result, uint32_t) - { - return result + 2; - } - }; + static value_type high(value_type result, uint32_t) { + return result + 2; + } +}; - struct utf16_writer - { - typedef uint16_t* value_type; +struct utf16_writer { + typedef uint16_t* value_type; - static value_type low(value_type result, uint32_t ch) - { - *result = static_cast(ch); + static value_type low(value_type result, uint32_t ch) { + *result = static_cast(ch); - return result + 1; - } + return result + 1; + } - static value_type high(value_type result, uint32_t ch) - { - uint32_t msh = static_cast(ch - 0x10000) >> 10; - uint32_t lsh = static_cast(ch - 0x10000) & 0x3ff; + static value_type high(value_type result, uint32_t ch) { + uint32_t msh = static_cast(ch - 0x10000) >> 10; + uint32_t lsh = static_cast(ch - 0x10000) & 
0x3ff; - result[0] = static_cast(0xD800 + msh); - result[1] = static_cast(0xDC00 + lsh); + result[0] = static_cast(0xD800 + msh); + result[1] = static_cast(0xDC00 + lsh); - return result + 2; - } + return result + 2; + } - static value_type any(value_type result, uint32_t ch) - { - return (ch < 0x10000) ? low(result, ch) : high(result, ch); - } - }; + static value_type any(value_type result, uint32_t ch) { + return (ch < 0x10000) ? low(result, ch) : high(result, ch); + } +}; - struct utf32_counter - { - typedef size_t value_type; +struct utf32_counter { + typedef size_t value_type; - static value_type low(value_type result, uint32_t) - { - return result + 1; - } + static value_type low(value_type result, uint32_t) { + return result + 1; + } - static value_type high(value_type result, uint32_t) - { - return result + 1; - } - }; + static value_type high(value_type result, uint32_t) { + return result + 1; + } +}; - struct utf32_writer - { - typedef uint32_t* value_type; +struct utf32_writer { + typedef uint32_t* value_type; - static value_type low(value_type result, uint32_t ch) - { - *result = ch; + static value_type low(value_type result, uint32_t ch) { + *result = ch; - return result + 1; - } + return result + 1; + } - static value_type high(value_type result, uint32_t ch) - { - *result = ch; + static value_type high(value_type result, uint32_t ch) { + *result = ch; - return result + 1; - } + return result + 1; + } - static value_type any(value_type result, uint32_t ch) - { - *result = ch; + static value_type any(value_type result, uint32_t ch) { + *result = ch; - return result + 1; - } - }; + return result + 1; + } +}; - struct latin1_writer - { - typedef uint8_t* value_type; +struct latin1_writer { + typedef uint8_t* value_type; - static value_type low(value_type result, uint32_t ch) - { - *result = static_cast(ch > 255 ? '?' : ch); + static value_type low(value_type result, uint32_t ch) { + *result = static_cast(ch > 255 ? '?' 
: ch); - return result + 1; - } + return result + 1; + } - static value_type high(value_type result, uint32_t ch) - { - (void)ch; + static value_type high(value_type result, uint32_t ch) { + (void)ch; - *result = '?'; + *result = '?'; - return result + 1; - } - }; + return result + 1; + } +}; - struct utf8_decoder - { - typedef uint8_t type; +struct utf8_decoder { + typedef uint8_t type; - template static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) - { - const uint8_t utf8_byte_mask = 0x3f; + template static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) { + const uint8_t utf8_byte_mask = 0x3f; - while (size) - { - uint8_t lead = *data; + while (size) { + uint8_t lead = *data; - // 0xxxxxxx -> U+0000..U+007F - if (lead < 0x80) - { - result = Traits::low(result, lead); - data += 1; - size -= 1; + // 0xxxxxxx -> U+0000..U+007F + if (lead < 0x80) { + result = Traits::low(result, lead); + data += 1; + size -= 1; - // process aligned single-byte (ascii) blocks - if ((reinterpret_cast(data) & 3) == 0) - { - // round-trip through void* to silence 'cast increases required alignment of target type' warnings - while (size >= 4 && (*static_cast(static_cast(data)) & 0x80808080) == 0) - { - result = Traits::low(result, data[0]); - result = Traits::low(result, data[1]); - result = Traits::low(result, data[2]); - result = Traits::low(result, data[3]); - data += 4; - size -= 4; - } - } - } - // 110xxxxx -> U+0080..U+07FF - else if (static_cast(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) - { - result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); - data += 2; - size -= 2; - } - // 1110xxxx -> U+0800-U+FFFF - else if (static_cast(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) - { - result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & 
utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); - data += 3; - size -= 3; - } - // 11110xxx -> U+10000..U+10FFFF - else if (static_cast(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) - { - result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); - data += 4; - size -= 4; - } - // 10xxxxxx or 11111xxx -> invalid - else - { - data += 1; - size -= 1; - } - } + // process aligned single-byte (ascii) blocks + if ((reinterpret_cast(data) & 3) == 0) { + // round-trip through void* to silence 'cast increases required alignment of target type' warnings + while (size >= 4 && (*static_cast(static_cast(data)) & 0x80808080) == 0) { + result = Traits::low(result, data[0]); + result = Traits::low(result, data[1]); + result = Traits::low(result, data[2]); + result = Traits::low(result, data[3]); + data += 4; + size -= 4; + } + } + } + // 110xxxxx -> U+0080..U+07FF + else if (static_cast(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) { + result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); + data += 2; + size -= 2; + } + // 1110xxxx -> U+0800-U+FFFF + else if (static_cast(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) { + result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); + data += 3; + size -= 3; + } + // 11110xxx -> U+10000..U+10FFFF + else if (static_cast(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) { + result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); + data += 4; + size -= 4; + } + // 10xxxxxx or 11111xxx -> invalid + else { + data += 1; + size -= 1; + } + } - return result; 
- } - }; + return result; + } +}; - template struct utf16_decoder - { - typedef uint16_t type; +template struct utf16_decoder { + typedef uint16_t type; - template static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) - { - while (size) - { - uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; + template static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) { + while (size) { + uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; - // U+0000..U+D7FF - if (lead < 0xD800) - { - result = Traits::low(result, lead); - data += 1; - size -= 1; - } - // U+E000..U+FFFF - else if (static_cast(lead - 0xE000) < 0x2000) - { - result = Traits::low(result, lead); - data += 1; - size -= 1; - } - // surrogate pair lead - else if (static_cast(lead - 0xD800) < 0x400 && size >= 2) - { - uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1]; + // U+0000..U+D7FF + if (lead < 0xD800) { + result = Traits::low(result, lead); + data += 1; + size -= 1; + } + // U+E000..U+FFFF + else if (static_cast(lead - 0xE000) < 0x2000) { + result = Traits::low(result, lead); + data += 1; + size -= 1; + } + // surrogate pair lead + else if (static_cast(lead - 0xD800) < 0x400 && size >= 2) { + uint16_t next = opt_swap::value ? 
endian_swap(data[1]) : data[1]; - if (static_cast(next - 0xDC00) < 0x400) - { - result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); - data += 2; - size -= 2; - } - else - { - data += 1; - size -= 1; - } - } - else - { - data += 1; - size -= 1; - } - } + if (static_cast(next - 0xDC00) < 0x400) { + result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); + data += 2; + size -= 2; + } else { + data += 1; + size -= 1; + } + } else { + data += 1; + size -= 1; + } + } - return result; - } - }; + return result; + } +}; - template struct utf32_decoder - { - typedef uint32_t type; +template struct utf32_decoder { + typedef uint32_t type; - template static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) - { - while (size) - { - uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; + template static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) { + while (size) { + uint32_t lead = opt_swap::value ? 
endian_swap(*data) : *data; - // U+0000..U+FFFF - if (lead < 0x10000) - { - result = Traits::low(result, lead); - data += 1; - size -= 1; - } - // U+10000..U+10FFFF - else - { - result = Traits::high(result, lead); - data += 1; - size -= 1; - } - } + // U+0000..U+FFFF + if (lead < 0x10000) { + result = Traits::low(result, lead); + data += 1; + size -= 1; + } + // U+10000..U+10FFFF + else { + result = Traits::high(result, lead); + data += 1; + size -= 1; + } + } - return result; - } - }; + return result; + } +}; - struct latin1_decoder - { - typedef uint8_t type; +struct latin1_decoder { + typedef uint8_t type; - template static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) - { - while (size) - { - result = Traits::low(result, *data); - data += 1; - size -= 1; - } + template static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) { + while (size) { + result = Traits::low(result, *data); + data += 1; + size -= 1; + } - return result; - } - }; + return result; + } +}; - template struct wchar_selector; +template struct wchar_selector; - template <> struct wchar_selector<2> - { - typedef uint16_t type; - typedef utf16_counter counter; - typedef utf16_writer writer; - typedef utf16_decoder decoder; - }; +template <> struct wchar_selector<2> { + typedef uint16_t type; + typedef utf16_counter counter; + typedef utf16_writer writer; + typedef utf16_decoder decoder; +}; - template <> struct wchar_selector<4> - { - typedef uint32_t type; - typedef utf32_counter counter; - typedef utf32_writer writer; - typedef utf32_decoder decoder; - }; +template <> struct wchar_selector<4> { + typedef uint32_t type; + typedef utf32_counter counter; + typedef utf32_writer writer; + typedef utf32_decoder decoder; +}; - typedef wchar_selector::counter wchar_counter; - typedef wchar_selector::writer wchar_writer; +typedef wchar_selector::counter 
wchar_counter; +typedef wchar_selector::writer wchar_writer; - struct wchar_decoder - { - typedef wchar_t type; +struct wchar_decoder { + typedef wchar_t type; - template static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) - { - typedef wchar_selector::decoder decoder; + template static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) { + typedef wchar_selector::decoder decoder; - return decoder::process(reinterpret_cast(data), size, result, traits); - } - }; + return decoder::process(reinterpret_cast(data), size, result, traits); + } +}; #ifdef PUGIXML_WCHAR_MODE - PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) - { - for (size_t i = 0; i < length; ++i) - result[i] = static_cast(endian_swap(static_cast::type>(data[i]))); - } +PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) +{ + for (size_t i = 0; i < length; ++i) + result[i] = static_cast(endian_swap(static_cast::type>(data[i]))); +} #endif PUGI__NS_END PUGI__NS_BEGIN - enum chartype_t - { - ct_parse_pcdata = 1, // \0, &, \r, < - ct_parse_attr = 2, // \0, &, \r, ', " - ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab - ct_space = 8, // \r, \n, space, tab - ct_parse_cdata = 16, // \0, ], >, \r - ct_parse_comment = 32, // \0, -, >, \r - ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . - ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, : - }; +enum chartype_t { + ct_parse_pcdata = 1, // \0, &, \r, < + ct_parse_attr = 2, // \0, &, \r, ', " + ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab + ct_space = 8, // \r, \n, space, tab + ct_parse_cdata = 16, // \0, ], >, \r + ct_parse_comment = 32, // \0, -, >, \r + ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . 
+ ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, : +}; - static const unsigned char chartype_table[256] = - { - 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 - 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63 - 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79 - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95 - 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111 - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127 +static const unsigned char chartype_table[256] = { + 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 + 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63 + 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79 + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95 + 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111 + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127 - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 
192, 192, 192, 192, 192, 192, 192 - }; + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 +}; - enum chartypex_t - { - ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > - ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, " - ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _ - ctx_digit = 8, // 0-9 - ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . - }; - - static const unsigned char chartypex_table[256] = - { - 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 - 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47 - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63 +enum chartypex_t { + ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > + ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, " + ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _ + ctx_digit = 8, // 0-9 + ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . 
+}; - 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79 - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95 - 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111 - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127 +static const unsigned char chartypex_table[256] = { + 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 + 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47 + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63 - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+ - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 - }; - -#ifdef PUGIXML_WCHAR_MODE - #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast(c) < 128 ? 
table[static_cast(c)] : table[128]) & (ct)) -#else - #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast(c)] & (ct)) -#endif + 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79 + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95 + 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111 + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127 - #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table) - #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table) - - PUGI__FN bool is_little_endian() - { - unsigned int ui = 1; - - return *reinterpret_cast(&ui) == 1; - } - - PUGI__FN xml_encoding get_wchar_encoding() - { - PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); - - if (sizeof(wchar_t) == 2) - return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - else - return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - } - - PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3) - { - // look for BOM in first few bytes - if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be; - if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le; - if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be; - if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le; - if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8; - - // look for <, (contents); - - PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; - - return guess_buffer_encoding(d0, d1, d2, d3); - } - - PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) - { - size_t length = size / sizeof(char_t); - - if (is_mutable) - { - out_buffer = static_cast(const_cast(contents)); - out_length = length; - } - else - { - char_t* buffer = static_cast(xml_memory::allocate((length 
+ 1) * sizeof(char_t))); - if (!buffer) return false; - - if (contents) - memcpy(buffer, contents, length * sizeof(char_t)); - else - assert(length == 0); - - buffer[length] = 0; - - out_buffer = buffer; - out_length = length + 1; - } - - return true; - } + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+ + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 +}; #ifdef PUGIXML_WCHAR_MODE - PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re) - { - return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || - (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); - } - - PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) - { - const char_t* data = static_cast(contents); - size_t length = size / sizeof(char_t); - - if (is_mutable) - { - char_t* buffer = const_cast(data); - - convert_wchar_endian_swap(buffer, data, length); - - out_buffer = buffer; - out_length = length; - } - else - { - char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!buffer) return false; - - convert_wchar_endian_swap(buffer, data, length); - buffer[length] = 0; - - out_buffer = buffer; - out_length = length + 1; - } - - return true; - } - - template PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) - { - const typename D::type* data = static_cast(contents); - 
size_t data_length = size / sizeof(typename D::type); - - // first pass: get length in wchar_t units - size_t length = D::process(data, data_length, 0, wchar_counter()); - - // allocate buffer of suitable length - char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!buffer) return false; - - // second pass: convert utf16 input to wchar_t - wchar_writer::value_type obegin = reinterpret_cast(buffer); - wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer()); - - assert(oend == obegin + length); - *oend = 0; - - out_buffer = buffer; - out_length = length + 1; - - return true; - } - - PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) - { - // get native encoding - xml_encoding wchar_encoding = get_wchar_encoding(); - - // fast path: no conversion required - if (encoding == wchar_encoding) - return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); - - // only endian-swapping is required - if (need_endian_swap_utf(encoding, wchar_encoding)) - return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); - - // source encoding is utf8 - if (encoding == encoding_utf8) - return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder()); - - // source encoding is utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - return (native_encoding == encoding) ? - convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()) : - convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()); - } - - // source encoding is utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - xml_encoding native_encoding = is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; - - return (native_encoding == encoding) ? - convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()) : - convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()); - } - - // source encoding is latin1 - if (encoding == encoding_latin1) - return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder()); - - assert(!"Invalid encoding"); - return false; - } +#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast(c) < 128 ? table[static_cast(c)] : table[128]) & (ct)) #else - template PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) - { - const typename D::type* data = static_cast(contents); - size_t data_length = size / sizeof(typename D::type); - - // first pass: get length in utf8 units - size_t length = D::process(data, data_length, 0, utf8_counter()); - - // allocate buffer of suitable length - char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!buffer) return false; - - // second pass: convert utf16 input to utf8 - uint8_t* obegin = reinterpret_cast(buffer); - uint8_t* oend = D::process(data, data_length, obegin, utf8_writer()); - - assert(oend == obegin + length); - *oend = 0; - - out_buffer = buffer; - out_length = length + 1; - - return true; - } - - PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size) - { - for (size_t i = 0; i < size; ++i) - if (data[i] > 127) - return i; - - return size; - } - - PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) - { - const uint8_t* data = static_cast(contents); - size_t data_length = size; - - // get size of prefix that does not need utf8 conversion - size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length); - assert(prefix_length <= data_length); - - const uint8_t* postfix = data + 
prefix_length; - size_t postfix_length = data_length - prefix_length; - - // if no conversion is needed, just return the original buffer - if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); - - // first pass: get length in utf8 units - size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter()); - - // allocate buffer of suitable length - char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!buffer) return false; - - // second pass: convert latin1 input to utf8 - memcpy(buffer, data, prefix_length); - - uint8_t* obegin = reinterpret_cast(buffer); - uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer()); - - assert(oend == obegin + length); - *oend = 0; - - out_buffer = buffer; - out_length = length + 1; - - return true; - } - - PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) - { - // fast path: no conversion required - if (encoding == encoding_utf8) - return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); - - // source encoding is utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - return (native_encoding == encoding) ? - convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()) : - convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()); - } - - // source encoding is utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - return (native_encoding == encoding) ? 
- convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()) : - convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()); - } - - // source encoding is latin1 - if (encoding == encoding_latin1) - return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); - - assert(!"Invalid encoding"); - return false; - } +#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast(c)] & (ct)) #endif - PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length) - { - // get length in utf8 characters - return wchar_decoder::process(str, length, 0, utf8_counter()); - } - - PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length) - { - // convert to utf8 - uint8_t* begin = reinterpret_cast(buffer); - uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer()); - - assert(begin + size == end); - (void)!end; - (void)!size; - } - -#ifndef PUGIXML_NO_STL - PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length) - { - // first pass: get length in utf8 characters - size_t size = as_utf8_begin(str, length); - - // allocate resulting string - std::string result; - result.resize(size); - - // second pass: convert to utf8 - if (size > 0) as_utf8_end(&result[0], size, str, length); - - return result; - } - - PUGI__FN std::basic_string as_wide_impl(const char* str, size_t size) - { - const uint8_t* data = reinterpret_cast(str); - - // first pass: get length in wchar_t units - size_t length = utf8_decoder::process(data, size, 0, wchar_counter()); - - // allocate resulting string - std::basic_string result; - result.resize(length); - - // second pass: convert to wchar_t - if (length > 0) - { - wchar_writer::value_type begin = reinterpret_cast(&result[0]); - wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer()); - - assert(begin + length == end); - (void)!end; - } - - return result; - } -#endif - - template - inline bool 
strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target) - { - // never reuse shared memory - if (header & xml_memory_page_contents_shared_mask) return false; - - size_t target_length = strlength(target); - - // always reuse document buffer memory if possible - if ((header & header_mask) == 0) return target_length >= length; - - // reuse heap memory if waste is not too great - const size_t reuse_threshold = 32; - - return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2); - } - - template - PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length) - { - if (source_length == 0) - { - // empty string and null pointer are equivalent, so just deallocate old memory - xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; - - if (header & header_mask) alloc->deallocate_string(dest); - - // mark the string as not allocated - dest = 0; - header &= ~header_mask; - - return true; - } - else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) - { - // we can reuse old buffer, so just copy the new data (including zero terminator) - memcpy(dest, source, source_length * sizeof(char_t)); - dest[source_length] = 0; - - return true; - } - else - { - xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; - - if (!alloc->reserve()) return false; - - // allocate new buffer - char_t* buf = alloc->allocate_string(source_length + 1); - if (!buf) return false; - - // copy the string (including zero terminator) - memcpy(buf, source, source_length * sizeof(char_t)); - buf[source_length] = 0; - - // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures) - if (header & header_mask) alloc->deallocate_string(dest); - - // the string is now allocated, so set the flag - dest = buf; - header |= header_mask; - - return true; - } - } - - struct gap - { 
- char_t* end; - size_t size; - - gap(): end(0), size(0) - { - } - - // Push new gap, move s count bytes further (skipping the gap). - // Collapse previous gap. - void push(char_t*& s, size_t count) - { - if (end) // there was a gap already; collapse it - { - // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) - assert(s >= end); - memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); - } - - s += count; // end of current gap - - // "merge" two gaps - end = s; - size += count; - } - - // Collapse all gaps, return past-the-end pointer - char_t* flush(char_t* s) - { - if (end) - { - // Move [old_gap_end, current_pos) to [old_gap_start, ...) - assert(s >= end); - memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); - - return s - size; - } - else return s; - } - }; - - PUGI__FN char_t* strconv_escape(char_t* s, gap& g) - { - char_t* stre = s + 1; - - switch (*stre) - { - case '#': // &#... - { - unsigned int ucsc = 0; - - if (stre[1] == 'x') // &#x... (hex code) - { - stre += 2; - - char_t ch = *stre; - - if (ch == ';') return stre; - - for (;;) - { - if (static_cast(ch - '0') <= 9) - ucsc = 16 * ucsc + (ch - '0'); - else if (static_cast((ch | ' ') - 'a') <= 5) - ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); - else if (ch == ';') - break; - else // cancel - return stre; - - ch = *++stre; - } - - ++stre; - } - else // &#... 
(dec code) - { - char_t ch = *++stre; - - if (ch == ';') return stre; - - for (;;) - { - if (static_cast(static_cast(ch) - '0') <= 9) - ucsc = 10 * ucsc + (ch - '0'); - else if (ch == ';') - break; - else // cancel - return stre; - - ch = *++stre; - } - - ++stre; - } - - #ifdef PUGIXML_WCHAR_MODE - s = reinterpret_cast(wchar_writer::any(reinterpret_cast(s), ucsc)); - #else - s = reinterpret_cast(utf8_writer::any(reinterpret_cast(s), ucsc)); - #endif - - g.push(s, stre - s); - return stre; - } - - case 'a': // &a - { - ++stre; - - if (*stre == 'm') // &am - { - if (*++stre == 'p' && *++stre == ';') // & - { - *s++ = '&'; - ++stre; - - g.push(s, stre - s); - return stre; - } - } - else if (*stre == 'p') // &ap - { - if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' - { - *s++ = '\''; - ++stre; - - g.push(s, stre - s); - return stre; - } - } - break; - } - - case 'g': // &g - { - if (*++stre == 't' && *++stre == ';') // > - { - *s++ = '>'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - - case 'l': // &l - { - if (*++stre == 't' && *++stre == ';') // < - { - *s++ = '<'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - - case 'q': // &q - { - if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " - { - *s++ = '"'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - - default: - break; - } - - return stre; - } - - // Parser utilities - #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) - #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; } - #define PUGI__OPTSET(OPT) ( optmsk & (OPT) ) - #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); } - #define PUGI__POPNODE() { cursor = cursor->parent; } - #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; } - #define PUGI__SCANWHILE(X) { while (X) ++s; } - #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss 
= s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } } - #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; } - #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast(0) - #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); } - - PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) - { - gap g; - - while (true) - { - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment)); - - if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here - { - *g.flush(s) = 0; - - return s + (s[2] == '>' ? 3 : 2); - } - else if (*s == 0) - { - return 0; - } - else ++s; - } - } - - PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch) - { - gap g; - - while (true) - { - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata)); - - if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here - { - *g.flush(s) = 0; - - return s + 1; - } - else if (*s == 0) - { - return 0; - } - else ++s; - } - } - - typedef char_t* (*strconv_pcdata_t)(char_t*); - - template struct strconv_pcdata_impl - { - static char_t* parse(char_t* s) - { - gap g; - - char_t* begin = s; - - while (true) - { - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata)); - - if (*s == '<') // PCDATA ends here - { - char_t* end = g.flush(s); - - if (opt_trim::value) - while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) - --end; - - *end = 0; - - return s + 1; - } - else if (opt_eol::value && *s == '\r') // Either a single 
0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (*s == 0) - { - char_t* end = g.flush(s); - - if (opt_trim::value) - while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) - --end; - - *end = 0; - - return s; - } - else ++s; - } - } - }; - - PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) - { - PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800); - - switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim) - { - case 0: return strconv_pcdata_impl::parse; - case 1: return strconv_pcdata_impl::parse; - case 2: return strconv_pcdata_impl::parse; - case 3: return strconv_pcdata_impl::parse; - case 4: return strconv_pcdata_impl::parse; - case 5: return strconv_pcdata_impl::parse; - case 6: return strconv_pcdata_impl::parse; - case 7: return strconv_pcdata_impl::parse; - default: assert(false); return 0; // should not get here - } - } - - typedef char_t* (*strconv_attribute_t)(char_t*, char_t); - - template struct strconv_attribute_impl - { - static char_t* parse_wnorm(char_t* s, char_t end_quote) - { - gap g; - - // trim leading whitespaces - if (PUGI__IS_CHARTYPE(*s, ct_space)) - { - char_t* str = s; - - do ++str; - while (PUGI__IS_CHARTYPE(*str, ct_space)); - - g.push(s, str - s); - } - - while (true) - { - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space)); - - if (*s == end_quote) - { - char_t* str = g.flush(s); - - do *str-- = 0; - while (PUGI__IS_CHARTYPE(*str, ct_space)); - - return s + 1; - } - else if (PUGI__IS_CHARTYPE(*s, ct_space)) - { - *s++ = ' '; - - if (PUGI__IS_CHARTYPE(*s, ct_space)) - { - char_t* str = s + 1; - while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str; - - g.push(s, str - s); - } - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - 
else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_wconv(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws)); - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (PUGI__IS_CHARTYPE(*s, ct_space)) - { - if (*s == '\r') - { - *s++ = ' '; - - if (*s == '\n') g.push(s, 1); - } - else *s++ = ' '; - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_eol(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (*s == '\r') - { - *s++ = '\n'; - - if (*s == '\n') g.push(s, 1); - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_simple(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - }; - - PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask) - { - PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); - - switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes) - { - case 0: return strconv_attribute_impl::parse_simple; - case 1: return strconv_attribute_impl::parse_simple; - case 2: return strconv_attribute_impl::parse_eol; - case 3: return strconv_attribute_impl::parse_eol; - case 4: return strconv_attribute_impl::parse_wconv; - case 5: return 
strconv_attribute_impl::parse_wconv; - case 6: return strconv_attribute_impl::parse_wconv; - case 7: return strconv_attribute_impl::parse_wconv; - case 8: return strconv_attribute_impl::parse_wnorm; - case 9: return strconv_attribute_impl::parse_wnorm; - case 10: return strconv_attribute_impl::parse_wnorm; - case 11: return strconv_attribute_impl::parse_wnorm; - case 12: return strconv_attribute_impl::parse_wnorm; - case 13: return strconv_attribute_impl::parse_wnorm; - case 14: return strconv_attribute_impl::parse_wnorm; - case 15: return strconv_attribute_impl::parse_wnorm; - default: assert(false); return 0; // should not get here - } - } - - inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) - { - xml_parse_result result; - result.status = status; - result.offset = offset; - - return result; - } - - struct xml_parser - { - xml_allocator alloc; - xml_allocator* alloc_state; - char_t* error_offset; - xml_parse_status error_status; - - xml_parser(xml_allocator* alloc_): alloc(*alloc_), alloc_state(alloc_), error_offset(0), error_status(status_ok) - { - } - - ~xml_parser() - { - *alloc_state = alloc; - } - - // DOCTYPE consists of nested sections of the following possible types: - // , , "...", '...' - // - // - // First group can not contain nested groups - // Second group can contain nested groups of the same type - // Third group can contain all other groups - char_t* parse_doctype_primitive(char_t* s) - { - if (*s == '"' || *s == '\'') - { - // quoted string - char_t ch = *s++; - PUGI__SCANFOR(*s == ch); - if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); - - s++; - } - else if (s[0] == '<' && s[1] == '?') - { - // - s += 2; - PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype - if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); - - s += 2; - } - else if (s[0] == '<' && s[1] == '!' 
&& s[2] == '-' && s[3] == '-') - { - s += 4; - PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype - if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); - - s += 3; - } - else PUGI__THROW_ERROR(status_bad_doctype, s); - - return s; - } - - char_t* parse_doctype_ignore(char_t* s) - { - size_t depth = 0; - - assert(s[0] == '<' && s[1] == '!' && s[2] == '['); - s += 3; - - while (*s) - { - if (s[0] == '<' && s[1] == '!' && s[2] == '[') - { - // nested ignore section - s += 3; - depth++; - } - else if (s[0] == ']' && s[1] == ']' && s[2] == '>') - { - // ignore section end - s += 3; - - if (depth == 0) - return s; - - depth--; - } - else s++; - } - - PUGI__THROW_ERROR(status_bad_doctype, s); - } - - char_t* parse_doctype_group(char_t* s, char_t endch) - { - size_t depth = 0; - - assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); - s += 2; - - while (*s) - { - if (s[0] == '<' && s[1] == '!' && s[2] != '-') - { - if (s[2] == '[') - { - // ignore - s = parse_doctype_ignore(s); - if (!s) return s; - } - else - { - // some control group - s += 2; - depth++; - } - } - else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') - { - // unknown tag (forbidden), or some primitive group - s = parse_doctype_primitive(s); - if (!s) return s; - } - else if (*s == '>') - { - if (depth == 0) - return s; - - depth--; - s++; - } - else s++; - } - - if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); - - return s; - } - - char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) - { - // parse node contents, starting with exclamation mark - ++s; - - if (*s == '-') // 'value = s; // Save the offset. - } - - if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) - { - s = strconv_comment(s, endch); - - if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value); - } - else - { - // Scan for terminating '-->'. 
- PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')); - PUGI__CHECK_ERROR(status_bad_comment, s); - - if (PUGI__OPTSET(parse_comments)) - *s = 0; // Zero-terminate this segment at the first terminating '-'. - - s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. - } - } - else PUGI__THROW_ERROR(status_bad_comment, s); - } - else if (*s == '[') - { - // 'value = s; // Save the offset. - - if (PUGI__OPTSET(parse_eol)) - { - s = strconv_cdata(s, endch); - - if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value); - } - else - { - // Scan for terminating ']]>'. - PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); - PUGI__CHECK_ERROR(status_bad_cdata, s); - - *s++ = 0; // Zero-terminate this segment. - } - } - else // Flagged for discard, but we still have to scan for the terminator. - { - // Scan for terminating ']]>'. - PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); - PUGI__CHECK_ERROR(status_bad_cdata, s); - - ++s; - } - - s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. 
- } - else PUGI__THROW_ERROR(status_bad_cdata, s); - } - else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E')) - { - s -= 2; - - if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s); - - char_t* mark = s + 9; - - s = parse_doctype_group(s, endch); - if (!s) return s; - - assert((*s == 0 && endch == '>') || *s == '>'); - if (*s) *s++ = 0; - - if (PUGI__OPTSET(parse_doctype)) - { - while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark; - - PUGI__PUSHNODE(node_doctype); - - cursor->value = mark; - } - } - else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s); - else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s); - else PUGI__THROW_ERROR(status_unrecognized_tag, s); - - return s; - } - - char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) - { - // load into registers - xml_node_struct* cursor = ref_cursor; - char_t ch = 0; - - // parse node contents, starting with question mark - ++s; - - // read PI target - char_t* target = s; - - if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s); - - PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); - PUGI__CHECK_ERROR(status_bad_pi, s); - - // determine node type; stricmp / strcasecmp is not portable - bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; - - if (declaration ? 
PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi)) - { - if (declaration) - { - // disallow non top-level declarations - if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s); - - PUGI__PUSHNODE(node_declaration); - } - else - { - PUGI__PUSHNODE(node_pi); - } - - cursor->name = target; - - PUGI__ENDSEG(); - - // parse value/attributes - if (ch == '?') - { - // empty node - if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s); - s += (*s == '>'); - - PUGI__POPNODE(); - } - else if (PUGI__IS_CHARTYPE(ch, ct_space)) - { - PUGI__SKIPWS(); - - // scan for tag end - char_t* value = s; - - PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); - PUGI__CHECK_ERROR(status_bad_pi, s); - - if (declaration) - { - // replace ending ? with / so that 'element' terminates properly - *s = '/'; - - // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES - s = value; - } - else - { - // store value and step over > - cursor->value = value; - - PUGI__POPNODE(); - - PUGI__ENDSEG(); - - s += (*s == '>'); - } - } - else PUGI__THROW_ERROR(status_bad_pi, s); - } - else - { - // scan for tag end - PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); - PUGI__CHECK_ERROR(status_bad_pi, s); - - s += (s[1] == '>' ? 2 : 1); - } - - // store from registers - ref_cursor = cursor; - - return s; - } - - char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch) - { - strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); - strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); - - char_t ch = 0; - xml_node_struct* cursor = root; - char_t* mark = s; - - while (*s != 0) - { - if (*s == '<') - { - ++s; - - LOC_TAG: - if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' - { - PUGI__PUSHNODE(node_element); // Append a new node to the tree. - - cursor->name = s; - - PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. 
- PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. - - if (ch == '>') - { - // end of tag - } - else if (PUGI__IS_CHARTYPE(ch, ct_space)) - { - LOC_ATTRIBUTES: - while (true) - { - PUGI__SKIPWS(); // Eat any whitespace. - - if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #... - { - xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute. - if (!a) PUGI__THROW_ERROR(status_out_of_memory, s); - - a->name = s; // Save the offset. - - PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. - PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. - - if (PUGI__IS_CHARTYPE(ch, ct_space)) - { - PUGI__SKIPWS(); // Eat any whitespace. - - ch = *s; - ++s; - } - - if (ch == '=') // '<... #=...' - { - PUGI__SKIPWS(); // Eat any whitespace. - - if (*s == '"' || *s == '\'') // '<... #="...' - { - ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. - ++s; // Step over the quote. - a->value = s; // Save the offset. - - s = strconv_attribute(s, ch); - - if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value); - - // After this line the loop continues from the start; - // Whitespaces, / and > are ok, symbols and EOF are wrong, - // everything else will be detected - if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s); - } - else PUGI__THROW_ERROR(status_bad_attribute, s); - } - else PUGI__THROW_ERROR(status_bad_attribute, s); - } - else if (*s == '/') - { - ++s; - - if (*s == '>') - { - PUGI__POPNODE(); - s++; - break; - } - else if (*s == 0 && endch == '>') - { - PUGI__POPNODE(); - break; - } - else PUGI__THROW_ERROR(status_bad_start_element, s); - } - else if (*s == '>') - { - ++s; - - break; - } - else if (*s == 0 && endch == '>') - { - break; - } - else PUGI__THROW_ERROR(status_bad_start_element, s); - } - - // !!! 
- } - else if (ch == '/') // '<#.../' - { - if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s); - - PUGI__POPNODE(); // Pop. - - s += (*s == '>'); - } - else if (ch == 0) - { - // we stepped over null terminator, backtrack & handle closing tag - --s; - - if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s); - } - else PUGI__THROW_ERROR(status_bad_start_element, s); - } - else if (*s == '/') - { - ++s; - - char_t* name = cursor->name; - if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s); - - while (PUGI__IS_CHARTYPE(*s, ct_symbol)) - { - if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s); - } - - if (*name) - { - if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s); - else PUGI__THROW_ERROR(status_end_element_mismatch, s); - } - - PUGI__POPNODE(); // Pop. - - PUGI__SKIPWS(); - - if (*s == 0) - { - if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s); - } - else - { - if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s); - ++s; - } - } - else if (*s == '?') // 'first_child) continue; - } - } - - if (!PUGI__OPTSET(parse_trim_pcdata)) - s = mark; - - if (cursor->parent || PUGI__OPTSET(parse_fragment)) - { - PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. - cursor->value = s; // Save the offset. - - s = strconv_pcdata(s); - - PUGI__POPNODE(); // Pop since this is a standalone. - - if (!*s) break; - } - else - { - PUGI__SCANFOR(*s == '<'); // '...<' - if (!*s) break; - - ++s; - } - - // We're after '<' - goto LOC_TAG; - } - } - - // check that last tag is closed - if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s); - - return s; - } - - #ifdef PUGIXML_WCHAR_MODE - static char_t* parse_skip_bom(char_t* s) - { - unsigned int bom = 0xfeff; - return (s[0] == static_cast(bom)) ? s + 1 : s; - } - #else - static char_t* parse_skip_bom(char_t* s) - { - return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? 
s + 3 : s; - } - #endif - - static bool has_element_node_siblings(xml_node_struct* node) - { - while (node) - { - if (PUGI__NODETYPE(node) == node_element) return true; - - node = node->next_sibling; - } - - return false; - } - - static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) - { - // early-out for empty documents - if (length == 0) - return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element); - - // get last child of the root before parsing - xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0; - - // create parser on stack - xml_parser parser(static_cast(xmldoc)); - - // save last character and make buffer zero-terminated (speeds up parsing) - char_t endch = buffer[length - 1]; - buffer[length - 1] = 0; - - // skip BOM to make sure it does not end up as part of parse output - char_t* buffer_data = parse_skip_bom(buffer); - - // perform actual parsing - parser.parse_tree(buffer_data, root, optmsk, endch); - - xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0); - assert(result.offset >= 0 && static_cast(result.offset) <= length); - - if (result) - { - // since we removed last character, we have to handle the only possible false positive (stray <) - if (endch == '<') - return make_parse_result(status_unrecognized_tag, length - 1); - - // check if there are any element nodes parsed - xml_node_struct* first_root_child_parsed = last_root_child ? 
last_root_child->next_sibling + 0 : root->first_child+ 0; - - if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) - return make_parse_result(status_no_document_element, length - 1); - } - else - { - // roll back offset if it occurs on a null terminator in the source buffer - if (result.offset > 0 && static_cast(result.offset) == length - 1 && endch == 0) - result.offset--; - } - - return result; - } - }; - - // Output facilities - PUGI__FN xml_encoding get_write_native_encoding() - { - #ifdef PUGIXML_WCHAR_MODE - return get_wchar_encoding(); - #else - return encoding_utf8; - #endif - } - - PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding) - { - // replace wchar encoding with utf implementation - if (encoding == encoding_wchar) return get_wchar_encoding(); - - // replace utf16 encoding with utf16 with specific endianness - if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - // replace utf32 encoding with utf32 with specific endianness - if (encoding == encoding_utf32) return is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; - - // only do autodetection if no explicit encoding is requested - if (encoding != encoding_auto) return encoding; - - // assume utf8 encoding - return encoding_utf8; - } - - template PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T) - { - PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); - - typename T::value_type end = D::process(reinterpret_cast(data), length, dest, T()); - - return static_cast(end - dest) * sizeof(*dest); - } - - template PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap) - { - PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); - - typename T::value_type end = D::process(reinterpret_cast(data), length, dest, T()); - - if (opt_swap) - { - for (typename T::value_type i = dest; i != end; ++i) - *i = endian_swap(*i); - } - - return static_cast(end - dest) * sizeof(*dest); - } +#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table) +#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table) + +PUGI__FN bool is_little_endian() +{ + unsigned int ui = 1; + + return *reinterpret_cast(&ui) == 1; +} + +PUGI__FN xml_encoding get_wchar_encoding() +{ + PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); + + if (sizeof(wchar_t) == 2) + return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + else + return is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; +} + +PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3) +{ + // look for BOM in first few bytes + if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be; + if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le; + if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be; + if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le; + if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8; + + // look for <, (contents); + + PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; + + return guess_buffer_encoding(d0, d1, d2, d3); +} + +PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) +{ + size_t length = size / sizeof(char_t); + + if (is_mutable) { + out_buffer = static_cast(const_cast(contents)); + out_length = length; + } else { + char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); + if (!buffer) return false; + + if (contents) + memcpy(buffer, contents, length * sizeof(char_t)); + else + assert(length == 0); + + buffer[length] = 0; + + out_buffer = buffer; + out_length = length + 1; + } + + return true; +} #ifdef PUGIXML_WCHAR_MODE - PUGI__FN size_t get_valid_length(const char_t* data, size_t length) - { - if (length < 1) return 0; +PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re) +{ + return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || + (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); +} - // discard last character if it's the lead of a surrogate pair - return (sizeof(wchar_t) == 2 && static_cast(static_cast(data[length - 1]) - 0xD800) < 0x400) ? 
length - 1 : length; - } +PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) +{ + const char_t* data = static_cast(contents); + size_t length = size / sizeof(char_t); - PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) - { - // only endian-swapping is required - if (need_endian_swap_utf(encoding, get_wchar_encoding())) - { - convert_wchar_endian_swap(r_char, data, length); + if (is_mutable) { + char_t* buffer = const_cast(data); - return length * sizeof(char_t); - } - - // convert to utf8 - if (encoding == encoding_utf8) - return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer()); + convert_wchar_endian_swap(buffer, data, length); - // convert to utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + out_buffer = buffer; + out_length = length; + } else { + char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); + if (!buffer) return false; - return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding); - } + convert_wchar_endian_swap(buffer, data, length); + buffer[length] = 0; - // convert to utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - xml_encoding native_encoding = is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; + out_buffer = buffer; + out_length = length + 1; + } - return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding); - } + return true; +} - // convert to latin1 - if (encoding == encoding_latin1) - return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer()); +template PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) +{ + const typename D::type* data = static_cast(contents); + size_t data_length = size / sizeof(typename D::type); - assert(!"Invalid encoding"); - return 0; - } + // first pass: get length in wchar_t units + size_t length = D::process(data, data_length, 0, wchar_counter()); + + // allocate buffer of suitable length + char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); + if (!buffer) return false; + + // second pass: convert utf16 input to wchar_t + wchar_writer::value_type obegin = reinterpret_cast(buffer); + wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer()); + + assert(oend == obegin + length); + *oend = 0; + + out_buffer = buffer; + out_length = length + 1; + + return true; +} + +PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) +{ + // get native encoding + xml_encoding wchar_encoding = get_wchar_encoding(); + + // fast path: no conversion required + if (encoding == wchar_encoding) + return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); + + // only endian-swapping is required + if (need_endian_swap_utf(encoding, wchar_encoding)) + return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); + + // source encoding is utf8 + if (encoding == encoding_utf8) + return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder()); + + 
// source encoding is utf16 + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) { + xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + return (native_encoding == encoding) ? + convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()) : + convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()); + } + + // source encoding is utf32 + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) { + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + return (native_encoding == encoding) ? + convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()) : + convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()); + } + + // source encoding is latin1 + if (encoding == encoding_latin1) + return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder()); + + assert(!"Invalid encoding"); + return false; +} #else - PUGI__FN size_t get_valid_length(const char_t* data, size_t length) - { - if (length < 5) return 0; +template PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) +{ + const typename D::type* data = static_cast(contents); + size_t data_length = size / sizeof(typename D::type); - for (size_t i = 1; i <= 4; ++i) - { - uint8_t ch = static_cast(data[length - i]); + // first pass: get length in utf8 units + size_t length = D::process(data, data_length, 0, utf8_counter()); - // either a standalone character or a leading one - if ((ch & 0xc0) != 0x80) return length - i; - } + // allocate buffer of suitable length + char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); + if (!buffer) return false; - // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk - return length; - } + // second pass: convert 
utf16 input to utf8 + uint8_t* obegin = reinterpret_cast(buffer); + uint8_t* oend = D::process(data, data_length, obegin, utf8_writer()); - PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) - { - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + assert(oend == obegin + length); + *oend = 0; - return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding); - } + out_buffer = buffer; + out_length = length + 1; - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + return true; +} - return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding); - } +PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size) +{ + for (size_t i = 0; i < size; ++i) + if (data[i] > 127) + return i; - if (encoding == encoding_latin1) - return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer()); + return size; +} - assert(!"Invalid encoding"); - return 0; - } +PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) +{ + const uint8_t* data = static_cast(contents); + size_t data_length = size; + + // get size of prefix that does not need utf8 conversion + size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length); + assert(prefix_length <= data_length); + + const uint8_t* postfix = data + prefix_length; + size_t postfix_length = data_length - prefix_length; + + // if no conversion is needed, just return the original buffer + if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, 
contents, size, is_mutable); + + // first pass: get length in utf8 units + size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter()); + + // allocate buffer of suitable length + char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); + if (!buffer) return false; + + // second pass: convert latin1 input to utf8 + memcpy(buffer, data, prefix_length); + + uint8_t* obegin = reinterpret_cast(buffer); + uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer()); + + assert(oend == obegin + length); + *oend = 0; + + out_buffer = buffer; + out_length = length + 1; + + return true; +} + +PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) +{ + // fast path: no conversion required + if (encoding == encoding_utf8) + return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); + + // source encoding is utf16 + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) { + xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + return (native_encoding == encoding) ? + convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()) : + convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()); + } + + // source encoding is utf32 + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) { + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + return (native_encoding == encoding) ? 
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()) : + convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()); + } + + // source encoding is latin1 + if (encoding == encoding_latin1) + return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); + + assert(!"Invalid encoding"); + return false; +} #endif - class xml_buffered_writer - { - xml_buffered_writer(const xml_buffered_writer&); - xml_buffered_writer& operator=(const xml_buffered_writer&); - - public: - xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) - { - PUGI__STATIC_ASSERT(bufcapacity >= 8); - } - - size_t flush() - { - flush(buffer, bufsize); - bufsize = 0; - return 0; - } - - void flush(const char_t* data, size_t size) - { - if (size == 0) return; - - // fast path, just write data - if (encoding == get_write_native_encoding()) - writer.write(data, size * sizeof(char_t)); - else - { - // convert chunk - size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding); - assert(result <= sizeof(scratch)); - - // write data - writer.write(scratch.data_u8, result); - } - } - - void write_direct(const char_t* data, size_t length) - { - // flush the remaining buffer contents - flush(); - - // handle large chunks - if (length > bufcapacity) - { - if (encoding == get_write_native_encoding()) - { - // fast path, can just write data chunk - writer.write(data, length * sizeof(char_t)); - return; - } - - // need to convert in suitable chunks - while (length > bufcapacity) - { - // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer - // and form a complete codepoint sequence (i.e. 
discard start of last codepoint if necessary) - size_t chunk_size = get_valid_length(data, bufcapacity); - assert(chunk_size); - - // convert chunk and write - flush(data, chunk_size); - - // iterate - data += chunk_size; - length -= chunk_size; - } - - // small tail is copied below - bufsize = 0; - } - - memcpy(buffer + bufsize, data, length * sizeof(char_t)); - bufsize += length; - } - - void write_buffer(const char_t* data, size_t length) - { - size_t offset = bufsize; - - if (offset + length <= bufcapacity) - { - memcpy(buffer + offset, data, length * sizeof(char_t)); - bufsize = offset + length; - } - else - { - write_direct(data, length); - } - } - - void write_string(const char_t* data) - { - // write the part of the string that fits in the buffer - size_t offset = bufsize; - - while (*data && offset < bufcapacity) - buffer[offset++] = *data++; - - // write the rest - if (offset < bufcapacity) - { - bufsize = offset; - } - else - { - // backtrack a bit if we have split the codepoint - size_t length = offset - bufsize; - size_t extra = length - get_valid_length(data - length, length); - - bufsize = offset - extra; - - write_direct(data - extra, strlength(data) + extra); - } - } - - void write(char_t d0) - { - size_t offset = bufsize; - if (offset > bufcapacity - 1) offset = flush(); - - buffer[offset + 0] = d0; - bufsize = offset + 1; - } - - void write(char_t d0, char_t d1) - { - size_t offset = bufsize; - if (offset > bufcapacity - 2) offset = flush(); - - buffer[offset + 0] = d0; - buffer[offset + 1] = d1; - bufsize = offset + 2; - } - - void write(char_t d0, char_t d1, char_t d2) - { - size_t offset = bufsize; - if (offset > bufcapacity - 3) offset = flush(); - - buffer[offset + 0] = d0; - buffer[offset + 1] = d1; - buffer[offset + 2] = d2; - bufsize = offset + 3; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3) - { - size_t offset = bufsize; - if (offset > bufcapacity - 4) offset = flush(); - - buffer[offset + 0] = d0; - buffer[offset + 1] 
= d1; - buffer[offset + 2] = d2; - buffer[offset + 3] = d3; - bufsize = offset + 4; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) - { - size_t offset = bufsize; - if (offset > bufcapacity - 5) offset = flush(); - - buffer[offset + 0] = d0; - buffer[offset + 1] = d1; - buffer[offset + 2] = d2; - buffer[offset + 3] = d3; - buffer[offset + 4] = d4; - bufsize = offset + 5; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) - { - size_t offset = bufsize; - if (offset > bufcapacity - 6) offset = flush(); - - buffer[offset + 0] = d0; - buffer[offset + 1] = d1; - buffer[offset + 2] = d2; - buffer[offset + 3] = d3; - buffer[offset + 4] = d4; - buffer[offset + 5] = d5; - bufsize = offset + 6; - } - - // utf8 maximum expansion: x4 (-> utf32) - // utf16 maximum expansion: x2 (-> utf32) - // utf32 maximum expansion: x1 - enum - { - bufcapacitybytes = - #ifdef PUGIXML_MEMORY_OUTPUT_STACK - PUGIXML_MEMORY_OUTPUT_STACK - #else - 10240 - #endif - , - bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4) - }; - - char_t buffer[bufcapacity]; - - union - { - uint8_t data_u8[4 * bufcapacity]; - uint16_t data_u16[2 * bufcapacity]; - uint32_t data_u32[bufcapacity]; - char_t data_char[bufcapacity]; - } scratch; - - xml_writer& writer; - size_t bufsize; - xml_encoding encoding; - }; - - PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type) - { - while (*s) - { - const char_t* prev = s; - - // While *s is a usual symbol - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type)); - - writer.write_buffer(prev, static_cast(s - prev)); - - switch (*s) - { - case 0: break; - case '&': - writer.write('&', 'a', 'm', 'p', ';'); - ++s; - break; - case '<': - writer.write('&', 'l', 't', ';'); - ++s; - break; - case '>': - writer.write('&', 'g', 't', ';'); - ++s; - break; - case '"': - writer.write('&', 'q', 'u', 'o', 't', ';'); - ++s; - break; - default: // s is not a usual symbol - { - unsigned int 
ch = static_cast(*s++); - assert(ch < 32); - - writer.write('&', '#', static_cast((ch / 10) + '0'), static_cast((ch % 10) + '0'), ';'); - } - } - } - } - - PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) - { - if (flags & format_no_escapes) - writer.write_string(s); - else - text_output_escaped(writer, s, type); - } - - PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s) - { - do - { - writer.write('<', '!', '[', 'C', 'D'); - writer.write('A', 'T', 'A', '['); - - const char_t* prev = s; - - // look for ]]> sequence - we can't output it as is since it terminates CDATA - while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s; - - // skip ]] if we stopped at ]]>, > will go to the next CDATA section - if (*s) s += 2; - - writer.write_buffer(prev, static_cast(s - prev)); - - writer.write(']', ']', '>'); - } - while (*s); - } - - PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth) - { - switch (indent_length) - { - case 1: - { - for (unsigned int i = 0; i < depth; ++i) - writer.write(indent[0]); - break; - } - - case 2: - { - for (unsigned int i = 0; i < depth; ++i) - writer.write(indent[0], indent[1]); - break; - } - - case 3: - { - for (unsigned int i = 0; i < depth; ++i) - writer.write(indent[0], indent[1], indent[2]); - break; - } - - case 4: - { - for (unsigned int i = 0; i < depth; ++i) - writer.write(indent[0], indent[1], indent[2], indent[3]); - break; - } - - default: - { - for (unsigned int i = 0; i < depth; ++i) - writer.write_buffer(indent, indent_length); - } - } - } - - PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s) - { - writer.write('<', '!', '-', '-'); - - while (*s) - { - const char_t* prev = s; - - // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body - while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s; - - 
writer.write_buffer(prev, static_cast(s - prev)); - - if (*s) - { - assert(*s == '-'); - - writer.write('-', ' '); - ++s; - } - } - - writer.write('-', '-', '>'); - } - - PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s) - { - while (*s) - { - const char_t* prev = s; - - // look for ?> sequence - we can't output it since ?> terminates PI - while (*s && !(s[0] == '?' && s[1] == '>')) ++s; - - writer.write_buffer(prev, static_cast(s - prev)); - - if (*s) - { - assert(s[0] == '?' && s[1] == '>'); - - writer.write('?', ' ', '>'); - s += 2; - } - } - } - - PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) - { - const char_t* default_name = PUGIXML_TEXT(":anonymous"); - - for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) - { - if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes) - { - writer.write('\n'); - - text_output_indent(writer, indent, indent_length, depth + 1); - } - else - { - writer.write(' '); - } - - writer.write_string(a->name ? a->name + 0 : default_name); - writer.write('=', '"'); - - if (a->value) - text_output(writer, a->value, ctx_special_attr, flags); - - writer.write('"'); - } - } - - PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) - { - const char_t* default_name = PUGIXML_TEXT(":anonymous"); - const char_t* name = node->name ? 
node->name + 0 : default_name; - - writer.write('<'); - writer.write_string(name); - - if (node->first_attribute) - node_output_attributes(writer, node, indent, indent_length, flags, depth); - - if (!node->first_child) - { - writer.write(' ', '/', '>'); - - return false; - } - else - { - writer.write('>'); - - return true; - } - } - - PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node) - { - const char_t* default_name = PUGIXML_TEXT(":anonymous"); - const char_t* name = node->name ? node->name + 0 : default_name; - - writer.write('<', '/'); - writer.write_string(name); - writer.write('>'); - } - - PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) - { - const char_t* default_name = PUGIXML_TEXT(":anonymous"); - - switch (PUGI__NODETYPE(node)) - { - case node_pcdata: - text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags); - break; - - case node_cdata: - text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); - break; - - case node_comment: - node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); - break; - - case node_pi: - writer.write('<', '?'); - writer.write_string(node->name ? node->name + 0 : default_name); - - if (node->value) - { - writer.write(' '); - node_output_pi_value(writer, node->value); - } - - writer.write('?', '>'); - break; - - case node_declaration: - writer.write('<', '?'); - writer.write_string(node->name ? 
node->name + 0 : default_name); - node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0); - writer.write('?', '>'); - break; - - case node_doctype: - writer.write('<', '!', 'D', 'O', 'C'); - writer.write('T', 'Y', 'P', 'E'); - - if (node->value) - { - writer.write(' '); - writer.write_string(node->value); - } - - writer.write('>'); - break; - - default: - assert(!"Invalid node type"); - } - } - - enum indent_flags_t - { - indent_newline = 1, - indent_indent = 2 - }; - - PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth) - { - size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0; - unsigned int indent_flags = indent_indent; - - xml_node_struct* node = root; - - do - { - assert(node); - - // begin writing current node - if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata) - { - node_output_simple(writer, node, flags); - - indent_flags = 0; - } - else - { - if ((indent_flags & indent_newline) && (flags & format_raw) == 0) - writer.write('\n'); - - if ((indent_flags & indent_indent) && indent_length) - text_output_indent(writer, indent, indent_length, depth); - - if (PUGI__NODETYPE(node) == node_element) - { - indent_flags = indent_newline | indent_indent; - - if (node_output_start(writer, node, indent, indent_length, flags, depth)) - { - node = node->first_child; - depth++; - continue; - } - } - else if (PUGI__NODETYPE(node) == node_document) - { - indent_flags = indent_indent; - - if (node->first_child) - { - node = node->first_child; - continue; - } - } - else - { - node_output_simple(writer, node, flags); - - indent_flags = indent_newline | indent_indent; - } - } - - // continue to the next node - while (node != root) - { - if (node->next_sibling) - { - node = node->next_sibling; - break; - } - - node = node->parent; - - // write closing node - if 
(PUGI__NODETYPE(node) == node_element) - { - depth--; - - if ((indent_flags & indent_newline) && (flags & format_raw) == 0) - writer.write('\n'); - - if ((indent_flags & indent_indent) && indent_length) - text_output_indent(writer, indent, indent_length, depth); - - node_output_end(writer, node); - - indent_flags = indent_newline | indent_indent; - } - } - } - while (node != root); - - if ((indent_flags & indent_newline) && (flags & format_raw) == 0) - writer.write('\n'); - } - - PUGI__FN bool has_declaration(xml_node_struct* node) - { - for (xml_node_struct* child = node->first_child; child; child = child->next_sibling) - { - xml_node_type type = PUGI__NODETYPE(child); - - if (type == node_declaration) return true; - if (type == node_element) return false; - } - - return false; - } - - PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node) - { - for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) - if (a == attr) - return true; - - return false; - } - - PUGI__FN bool allow_insert_attribute(xml_node_type parent) - { - return parent == node_element || parent == node_declaration; - } - - PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child) - { - if (parent != node_document && parent != node_element) return false; - if (child == node_document || child == node_null) return false; - if (parent != node_document && (child == node_declaration || child == node_doctype)) return false; - - return true; - } - - PUGI__FN bool allow_move(xml_node parent, xml_node child) - { - // check that child can be a child of parent - if (!allow_insert_child(parent.type(), child.type())) - return false; - - // check that node is not moved between documents - if (parent.root() != child.root()) - return false; - - // check that new parent is not in the child subtree - xml_node cur = parent; - - while (cur) - { - if (cur == child) - return false; - - cur = cur.parent(); - } - - return true; - } - - template - PUGI__FN 
void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc) - { - assert(!dest && (header & header_mask) == 0); - - if (source) - { - if (alloc && (source_header & header_mask) == 0) - { - dest = source; - - // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared - header |= xml_memory_page_contents_shared_mask; - source_header |= xml_memory_page_contents_shared_mask; - } - else - strcpy_insitu(dest, header, header_mask, source, strlength(source)); - } - } - - PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc) - { - node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc); - node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc); - - for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute) - { - xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn)); - - if (da) - { - node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); - node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); - } - } - } - - PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn) - { - xml_allocator& alloc = get_allocator(dn); - xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? 
&alloc : 0; - - node_copy_contents(dn, sn, shared_alloc); - - xml_node_struct* dit = dn; - xml_node_struct* sit = sn->first_child; - - while (sit && sit != sn) - { - if (sit != dn) - { - xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit)); - - if (copy) - { - node_copy_contents(copy, sit, shared_alloc); - - if (sit->first_child) - { - dit = copy; - sit = sit->first_child; - continue; - } - } - } - - // continue to the next node - do - { - if (sit->next_sibling) - { - sit = sit->next_sibling; - break; - } - - sit = sit->parent; - dit = dit->parent; - } - while (sit != sn); - } - } - - PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa) - { - xml_allocator& alloc = get_allocator(da); - xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0; - - node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); - node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); - } - - inline bool is_text_node(xml_node_struct* node) - { - xml_node_type type = PUGI__NODETYPE(node); - - return type == node_pcdata || type == node_cdata; - } - - // get value with conversion functions - template U string_to_integer(const char_t* value, U minneg, U maxpos) - { - U result = 0; - const char_t* s = value; - - while (PUGI__IS_CHARTYPE(*s, ct_space)) - s++; - - bool negative = (*s == '-'); - - s += (*s == '+' || *s == '-'); - - bool overflow = false; - - if (s[0] == '0' && (s[1] | ' ') == 'x') - { - s += 2; - - // since overflow detection relies on length of the sequence skip leading zeros - while (*s == '0') - s++; - - const char_t* start = s; - - for (;;) - { - if (static_cast(*s - '0') < 10) - result = result * 16 + (*s - '0'); - else if (static_cast((*s | ' ') - 'a') < 6) - result = result * 16 + ((*s | ' ') - 'a' + 10); - else - break; - - s++; - } - - size_t digits = static_cast(s - start); - - overflow = 
digits > sizeof(U) * 2; - } - else - { - // since overflow detection relies on length of the sequence skip leading zeros - while (*s == '0') - s++; - - const char_t* start = s; - - for (;;) - { - if (static_cast(*s - '0') < 10) - result = result * 10 + (*s - '0'); - else - break; - - s++; - } - - size_t digits = static_cast(s - start); - - PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2); - - const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5; - const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6'; - const size_t high_bit = sizeof(U) * 8 - 1; - - overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit))); - } - - if (negative) - return (overflow || result > minneg) ? 0 - minneg : 0 - result; - else - return (overflow || result > maxpos) ? maxpos : result; - } - - PUGI__FN int get_value_int(const char_t* value) - { - return string_to_integer(value, 0 - static_cast(INT_MIN), INT_MAX); - } - - PUGI__FN unsigned int get_value_uint(const char_t* value) - { - return string_to_integer(value, 0, UINT_MAX); - } - - PUGI__FN double get_value_double(const char_t* value) - { - #ifdef PUGIXML_WCHAR_MODE - return wcstod(value, 0); - #else - return strtod(value, 0); - #endif - } - - PUGI__FN float get_value_float(const char_t* value) - { - #ifdef PUGIXML_WCHAR_MODE - return static_cast(wcstod(value, 0)); - #else - return static_cast(strtod(value, 0)); - #endif - } - - PUGI__FN bool get_value_bool(const char_t* value) - { - // only look at first char - char_t first = *value; - - // 1*, t* (true), T* (True), y* (yes), Y* (YES) - return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); - } - -#ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN long long get_value_llong(const char_t* value) - { - return string_to_integer(value, 0 - static_cast(LLONG_MIN), LLONG_MAX); - } - - PUGI__FN unsigned long long get_value_ullong(const 
char_t* value) - { - return string_to_integer(value, 0, ULLONG_MAX); - } -#endif - - template - PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative) - { - char_t* result = end - 1; - U rest = negative ? 0 - value : value; - - do - { - *result-- = static_cast('0' + (rest % 10)); - rest /= 10; - } - while (rest); - - assert(result >= begin); - (void)begin; - - *result = '-'; - - return result + !negative; - } - - // set value with conversion functions - template - PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf) - { - #ifdef PUGIXML_WCHAR_MODE - char_t wbuf[128]; - assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0])); - - size_t offset = 0; - for (; buf[offset]; ++offset) wbuf[offset] = buf[offset]; - - return strcpy_insitu(dest, header, header_mask, wbuf, offset); - #else - return strcpy_insitu(dest, header, header_mask, buf, strlen(buf)); - #endif - } - - template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, int value) - { - char_t buf[64]; - char_t* end = buf + sizeof(buf) / sizeof(buf[0]); - char_t* begin = integer_to_string(buf, end, value, value < 0); - - return strcpy_insitu(dest, header, header_mask, begin, end - begin); - } - - template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned int value) - { - char_t buf[64]; - char_t* end = buf + sizeof(buf) / sizeof(buf[0]); - char_t* begin = integer_to_string(buf, end, value, false); - - return strcpy_insitu(dest, header, header_mask, begin, end - begin); - } - - template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value) - { - char buf[128]; - sprintf(buf, "%.9g", value); - - return set_value_ascii(dest, header, header_mask, buf); - } - - template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value) - { - char buf[128]; - sprintf(buf, "%.17g", value); - 
- return set_value_ascii(dest, header, header_mask, buf); - } - - template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value) - { - return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5); - } - -#ifdef PUGIXML_HAS_LONG_LONG - template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, long long value) - { - char_t buf[64]; - char_t* end = buf + sizeof(buf) / sizeof(buf[0]); - char_t* begin = integer_to_string(buf, end, value, value < 0); - - return strcpy_insitu(dest, header, header_mask, begin, end - begin); - } - - template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned long long value) - { - char_t buf[64]; - char_t* end = buf + sizeof(buf) / sizeof(buf[0]); - char_t* begin = integer_to_string(buf, end, value, false); - - return strcpy_insitu(dest, header, header_mask, begin, end - begin); - } -#endif - - PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) - { - // check input buffer - if (!contents && size) return make_parse_result(status_io_error); - - // get actual encoding - xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); - - // get private buffer - char_t* buffer = 0; - size_t length = 0; - - if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory); - - // delete original buffer if we performed a conversion - if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents); - - // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself - if (own || buffer != contents) *out_buffer = buffer; - - // store buffer for offset_debug - 
doc->buffer = buffer; - - // parse - xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options); - - // remember encoding - res.encoding = buffer_encoding; - - return res; - } - - // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick - PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result) - { - #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) - // there are 64-bit versions of fseek/ftell, let's use them - typedef __int64 length_type; - - _fseeki64(file, 0, SEEK_END); - length_type length = _ftelli64(file); - _fseeki64(file, 0, SEEK_SET); - #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)) - // there are 64-bit versions of fseek/ftell, let's use them - typedef off64_t length_type; - - fseeko64(file, 0, SEEK_END); - length_type length = ftello64(file); - fseeko64(file, 0, SEEK_SET); - #else - // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway. 
- typedef long length_type; - - fseek(file, 0, SEEK_END); - length_type length = ftell(file); - fseek(file, 0, SEEK_SET); - #endif - - // check for I/O errors - if (length < 0) return status_io_error; - - // check for overflow - size_t result = static_cast(length); - - if (static_cast(result) != length) return status_out_of_memory; - - // finalize - out_result = result; - - return status_ok; - } - - // This function assumes that buffer has extra sizeof(char_t) writable bytes after size - PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) - { - // We only need to zero-terminate if encoding conversion does not do it for us - #ifdef PUGIXML_WCHAR_MODE - xml_encoding wchar_encoding = get_wchar_encoding(); - - if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding)) - { - size_t length = size / sizeof(char_t); - - static_cast(buffer)[length] = 0; - return (length + 1) * sizeof(char_t); - } - #else - if (encoding == encoding_utf8) - { - static_cast(buffer)[size] = 0; - return size + 1; - } - #endif - - return size; - } - - PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer) - { - if (!file) return make_parse_result(status_file_not_found); - - // get file size (can result in I/O errors) - size_t size = 0; - xml_parse_status size_status = get_file_size(file, size); - if (size_status != status_ok) return make_parse_result(size_status); - - size_t max_suffix_size = sizeof(char_t); - - // allocate buffer for the whole file - char* contents = static_cast(xml_memory::allocate(size + max_suffix_size)); - if (!contents) return make_parse_result(status_out_of_memory); - - // read file in memory - size_t read_size = fread(contents, 1, size, file); - - if (read_size != size) - { - xml_memory::deallocate(contents); - return make_parse_result(status_io_error); - } - - xml_encoding real_encoding = get_buffer_encoding(encoding, 
contents, size); - - return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer); - } +PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length) +{ + // get length in utf8 characters + return wchar_decoder::process(str, length, 0, utf8_counter()); +} + +PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length) +{ + // convert to utf8 + uint8_t* begin = reinterpret_cast(buffer); + uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer()); + + assert(begin + size == end); + (void)!end; + (void)!size; +} #ifndef PUGIXML_NO_STL - template struct xml_stream_chunk - { - static xml_stream_chunk* create() - { - void* memory = xml_memory::allocate(sizeof(xml_stream_chunk)); - if (!memory) return 0; - - return new (memory) xml_stream_chunk(); - } +PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length) +{ + // first pass: get length in utf8 characters + size_t size = as_utf8_begin(str, length); - static void destroy(xml_stream_chunk* chunk) - { - // free chunk chain - while (chunk) - { - xml_stream_chunk* next_ = chunk->next; + // allocate resulting string + std::string result; + result.resize(size); - xml_memory::deallocate(chunk); + // second pass: convert to utf8 + if (size > 0) as_utf8_end(&result[0], size, str, length); - chunk = next_; - } - } + return result; +} - xml_stream_chunk(): next(0), size(0) - { - } +PUGI__FN std::basic_string as_wide_impl(const char* str, size_t size) +{ + const uint8_t* data = reinterpret_cast(str); - xml_stream_chunk* next; - size_t size; + // first pass: get length in wchar_t units + size_t length = utf8_decoder::process(data, size, 0, wchar_counter()); - T data[xml_memory_page_size / sizeof(T)]; - }; + // allocate resulting string + std::basic_string result; + result.resize(length); - template PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream& stream, void** out_buffer, 
size_t* out_size) - { - auto_deleter > chunks(0, xml_stream_chunk::destroy); + // second pass: convert to wchar_t + if (length > 0) { + wchar_writer::value_type begin = reinterpret_cast(&result[0]); + wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer()); - // read file to a chunk list - size_t total = 0; - xml_stream_chunk* last = 0; + assert(begin + length == end); + (void)!end; + } - while (!stream.eof()) - { - // allocate new chunk - xml_stream_chunk* chunk = xml_stream_chunk::create(); - if (!chunk) return status_out_of_memory; + return result; +} +#endif - // append chunk to list - if (last) last = last->next = chunk; - else chunks.data = last = chunk; +template +inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target) +{ + // never reuse shared memory + if (header & xml_memory_page_contents_shared_mask) return false; - // read data to chunk - stream.read(chunk->data, static_cast(sizeof(chunk->data) / sizeof(T))); - chunk->size = static_cast(stream.gcount()) * sizeof(T); + size_t target_length = strlength(target); - // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors - if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; + // always reuse document buffer memory if possible + if ((header & header_mask) == 0) return target_length >= length; - // guard against huge files (chunk size is small enough to make this overflow check work) - if (total + chunk->size < total) return status_out_of_memory; - total += chunk->size; - } + // reuse heap memory if waste is not too great + const size_t reuse_threshold = 32; - size_t max_suffix_size = sizeof(char_t); + return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2); +} - // copy chunk list to a contiguous buffer - char* buffer = static_cast(xml_memory::allocate(total + max_suffix_size)); - if (!buffer) 
return status_out_of_memory; +template +PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length) +{ + if (source_length == 0) { + // empty string and null pointer are equivalent, so just deallocate old memory + xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; - char* write = buffer; + if (header & header_mask) alloc->deallocate_string(dest); - for (xml_stream_chunk* chunk = chunks.data; chunk; chunk = chunk->next) - { - assert(write + chunk->size <= buffer + total); - memcpy(write, chunk->data, chunk->size); - write += chunk->size; - } + // mark the string as not allocated + dest = 0; + header &= ~header_mask; - assert(write == buffer + total); + return true; + } else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) { + // we can reuse old buffer, so just copy the new data (including zero terminator) + memcpy(dest, source, source_length * sizeof(char_t)); + dest[source_length] = 0; - // return buffer - *out_buffer = buffer; - *out_size = total; + return true; + } else { + xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; - return status_ok; - } + if (!alloc->reserve()) return false; - template PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream& stream, void** out_buffer, size_t* out_size) - { - // get length of remaining data in stream - typename std::basic_istream::pos_type pos = stream.tellg(); - stream.seekg(0, std::ios::end); - std::streamoff length = stream.tellg() - pos; - stream.seekg(pos); + // allocate new buffer + char_t* buf = alloc->allocate_string(source_length + 1); + if (!buf) return false; - if (stream.fail() || pos < 0) return status_io_error; + // copy the string (including zero terminator) + memcpy(buf, source, source_length * sizeof(char_t)); + buf[source_length] = 0; - // guard against huge files - size_t read_length = static_cast(length); + // deallocate old buffer (*after* the above to protect against 
overlapping memory and/or allocation failures) + if (header & header_mask) alloc->deallocate_string(dest); - if (static_cast(read_length) != length || length < 0) return status_out_of_memory; + // the string is now allocated, so set the flag + dest = buf; + header |= header_mask; - size_t max_suffix_size = sizeof(char_t); + return true; + } +} - // read stream data into memory (guard against stream exceptions with buffer holder) - auto_deleter buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate); - if (!buffer.data) return status_out_of_memory; +struct gap { + char_t* end; + size_t size; - stream.read(static_cast(buffer.data), static_cast(read_length)); + gap(): end(0), size(0) { + } - // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors - if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; + // Push new gap, move s count bytes further (skipping the gap). + // Collapse previous gap. + void push(char_t*& s, size_t count) { + if (end) { // there was a gap already; collapse it + // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) 
+ assert(s >= end); + memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); + } - // return buffer - size_t actual_length = static_cast(stream.gcount()); - assert(actual_length <= read_length); - - *out_buffer = buffer.release(); - *out_size = actual_length * sizeof(T); + s += count; // end of current gap - return status_ok; - } + // "merge" two gaps + end = s; + size += count; + } - template PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer) - { - void* buffer = 0; - size_t size = 0; - xml_parse_status status = status_ok; + // Collapse all gaps, return past-the-end pointer + char_t* flush(char_t* s) { + if (end) { + // Move [old_gap_end, current_pos) to [old_gap_start, ...) + assert(s >= end); + memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); - // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits) - if (stream.fail()) return make_parse_result(status_io_error); + return s - size; + } else return s; + } +}; - // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory) - if (stream.tellg() < 0) - { - stream.clear(); // clear error flags that could be set by a failing tellg - status = load_stream_data_noseek(stream, &buffer, &size); - } - else - status = load_stream_data_seek(stream, &buffer, &size); +PUGI__FN char_t* strconv_escape(char_t* s, gap& g) +{ + char_t* stre = s + 1; - if (status != status_ok) return make_parse_result(status); + switch (*stre) { + case '#': { // &#... + unsigned int ucsc = 0; - xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size); - - return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer); - } + if (stre[1] == 'x') { // &#x... 
(hex code) + stre += 2; + + char_t ch = *stre; + + if (ch == ';') return stre; + + for (;;) { + if (static_cast(ch - '0') <= 9) + ucsc = 16 * ucsc + (ch - '0'); + else if (static_cast((ch | ' ') - 'a') <= 5) + ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); + else if (ch == ';') + break; + else // cancel + return stre; + + ch = *++stre; + } + + ++stre; + } else { // &#... (dec code) + char_t ch = *++stre; + + if (ch == ';') return stre; + + for (;;) { + if (static_cast(static_cast(ch) - '0') <= 9) + ucsc = 10 * ucsc + (ch - '0'); + else if (ch == ';') + break; + else // cancel + return stre; + + ch = *++stre; + } + + ++stre; + } + +#ifdef PUGIXML_WCHAR_MODE + s = reinterpret_cast(wchar_writer::any(reinterpret_cast(s), ucsc)); +#else + s = reinterpret_cast(utf8_writer::any(reinterpret_cast(s), ucsc)); +#endif + + g.push(s, stre - s); + return stre; + } + + case 'a': { // &a + ++stre; + + if (*stre == 'm') { // &am + if (*++stre == 'p' && *++stre == ';') { // & + *s++ = '&'; + ++stre; + + g.push(s, stre - s); + return stre; + } + } else if (*stre == 'p') { // &ap + if (*++stre == 'o' && *++stre == 's' && *++stre == ';') { // ' + *s++ = '\''; + ++stre; + + g.push(s, stre - s); + return stre; + } + } + break; + } + + case 'g': { // &g + if (*++stre == 't' && *++stre == ';') { // > + *s++ = '>'; + ++stre; + + g.push(s, stre - s); + return stre; + } + break; + } + + case 'l': { // &l + if (*++stre == 't' && *++stre == ';') { // < + *s++ = '<'; + ++stre; + + g.push(s, stre - s); + return stre; + } + break; + } + + case 'q': { // &q + if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') { // " + *s++ = '"'; + ++stre; + + g.push(s, stre - s); + return stre; + } + break; + } + + default: + break; + } + + return stre; +} + +// Parser utilities +#define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) +#define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; } +#define PUGI__OPTSET(OPT) ( optmsk & (OPT) ) +#define 
PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); } +#define PUGI__POPNODE() { cursor = cursor->parent; } +#define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; } +#define PUGI__SCANWHILE(X) { while (X) ++s; } +#define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } } +#define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; } +#define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast(0) +#define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); } + +PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) +{ + gap g; + + while (true) { + PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment)); + + if (*s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair + *s++ = '\n'; // replace first one with 0x0a + + if (*s == '\n') g.push(s, 1); + } else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) { // comment ends here + *g.flush(s) = 0; + + return s + (s[2] == '>' ? 
3 : 2); + } else if (*s == 0) { + return 0; + } else ++s; + } +} + +PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch) +{ + gap g; + + while (true) { + PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata)); + + if (*s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair + *s++ = '\n'; // replace first one with 0x0a + + if (*s == '\n') g.push(s, 1); + } else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) { // CDATA ends here + *g.flush(s) = 0; + + return s + 1; + } else if (*s == 0) { + return 0; + } else ++s; + } +} + +typedef char_t* (*strconv_pcdata_t)(char_t*); + +template struct strconv_pcdata_impl { + static char_t* parse(char_t* s) { + gap g; + + char_t* begin = s; + + while (true) { + PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata)); + + if (*s == '<') { // PCDATA ends here + char_t* end = g.flush(s); + + if (opt_trim::value) + while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) + --end; + + *end = 0; + + return s + 1; + } else if (opt_eol::value && *s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair + *s++ = '\n'; // replace first one with 0x0a + + if (*s == '\n') g.push(s, 1); + } else if (opt_escape::value && *s == '&') { + s = strconv_escape(s, g); + } else if (*s == 0) { + char_t* end = g.flush(s); + + if (opt_trim::value) + while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) + --end; + + *end = 0; + + return s; + } else ++s; + } + } +}; + +PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) +{ + PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800); + + switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) { // get bitmask for flags (eol escapes trim) + case 0: + return strconv_pcdata_impl::parse; + case 1: + return strconv_pcdata_impl::parse; + case 2: + return strconv_pcdata_impl::parse; + case 3: + return strconv_pcdata_impl::parse; + case 4: + return strconv_pcdata_impl::parse; + case 5: + return strconv_pcdata_impl::parse; + case 
6: + return strconv_pcdata_impl::parse; + case 7: + return strconv_pcdata_impl::parse; + default: + assert(false); + return 0; // should not get here + } +} + +typedef char_t* (*strconv_attribute_t)(char_t*, char_t); + +template struct strconv_attribute_impl { + static char_t* parse_wnorm(char_t* s, char_t end_quote) { + gap g; + + // trim leading whitespaces + if (PUGI__IS_CHARTYPE(*s, ct_space)) { + char_t* str = s; + + do ++str; + while (PUGI__IS_CHARTYPE(*str, ct_space)); + + g.push(s, str - s); + } + + while (true) { + PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space)); + + if (*s == end_quote) { + char_t* str = g.flush(s); + + do *str-- = 0; + while (PUGI__IS_CHARTYPE(*str, ct_space)); + + return s + 1; + } else if (PUGI__IS_CHARTYPE(*s, ct_space)) { + *s++ = ' '; + + if (PUGI__IS_CHARTYPE(*s, ct_space)) { + char_t* str = s + 1; + while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str; + + g.push(s, str - s); + } + } else if (opt_escape::value && *s == '&') { + s = strconv_escape(s, g); + } else if (!*s) { + return 0; + } else ++s; + } + } + + static char_t* parse_wconv(char_t* s, char_t end_quote) { + gap g; + + while (true) { + PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws)); + + if (*s == end_quote) { + *g.flush(s) = 0; + + return s + 1; + } else if (PUGI__IS_CHARTYPE(*s, ct_space)) { + if (*s == '\r') { + *s++ = ' '; + + if (*s == '\n') g.push(s, 1); + } else *s++ = ' '; + } else if (opt_escape::value && *s == '&') { + s = strconv_escape(s, g); + } else if (!*s) { + return 0; + } else ++s; + } + } + + static char_t* parse_eol(char_t* s, char_t end_quote) { + gap g; + + while (true) { + PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); + + if (*s == end_quote) { + *g.flush(s) = 0; + + return s + 1; + } else if (*s == '\r') { + *s++ = '\n'; + + if (*s == '\n') g.push(s, 1); + } else if (opt_escape::value && *s == '&') { + s = strconv_escape(s, g); + } else if (!*s) { + return 0; + } else ++s; + } + } + + 
static char_t* parse_simple(char_t* s, char_t end_quote) { + gap g; + + while (true) { + PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); + + if (*s == end_quote) { + *g.flush(s) = 0; + + return s + 1; + } else if (opt_escape::value && *s == '&') { + s = strconv_escape(s, g); + } else if (!*s) { + return 0; + } else ++s; + } + } +}; + +PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask) +{ + PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); + + switch ((optmask >> 4) & 15) { // get bitmask for flags (wconv wnorm eol escapes) + case 0: + return strconv_attribute_impl::parse_simple; + case 1: + return strconv_attribute_impl::parse_simple; + case 2: + return strconv_attribute_impl::parse_eol; + case 3: + return strconv_attribute_impl::parse_eol; + case 4: + return strconv_attribute_impl::parse_wconv; + case 5: + return strconv_attribute_impl::parse_wconv; + case 6: + return strconv_attribute_impl::parse_wconv; + case 7: + return strconv_attribute_impl::parse_wconv; + case 8: + return strconv_attribute_impl::parse_wnorm; + case 9: + return strconv_attribute_impl::parse_wnorm; + case 10: + return strconv_attribute_impl::parse_wnorm; + case 11: + return strconv_attribute_impl::parse_wnorm; + case 12: + return strconv_attribute_impl::parse_wnorm; + case 13: + return strconv_attribute_impl::parse_wnorm; + case 14: + return strconv_attribute_impl::parse_wnorm; + case 15: + return strconv_attribute_impl::parse_wnorm; + default: + assert(false); + return 0; // should not get here + } +} + +inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) +{ + xml_parse_result result; + result.status = status; + result.offset = offset; + + return result; +} + +struct xml_parser { + xml_allocator alloc; + xml_allocator* alloc_state; + char_t* error_offset; + xml_parse_status error_status; + + xml_parser(xml_allocator* alloc_): 
alloc(*alloc_), alloc_state(alloc_), error_offset(0), error_status(status_ok) { + } + + ~xml_parser() { + *alloc_state = alloc; + } + + // DOCTYPE consists of nested sections of the following possible types: + // , , "...", '...' + // + // + // First group can not contain nested groups + // Second group can contain nested groups of the same type + // Third group can contain all other groups + char_t* parse_doctype_primitive(char_t* s) { + if (*s == '"' || *s == '\'') { + // quoted string + char_t ch = *s++; + PUGI__SCANFOR(*s == ch); + if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); + + s++; + } else if (s[0] == '<' && s[1] == '?') { + // + s += 2; + PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype + if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); + + s += 2; + } else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') { + s += 4; + PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype + if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); + + s += 3; + } else PUGI__THROW_ERROR(status_bad_doctype, s); + + return s; + } + + char_t* parse_doctype_ignore(char_t* s) { + size_t depth = 0; + + assert(s[0] == '<' && s[1] == '!' && s[2] == '['); + s += 3; + + while (*s) { + if (s[0] == '<' && s[1] == '!' && s[2] == '[') { + // nested ignore section + s += 3; + depth++; + } else if (s[0] == ']' && s[1] == ']' && s[2] == '>') { + // ignore section end + s += 3; + + if (depth == 0) + return s; + + depth--; + } else s++; + } + + PUGI__THROW_ERROR(status_bad_doctype, s); + } + + char_t* parse_doctype_group(char_t* s, char_t endch) { + size_t depth = 0; + + assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); + s += 2; + + while (*s) { + if (s[0] == '<' && s[1] == '!' 
&& s[2] != '-') { + if (s[2] == '[') { + // ignore + s = parse_doctype_ignore(s); + if (!s) return s; + } else { + // some control group + s += 2; + depth++; + } + } else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') { + // unknown tag (forbidden), or some primitive group + s = parse_doctype_primitive(s); + if (!s) return s; + } else if (*s == '>') { + if (depth == 0) + return s; + + depth--; + s++; + } else s++; + } + + if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); + + return s; + } + + char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) { + // parse node contents, starting with exclamation mark + ++s; + + if (*s == '-') { // 'value = s; // Save the offset. + } + + if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) { + s = strconv_comment(s, endch); + + if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value); + } else { + // Scan for terminating '-->'. + PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')); + PUGI__CHECK_ERROR(status_bad_comment, s); + + if (PUGI__OPTSET(parse_comments)) + *s = 0; // Zero-terminate this segment at the first terminating '-'. + + s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. + } + } else PUGI__THROW_ERROR(status_bad_comment, s); + } else if (*s == '[') { + // 'value = s; // Save the offset. + + if (PUGI__OPTSET(parse_eol)) { + s = strconv_cdata(s, endch); + + if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value); + } else { + // Scan for terminating ']]>'. + PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); + PUGI__CHECK_ERROR(status_bad_cdata, s); + + *s++ = 0; // Zero-terminate this segment. + } + } else { // Flagged for discard, but we still have to scan for the terminator. + // Scan for terminating ']]>'. + PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); + PUGI__CHECK_ERROR(status_bad_cdata, s); + + ++s; + } + + s += (s[1] == '>' ? 
2 : 1); // Step over the last ']>'. + } else PUGI__THROW_ERROR(status_bad_cdata, s); + } else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E')) { + s -= 2; + + if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s); + + char_t* mark = s + 9; + + s = parse_doctype_group(s, endch); + if (!s) return s; + + assert((*s == 0 && endch == '>') || *s == '>'); + if (*s) *s++ = 0; + + if (PUGI__OPTSET(parse_doctype)) { + while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark; + + PUGI__PUSHNODE(node_doctype); + + cursor->value = mark; + } + } else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s); + else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s); + else PUGI__THROW_ERROR(status_unrecognized_tag, s); + + return s; + } + + char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) { + // load into registers + xml_node_struct* cursor = ref_cursor; + char_t ch = 0; + + // parse node contents, starting with question mark + ++s; + + // read PI target + char_t* target = s; + + if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s); + + PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); + PUGI__CHECK_ERROR(status_bad_pi, s); + + // determine node type; stricmp / strcasecmp is not portable + bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; + + if (declaration ? 
PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi)) { + if (declaration) { + // disallow non top-level declarations + if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s); + + PUGI__PUSHNODE(node_declaration); + } else { + PUGI__PUSHNODE(node_pi); + } + + cursor->name = target; + + PUGI__ENDSEG(); + + // parse value/attributes + if (ch == '?') { + // empty node + if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s); + s += (*s == '>'); + + PUGI__POPNODE(); + } else if (PUGI__IS_CHARTYPE(ch, ct_space)) { + PUGI__SKIPWS(); + + // scan for tag end + char_t* value = s; + + PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); + PUGI__CHECK_ERROR(status_bad_pi, s); + + if (declaration) { + // replace ending ? with / so that 'element' terminates properly + *s = '/'; + + // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES + s = value; + } else { + // store value and step over > + cursor->value = value; + + PUGI__POPNODE(); + + PUGI__ENDSEG(); + + s += (*s == '>'); + } + } else PUGI__THROW_ERROR(status_bad_pi, s); + } else { + // scan for tag end + PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); + PUGI__CHECK_ERROR(status_bad_pi, s); + + s += (s[1] == '>' ? 2 : 1); + } + + // store from registers + ref_cursor = cursor; + + return s; + } + + char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch) { + strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); + strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); + + char_t ch = 0; + xml_node_struct* cursor = root; + char_t* mark = s; + + while (*s != 0) { + if (*s == '<') { + ++s; + +LOC_TAG: + if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) { // '<#...' + PUGI__PUSHNODE(node_element); // Append a new node to the tree. + + cursor->name = s; + + PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. 
+ PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. + + if (ch == '>') { + // end of tag + } else if (PUGI__IS_CHARTYPE(ch, ct_space)) { +LOC_ATTRIBUTES: + while (true) { + PUGI__SKIPWS(); // Eat any whitespace. + + if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) { // <... #... + xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute. + if (!a) PUGI__THROW_ERROR(status_out_of_memory, s); + + a->name = s; // Save the offset. + + PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. + PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. + + if (PUGI__IS_CHARTYPE(ch, ct_space)) { + PUGI__SKIPWS(); // Eat any whitespace. + + ch = *s; + ++s; + } + + if (ch == '=') { // '<... #=...' + PUGI__SKIPWS(); // Eat any whitespace. + + if (*s == '"' || *s == '\'') { // '<... #="...' + ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. + ++s; // Step over the quote. + a->value = s; // Save the offset. + + s = strconv_attribute(s, ch); + + if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value); + + // After this line the loop continues from the start; + // Whitespaces, / and > are ok, symbols and EOF are wrong, + // everything else will be detected + if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s); + } else PUGI__THROW_ERROR(status_bad_attribute, s); + } else PUGI__THROW_ERROR(status_bad_attribute, s); + } else if (*s == '/') { + ++s; + + if (*s == '>') { + PUGI__POPNODE(); + s++; + break; + } else if (*s == 0 && endch == '>') { + PUGI__POPNODE(); + break; + } else PUGI__THROW_ERROR(status_bad_start_element, s); + } else if (*s == '>') { + ++s; + + break; + } else if (*s == 0 && endch == '>') { + break; + } else PUGI__THROW_ERROR(status_bad_start_element, s); + } + + // !!! + } else if (ch == '/') { // '<#.../' + if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s); + + PUGI__POPNODE(); // Pop. 
+ + s += (*s == '>'); + } else if (ch == 0) { + // we stepped over null terminator, backtrack & handle closing tag + --s; + + if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s); + } else PUGI__THROW_ERROR(status_bad_start_element, s); + } else if (*s == '/') { + ++s; + + char_t* name = cursor->name; + if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s); + + while (PUGI__IS_CHARTYPE(*s, ct_symbol)) { + if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s); + } + + if (*name) { + if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s); + else PUGI__THROW_ERROR(status_end_element_mismatch, s); + } + + PUGI__POPNODE(); // Pop. + + PUGI__SKIPWS(); + + if (*s == 0) { + if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s); + } else { + if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s); + ++s; + } + } else if (*s == '?') { // 'first_child) continue; + } + } + + if (!PUGI__OPTSET(parse_trim_pcdata)) + s = mark; + + if (cursor->parent || PUGI__OPTSET(parse_fragment)) { + PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. + cursor->value = s; // Save the offset. + + s = strconv_pcdata(s); + + PUGI__POPNODE(); // Pop since this is a standalone. + + if (!*s) break; + } else { + PUGI__SCANFOR(*s == '<'); // '...<' + if (!*s) break; + + ++s; + } + + // We're after '<' + goto LOC_TAG; + } + } + + // check that last tag is closed + if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s); + + return s; + } + +#ifdef PUGIXML_WCHAR_MODE + static char_t* parse_skip_bom(char_t* s) { + unsigned int bom = 0xfeff; + return (s[0] == static_cast(bom)) ? s + 1 : s; + } +#else + static char_t* parse_skip_bom(char_t* s) { + return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? 
s + 3 : s; + } +#endif + + static bool has_element_node_siblings(xml_node_struct* node) { + while (node) { + if (PUGI__NODETYPE(node) == node_element) return true; + + node = node->next_sibling; + } + + return false; + } + + static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) { + // early-out for empty documents + if (length == 0) + return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element); + + // get last child of the root before parsing + xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0; + + // create parser on stack + xml_parser parser(static_cast(xmldoc)); + + // save last character and make buffer zero-terminated (speeds up parsing) + char_t endch = buffer[length - 1]; + buffer[length - 1] = 0; + + // skip BOM to make sure it does not end up as part of parse output + char_t* buffer_data = parse_skip_bom(buffer); + + // perform actual parsing + parser.parse_tree(buffer_data, root, optmsk, endch); + + xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0); + assert(result.offset >= 0 && static_cast(result.offset) <= length); + + if (result) { + // since we removed last character, we have to handle the only possible false positive (stray <) + if (endch == '<') + return make_parse_result(status_unrecognized_tag, length - 1); + + // check if there are any element nodes parsed + xml_node_struct* first_root_child_parsed = last_root_child ? 
last_root_child->next_sibling + 0 : root->first_child+ 0; + + if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) + return make_parse_result(status_no_document_element, length - 1); + } else { + // roll back offset if it occurs on a null terminator in the source buffer + if (result.offset > 0 && static_cast(result.offset) == length - 1 && endch == 0) + result.offset--; + } + + return result; + } +}; + +// Output facilities +PUGI__FN xml_encoding get_write_native_encoding() +{ +#ifdef PUGIXML_WCHAR_MODE + return get_wchar_encoding(); +#else + return encoding_utf8; +#endif +} + +PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding) +{ + // replace wchar encoding with utf implementation + if (encoding == encoding_wchar) return get_wchar_encoding(); + + // replace utf16 encoding with utf16 with specific endianness + if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + // replace utf32 encoding with utf32 with specific endianness + if (encoding == encoding_utf32) return is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; + + // only do autodetection if no explicit encoding is requested + if (encoding != encoding_auto) return encoding; + + // assume utf8 encoding + return encoding_utf8; +} + +template PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T) +{ + PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); + + typename T::value_type end = D::process(reinterpret_cast(data), length, dest, T()); + + return static_cast(end - dest) * sizeof(*dest); +} + +template PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap) +{ + PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); + + typename T::value_type end = D::process(reinterpret_cast(data), length, dest, T()); + + if (opt_swap) { + for (typename T::value_type i = dest; i != end; ++i) + *i = endian_swap(*i); + } + + return static_cast(end - dest) * sizeof(*dest); +} + +#ifdef PUGIXML_WCHAR_MODE +PUGI__FN size_t get_valid_length(const char_t* data, size_t length) +{ + if (length < 1) return 0; + + // discard last character if it's the lead of a surrogate pair + return (sizeof(wchar_t) == 2 && static_cast(static_cast(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length; +} + +PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) +{ + // only endian-swapping is required + if (need_endian_swap_utf(encoding, get_wchar_encoding())) { + convert_wchar_endian_swap(r_char, data, length); + + return length * sizeof(char_t); + } + + // convert to utf8 + if (encoding == encoding_utf8) + return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer()); + + // convert to utf16 + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) { + xml_encoding native_encoding = is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; + + return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding); + } + + // convert to utf32 + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) { + xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; + + return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding); + } + + // convert to latin1 + if (encoding == encoding_latin1) + return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer()); + + assert(!"Invalid encoding"); + return 0; +} +#else +PUGI__FN size_t get_valid_length(const char_t* data, size_t length) +{ + if (length < 5) return 0; + + for (size_t i = 1; i <= 4; ++i) { + uint8_t ch = static_cast(data[length - i]); + + // either a standalone character or a leading one + if ((ch & 0xc0) != 0x80) return length - i; + } + + // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk + return length; +} + +PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) +{ + if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) { + xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; + + return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding); + } + + if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) { + xml_encoding native_encoding = is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; + + return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding); + } + + if (encoding == encoding_latin1) + return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer()); + + assert(!"Invalid encoding"); + return 0; +} +#endif + +class xml_buffered_writer +{ + xml_buffered_writer(const xml_buffered_writer&); + xml_buffered_writer& operator=(const xml_buffered_writer&); + +public: + xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) { + PUGI__STATIC_ASSERT(bufcapacity >= 8); + } + + size_t flush() { + flush(buffer, bufsize); + bufsize = 0; + return 0; + } + + void flush(const char_t* data, size_t size) { + if (size == 0) return; + + // fast path, just write data + if (encoding == get_write_native_encoding()) + writer.write(data, size * sizeof(char_t)); + else { + // convert chunk + size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding); + assert(result <= sizeof(scratch)); + + // write data + writer.write(scratch.data_u8, result); + } + } + + void write_direct(const char_t* data, size_t length) { + // flush the remaining buffer contents + flush(); + + // handle large chunks + if (length > bufcapacity) { + if (encoding == get_write_native_encoding()) { + // fast path, can just write data chunk + writer.write(data, length * sizeof(char_t)); + return; + } + + // need to convert in suitable chunks + while (length > bufcapacity) { + // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer + // and form a complete codepoint sequence (i.e. 
discard start of last codepoint if necessary) + size_t chunk_size = get_valid_length(data, bufcapacity); + assert(chunk_size); + + // convert chunk and write + flush(data, chunk_size); + + // iterate + data += chunk_size; + length -= chunk_size; + } + + // small tail is copied below + bufsize = 0; + } + + memcpy(buffer + bufsize, data, length * sizeof(char_t)); + bufsize += length; + } + + void write_buffer(const char_t* data, size_t length) { + size_t offset = bufsize; + + if (offset + length <= bufcapacity) { + memcpy(buffer + offset, data, length * sizeof(char_t)); + bufsize = offset + length; + } else { + write_direct(data, length); + } + } + + void write_string(const char_t* data) { + // write the part of the string that fits in the buffer + size_t offset = bufsize; + + while (*data && offset < bufcapacity) + buffer[offset++] = *data++; + + // write the rest + if (offset < bufcapacity) { + bufsize = offset; + } else { + // backtrack a bit if we have split the codepoint + size_t length = offset - bufsize; + size_t extra = length - get_valid_length(data - length, length); + + bufsize = offset - extra; + + write_direct(data - extra, strlength(data) + extra); + } + } + + void write(char_t d0) { + size_t offset = bufsize; + if (offset > bufcapacity - 1) offset = flush(); + + buffer[offset + 0] = d0; + bufsize = offset + 1; + } + + void write(char_t d0, char_t d1) { + size_t offset = bufsize; + if (offset > bufcapacity - 2) offset = flush(); + + buffer[offset + 0] = d0; + buffer[offset + 1] = d1; + bufsize = offset + 2; + } + + void write(char_t d0, char_t d1, char_t d2) { + size_t offset = bufsize; + if (offset > bufcapacity - 3) offset = flush(); + + buffer[offset + 0] = d0; + buffer[offset + 1] = d1; + buffer[offset + 2] = d2; + bufsize = offset + 3; + } + + void write(char_t d0, char_t d1, char_t d2, char_t d3) { + size_t offset = bufsize; + if (offset > bufcapacity - 4) offset = flush(); + + buffer[offset + 0] = d0; + buffer[offset + 1] = d1; + buffer[offset + 
2] = d2; + buffer[offset + 3] = d3; + bufsize = offset + 4; + } + + void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) { + size_t offset = bufsize; + if (offset > bufcapacity - 5) offset = flush(); + + buffer[offset + 0] = d0; + buffer[offset + 1] = d1; + buffer[offset + 2] = d2; + buffer[offset + 3] = d3; + buffer[offset + 4] = d4; + bufsize = offset + 5; + } + + void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) { + size_t offset = bufsize; + if (offset > bufcapacity - 6) offset = flush(); + + buffer[offset + 0] = d0; + buffer[offset + 1] = d1; + buffer[offset + 2] = d2; + buffer[offset + 3] = d3; + buffer[offset + 4] = d4; + buffer[offset + 5] = d5; + bufsize = offset + 6; + } + + // utf8 maximum expansion: x4 (-> utf32) + // utf16 maximum expansion: x2 (-> utf32) + // utf32 maximum expansion: x1 + enum { + bufcapacitybytes = +#ifdef PUGIXML_MEMORY_OUTPUT_STACK + PUGIXML_MEMORY_OUTPUT_STACK +#else + 10240 +#endif + , + bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4) + }; + + char_t buffer[bufcapacity]; + + union { + uint8_t data_u8[4 * bufcapacity]; + uint16_t data_u16[2 * bufcapacity]; + uint32_t data_u32[bufcapacity]; + char_t data_char[bufcapacity]; + } scratch; + + xml_writer& writer; + size_t bufsize; + xml_encoding encoding; +}; + +PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type) +{ + while (*s) { + const char_t* prev = s; + + // While *s is a usual symbol + PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type)); + + writer.write_buffer(prev, static_cast(s - prev)); + + switch (*s) { + case 0: + break; + case '&': + writer.write('&', 'a', 'm', 'p', ';'); + ++s; + break; + case '<': + writer.write('&', 'l', 't', ';'); + ++s; + break; + case '>': + writer.write('&', 'g', 't', ';'); + ++s; + break; + case '"': + writer.write('&', 'q', 'u', 'o', 't', ';'); + ++s; + break; + default: { // s is not a usual symbol + unsigned int ch = static_cast(*s++); + assert(ch < 
32); + + writer.write('&', '#', static_cast((ch / 10) + '0'), static_cast((ch % 10) + '0'), ';'); + } + } + } +} + +PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) +{ + if (flags & format_no_escapes) + writer.write_string(s); + else + text_output_escaped(writer, s, type); +} + +PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s) +{ + do { + writer.write('<', '!', '[', 'C', 'D'); + writer.write('A', 'T', 'A', '['); + + const char_t* prev = s; + + // look for ]]> sequence - we can't output it as is since it terminates CDATA + while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s; + + // skip ]] if we stopped at ]]>, > will go to the next CDATA section + if (*s) s += 2; + + writer.write_buffer(prev, static_cast(s - prev)); + + writer.write(']', ']', '>'); + } while (*s); +} + +PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth) +{ + switch (indent_length) { + case 1: { + for (unsigned int i = 0; i < depth; ++i) + writer.write(indent[0]); + break; + } + + case 2: { + for (unsigned int i = 0; i < depth; ++i) + writer.write(indent[0], indent[1]); + break; + } + + case 3: { + for (unsigned int i = 0; i < depth; ++i) + writer.write(indent[0], indent[1], indent[2]); + break; + } + + case 4: { + for (unsigned int i = 0; i < depth; ++i) + writer.write(indent[0], indent[1], indent[2], indent[3]); + break; + } + + default: { + for (unsigned int i = 0; i < depth; ++i) + writer.write_buffer(indent, indent_length); + } + } +} + +PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s) +{ + writer.write('<', '!', '-', '-'); + + while (*s) { + const char_t* prev = s; + + // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body + while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s; + + writer.write_buffer(prev, static_cast(s - prev)); + + if (*s) { + 
assert(*s == '-'); + + writer.write('-', ' '); + ++s; + } + } + + writer.write('-', '-', '>'); +} + +PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s) +{ + while (*s) { + const char_t* prev = s; + + // look for ?> sequence - we can't output it since ?> terminates PI + while (*s && !(s[0] == '?' && s[1] == '>')) ++s; + + writer.write_buffer(prev, static_cast(s - prev)); + + if (*s) { + assert(s[0] == '?' && s[1] == '>'); + + writer.write('?', ' ', '>'); + s += 2; + } + } +} + +PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) +{ + const char_t* default_name = PUGIXML_TEXT(":anonymous"); + + for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) { + if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes) { + writer.write('\n'); + + text_output_indent(writer, indent, indent_length, depth + 1); + } else { + writer.write(' '); + } + + writer.write_string(a->name ? a->name + 0 : default_name); + writer.write('=', '"'); + + if (a->value) + text_output(writer, a->value, ctx_special_attr, flags); + + writer.write('"'); + } +} + +PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) +{ + const char_t* default_name = PUGIXML_TEXT(":anonymous"); + const char_t* name = node->name ? 
node->name + 0 : default_name; + + writer.write('<'); + writer.write_string(name); + + if (node->first_attribute) + node_output_attributes(writer, node, indent, indent_length, flags, depth); + + if (!node->first_child) { + writer.write(' ', '/', '>'); + + return false; + } else { + writer.write('>'); + + return true; + } +} + +PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node) +{ + const char_t* default_name = PUGIXML_TEXT(":anonymous"); + const char_t* name = node->name ? node->name + 0 : default_name; + + writer.write('<', '/'); + writer.write_string(name); + writer.write('>'); +} + +PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) +{ + const char_t* default_name = PUGIXML_TEXT(":anonymous"); + + switch (PUGI__NODETYPE(node)) { + case node_pcdata: + text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags); + break; + + case node_cdata: + text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); + break; + + case node_comment: + node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); + break; + + case node_pi: + writer.write('<', '?'); + writer.write_string(node->name ? node->name + 0 : default_name); + + if (node->value) { + writer.write(' '); + node_output_pi_value(writer, node->value); + } + + writer.write('?', '>'); + break; + + case node_declaration: + writer.write('<', '?'); + writer.write_string(node->name ? 
node->name + 0 : default_name); + node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0); + writer.write('?', '>'); + break; + + case node_doctype: + writer.write('<', '!', 'D', 'O', 'C'); + writer.write('T', 'Y', 'P', 'E'); + + if (node->value) { + writer.write(' '); + writer.write_string(node->value); + } + + writer.write('>'); + break; + + default: + assert(!"Invalid node type"); + } +} + +enum indent_flags_t { + indent_newline = 1, + indent_indent = 2 +}; + +PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth) +{ + size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0; + unsigned int indent_flags = indent_indent; + + xml_node_struct* node = root; + + do { + assert(node); + + // begin writing current node + if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata) { + node_output_simple(writer, node, flags); + + indent_flags = 0; + } else { + if ((indent_flags & indent_newline) && (flags & format_raw) == 0) + writer.write('\n'); + + if ((indent_flags & indent_indent) && indent_length) + text_output_indent(writer, indent, indent_length, depth); + + if (PUGI__NODETYPE(node) == node_element) { + indent_flags = indent_newline | indent_indent; + + if (node_output_start(writer, node, indent, indent_length, flags, depth)) { + node = node->first_child; + depth++; + continue; + } + } else if (PUGI__NODETYPE(node) == node_document) { + indent_flags = indent_indent; + + if (node->first_child) { + node = node->first_child; + continue; + } + } else { + node_output_simple(writer, node, flags); + + indent_flags = indent_newline | indent_indent; + } + } + + // continue to the next node + while (node != root) { + if (node->next_sibling) { + node = node->next_sibling; + break; + } + + node = node->parent; + + // write closing node + if (PUGI__NODETYPE(node) == 
node_element) { + depth--; + + if ((indent_flags & indent_newline) && (flags & format_raw) == 0) + writer.write('\n'); + + if ((indent_flags & indent_indent) && indent_length) + text_output_indent(writer, indent, indent_length, depth); + + node_output_end(writer, node); + + indent_flags = indent_newline | indent_indent; + } + } + } while (node != root); + + if ((indent_flags & indent_newline) && (flags & format_raw) == 0) + writer.write('\n'); +} + +PUGI__FN bool has_declaration(xml_node_struct* node) +{ + for (xml_node_struct* child = node->first_child; child; child = child->next_sibling) { + xml_node_type type = PUGI__NODETYPE(child); + + if (type == node_declaration) return true; + if (type == node_element) return false; + } + + return false; +} + +PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node) +{ + for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) + if (a == attr) + return true; + + return false; +} + +PUGI__FN bool allow_insert_attribute(xml_node_type parent) +{ + return parent == node_element || parent == node_declaration; +} + +PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child) +{ + if (parent != node_document && parent != node_element) return false; + if (child == node_document || child == node_null) return false; + if (parent != node_document && (child == node_declaration || child == node_doctype)) return false; + + return true; +} + +PUGI__FN bool allow_move(xml_node parent, xml_node child) +{ + // check that child can be a child of parent + if (!allow_insert_child(parent.type(), child.type())) + return false; + + // check that node is not moved between documents + if (parent.root() != child.root()) + return false; + + // check that new parent is not in the child subtree + xml_node cur = parent; + + while (cur) { + if (cur == child) + return false; + + cur = cur.parent(); + } + + return true; +} + +template +PUGI__FN void node_copy_string(String& dest, Header& header, 
uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc) +{ + assert(!dest && (header & header_mask) == 0); + + if (source) { + if (alloc && (source_header & header_mask) == 0) { + dest = source; + + // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared + header |= xml_memory_page_contents_shared_mask; + source_header |= xml_memory_page_contents_shared_mask; + } else + strcpy_insitu(dest, header, header_mask, source, strlength(source)); + } +} + +PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc) +{ + node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc); + node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc); + + for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute) { + xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn)); + + if (da) { + node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); + node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); + } + } +} + +PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn) +{ + xml_allocator& alloc = get_allocator(dn); + xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? 
&alloc : 0; + + node_copy_contents(dn, sn, shared_alloc); + + xml_node_struct* dit = dn; + xml_node_struct* sit = sn->first_child; + + while (sit && sit != sn) { + if (sit != dn) { + xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit)); + + if (copy) { + node_copy_contents(copy, sit, shared_alloc); + + if (sit->first_child) { + dit = copy; + sit = sit->first_child; + continue; + } + } + } + + // continue to the next node + do { + if (sit->next_sibling) { + sit = sit->next_sibling; + break; + } + + sit = sit->parent; + dit = dit->parent; + } while (sit != sn); + } +} + +PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa) +{ + xml_allocator& alloc = get_allocator(da); + xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0; + + node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); + node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); +} + +inline bool is_text_node(xml_node_struct* node) +{ + xml_node_type type = PUGI__NODETYPE(node); + + return type == node_pcdata || type == node_cdata; +} + +// get value with conversion functions +template U string_to_integer(const char_t* value, U minneg, U maxpos) +{ + U result = 0; + const char_t* s = value; + + while (PUGI__IS_CHARTYPE(*s, ct_space)) + s++; + + bool negative = (*s == '-'); + + s += (*s == '+' || *s == '-'); + + bool overflow = false; + + if (s[0] == '0' && (s[1] | ' ') == 'x') { + s += 2; + + // since overflow detection relies on length of the sequence skip leading zeros + while (*s == '0') + s++; + + const char_t* start = s; + + for (;;) { + if (static_cast(*s - '0') < 10) + result = result * 16 + (*s - '0'); + else if (static_cast((*s | ' ') - 'a') < 6) + result = result * 16 + ((*s | ' ') - 'a' + 10); + else + break; + + s++; + } + + size_t digits = static_cast(s - start); + + overflow = digits > sizeof(U) * 2; + } 
else { + // since overflow detection relies on length of the sequence skip leading zeros + while (*s == '0') + s++; + + const char_t* start = s; + + for (;;) { + if (static_cast(*s - '0') < 10) + result = result * 10 + (*s - '0'); + else + break; + + s++; + } + + size_t digits = static_cast(s - start); + + PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2); + + const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5; + const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6'; + const size_t high_bit = sizeof(U) * 8 - 1; + + overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit))); + } + + if (negative) + return (overflow || result > minneg) ? 0 - minneg : 0 - result; + else + return (overflow || result > maxpos) ? maxpos : result; +} + +PUGI__FN int get_value_int(const char_t* value) +{ + return string_to_integer(value, 0 - static_cast(INT_MIN), INT_MAX); +} + +PUGI__FN unsigned int get_value_uint(const char_t* value) +{ + return string_to_integer(value, 0, UINT_MAX); +} + +PUGI__FN double get_value_double(const char_t* value) +{ +#ifdef PUGIXML_WCHAR_MODE + return wcstod(value, 0); +#else + return strtod(value, 0); +#endif +} + +PUGI__FN float get_value_float(const char_t* value) +{ +#ifdef PUGIXML_WCHAR_MODE + return static_cast(wcstod(value, 0)); +#else + return static_cast(strtod(value, 0)); +#endif +} + +PUGI__FN bool get_value_bool(const char_t* value) +{ + // only look at first char + char_t first = *value; + + // 1*, t* (true), T* (True), y* (yes), Y* (YES) + return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); +} + +#ifdef PUGIXML_HAS_LONG_LONG +PUGI__FN long long get_value_llong(const char_t* value) +{ + return string_to_integer(value, 0 - static_cast(LLONG_MIN), LLONG_MAX); +} + +PUGI__FN unsigned long long get_value_ullong(const char_t* value) +{ + return string_to_integer(value, 0, 
ULLONG_MAX); +} +#endif + +template +PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative) +{ + char_t* result = end - 1; + U rest = negative ? 0 - value : value; + + do { + *result-- = static_cast('0' + (rest % 10)); + rest /= 10; + } while (rest); + + assert(result >= begin); + (void)begin; + + *result = '-'; + + return result + !negative; +} + +// set value with conversion functions +template +PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf) +{ +#ifdef PUGIXML_WCHAR_MODE + char_t wbuf[128]; + assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0])); + + size_t offset = 0; + for (; buf[offset]; ++offset) wbuf[offset] = buf[offset]; + + return strcpy_insitu(dest, header, header_mask, wbuf, offset); +#else + return strcpy_insitu(dest, header, header_mask, buf, strlen(buf)); +#endif +} + +template +PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, int value) +{ + char_t buf[64]; + char_t* end = buf + sizeof(buf) / sizeof(buf[0]); + char_t* begin = integer_to_string(buf, end, value, value < 0); + + return strcpy_insitu(dest, header, header_mask, begin, end - begin); +} + +template +PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned int value) +{ + char_t buf[64]; + char_t* end = buf + sizeof(buf) / sizeof(buf[0]); + char_t* begin = integer_to_string(buf, end, value, false); + + return strcpy_insitu(dest, header, header_mask, begin, end - begin); +} + +template +PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value) +{ + char buf[128]; + sprintf(buf, "%.9g", value); + + return set_value_ascii(dest, header, header_mask, buf); +} + +template +PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value) +{ + char buf[128]; + sprintf(buf, "%.17g", value); + + return set_value_ascii(dest, header, header_mask, buf); +} + +template +PUGI__FN bool 
set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value) +{ + return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5); +} + +#ifdef PUGIXML_HAS_LONG_LONG +template +PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, long long value) +{ + char_t buf[64]; + char_t* end = buf + sizeof(buf) / sizeof(buf[0]); + char_t* begin = integer_to_string(buf, end, value, value < 0); + + return strcpy_insitu(dest, header, header_mask, begin, end - begin); +} + +template +PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned long long value) +{ + char_t buf[64]; + char_t* end = buf + sizeof(buf) / sizeof(buf[0]); + char_t* begin = integer_to_string(buf, end, value, false); + + return strcpy_insitu(dest, header, header_mask, begin, end - begin); +} +#endif + +PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) +{ + // check input buffer + if (!contents && size) return make_parse_result(status_io_error); + + // get actual encoding + xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); + + // get private buffer + char_t* buffer = 0; + size_t length = 0; + + if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory); + + // delete original buffer if we performed a conversion + if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents); + + // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself + if (own || buffer != contents) *out_buffer = buffer; + + // store buffer for offset_debug + doc->buffer = buffer; + + // parse + xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, 
options); + + // remember encoding + res.encoding = buffer_encoding; + + return res; +} + +// we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick +PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result) +{ +#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) + // there are 64-bit versions of fseek/ftell, let's use them + typedef __int64 length_type; + + _fseeki64(file, 0, SEEK_END); + length_type length = _ftelli64(file); + _fseeki64(file, 0, SEEK_SET); +#elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)) + // there are 64-bit versions of fseek/ftell, let's use them + typedef off64_t length_type; + + fseeko64(file, 0, SEEK_END); + length_type length = ftello64(file); + fseeko64(file, 0, SEEK_SET); +#else + // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway. 
+ typedef long length_type; + + fseek(file, 0, SEEK_END); + length_type length = ftell(file); + fseek(file, 0, SEEK_SET); +#endif + + // check for I/O errors + if (length < 0) return status_io_error; + + // check for overflow + size_t result = static_cast(length); + + if (static_cast(result) != length) return status_out_of_memory; + + // finalize + out_result = result; + + return status_ok; +} + +// This function assumes that buffer has extra sizeof(char_t) writable bytes after size +PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) +{ + // We only need to zero-terminate if encoding conversion does not do it for us +#ifdef PUGIXML_WCHAR_MODE + xml_encoding wchar_encoding = get_wchar_encoding(); + + if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding)) { + size_t length = size / sizeof(char_t); + + static_cast(buffer)[length] = 0; + return (length + 1) * sizeof(char_t); + } +#else + if (encoding == encoding_utf8) { + static_cast(buffer)[size] = 0; + return size + 1; + } +#endif + + return size; +} + +PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer) +{ + if (!file) return make_parse_result(status_file_not_found); + + // get file size (can result in I/O errors) + size_t size = 0; + xml_parse_status size_status = get_file_size(file, size); + if (size_status != status_ok) return make_parse_result(size_status); + + size_t max_suffix_size = sizeof(char_t); + + // allocate buffer for the whole file + char* contents = static_cast(xml_memory::allocate(size + max_suffix_size)); + if (!contents) return make_parse_result(status_out_of_memory); + + // read file in memory + size_t read_size = fread(contents, 1, size, file); + + if (read_size != size) { + xml_memory::deallocate(contents); + return make_parse_result(status_io_error); + } + + xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size); + + 
return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer); +} + +#ifndef PUGIXML_NO_STL +template struct xml_stream_chunk { + static xml_stream_chunk* create() { + void* memory = xml_memory::allocate(sizeof(xml_stream_chunk)); + if (!memory) return 0; + + return new (memory) xml_stream_chunk(); + } + + static void destroy(xml_stream_chunk* chunk) { + // free chunk chain + while (chunk) { + xml_stream_chunk* next_ = chunk->next; + + xml_memory::deallocate(chunk); + + chunk = next_; + } + } + + xml_stream_chunk(): next(0), size(0) { + } + + xml_stream_chunk* next; + size_t size; + + T data[xml_memory_page_size / sizeof(T)]; +}; + +template PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream& stream, void** out_buffer, size_t* out_size) +{ + auto_deleter > chunks(0, xml_stream_chunk::destroy); + + // read file to a chunk list + size_t total = 0; + xml_stream_chunk* last = 0; + + while (!stream.eof()) { + // allocate new chunk + xml_stream_chunk* chunk = xml_stream_chunk::create(); + if (!chunk) return status_out_of_memory; + + // append chunk to list + if (last) last = last->next = chunk; + else chunks.data = last = chunk; + + // read data to chunk + stream.read(chunk->data, static_cast(sizeof(chunk->data) / sizeof(T))); + chunk->size = static_cast(stream.gcount()) * sizeof(T); + + // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors + if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; + + // guard against huge files (chunk size is small enough to make this overflow check work) + if (total + chunk->size < total) return status_out_of_memory; + total += chunk->size; + } + + size_t max_suffix_size = sizeof(char_t); + + // copy chunk list to a contiguous buffer + char* buffer = static_cast(xml_memory::allocate(total + max_suffix_size)); + if (!buffer) return status_out_of_memory; + + char* 
write = buffer; + + for (xml_stream_chunk* chunk = chunks.data; chunk; chunk = chunk->next) { + assert(write + chunk->size <= buffer + total); + memcpy(write, chunk->data, chunk->size); + write += chunk->size; + } + + assert(write == buffer + total); + + // return buffer + *out_buffer = buffer; + *out_size = total; + + return status_ok; +} + +template PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream& stream, void** out_buffer, size_t* out_size) +{ + // get length of remaining data in stream + typename std::basic_istream::pos_type pos = stream.tellg(); + stream.seekg(0, std::ios::end); + std::streamoff length = stream.tellg() - pos; + stream.seekg(pos); + + if (stream.fail() || pos < 0) return status_io_error; + + // guard against huge files + size_t read_length = static_cast(length); + + if (static_cast(read_length) != length || length < 0) return status_out_of_memory; + + size_t max_suffix_size = sizeof(char_t); + + // read stream data into memory (guard against stream exceptions with buffer holder) + auto_deleter buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate); + if (!buffer.data) return status_out_of_memory; + + stream.read(static_cast(buffer.data), static_cast(read_length)); + + // read may set failbit | eofbit in case gcount() is less than read_length (i.e. 
line ending conversion), so check for other I/O errors + if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; + + // return buffer + size_t actual_length = static_cast(stream.gcount()); + assert(actual_length <= read_length); + + *out_buffer = buffer.release(); + *out_size = actual_length * sizeof(T); + + return status_ok; +} + +template PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer) +{ + void* buffer = 0; + size_t size = 0; + xml_parse_status status = status_ok; + + // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits) + if (stream.fail()) return make_parse_result(status_io_error); + + // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory) + if (stream.tellg() < 0) { + stream.clear(); // clear error flags that could be set by a failing tellg + status = load_stream_data_noseek(stream, &buffer, &size); + } else + status = load_stream_data_seek(stream, &buffer, &size); + + if (status != status_ok) return make_parse_result(status); + + xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size); + + return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer); +} #endif #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))) - PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) - { - return _wfopen(path, mode); - } +PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) +{ + return _wfopen(path, mode); +} #else - PUGI__FN char* convert_path_heap(const wchar_t* str) - { - assert(str); +PUGI__FN char* convert_path_heap(const wchar_t* str) +{ + assert(str); - // first pass: get length in utf8 characters - 
size_t length = strlength_wide(str); - size_t size = as_utf8_begin(str, length); + // first pass: get length in utf8 characters + size_t length = strlength_wide(str); + size_t size = as_utf8_begin(str, length); - // allocate resulting string - char* result = static_cast(xml_memory::allocate(size + 1)); - if (!result) return 0; + // allocate resulting string + char* result = static_cast(xml_memory::allocate(size + 1)); + if (!result) return 0; - // second pass: convert to utf8 - as_utf8_end(result, size, str, length); + // second pass: convert to utf8 + as_utf8_end(result, size, str, length); - // zero-terminate - result[size] = 0; + // zero-terminate + result[size] = 0; - return result; - } + return result; +} - PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) - { - // there is no standard function to open wide paths, so our best bet is to try utf8 path - char* path_utf8 = convert_path_heap(path); - if (!path_utf8) return 0; +PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) +{ + // there is no standard function to open wide paths, so our best bet is to try utf8 path + char* path_utf8 = convert_path_heap(path); + if (!path_utf8) return 0; - // convert mode to ASCII (we mirror _wfopen interface) - char mode_ascii[4] = {0}; - for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast(mode[i]); + // convert mode to ASCII (we mirror _wfopen interface) + char mode_ascii[4] = {0}; + for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast(mode[i]); - // try to open the utf8 path - FILE* result = fopen(path_utf8, mode_ascii); + // try to open the utf8 path + FILE* result = fopen(path_utf8, mode_ascii); - // free dummy buffer - xml_memory::deallocate(path_utf8); + // free dummy buffer + xml_memory::deallocate(path_utf8); - return result; - } + return result; +} #endif - PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding) - { - if (!file) return 
false; +PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding) +{ + if (!file) return false; - xml_writer_file writer(file); - doc.save(writer, indent, flags, encoding); + xml_writer_file writer(file); + doc.save(writer, indent, flags, encoding); - return ferror(file) == 0; - } + return ferror(file) == 0; +} - struct name_null_sentry - { - xml_node_struct* node; - char_t* name; +struct name_null_sentry { + xml_node_struct* node; + char_t* name; - name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name) - { - node->name = 0; - } + name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name) { + node->name = 0; + } - ~name_null_sentry() - { - node->name = name; - } - }; + ~name_null_sentry() { + node->name = name; + } +}; PUGI__NS_END namespace pugi { - PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_) - { - } +PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_) +{ +} - PUGI__FN void xml_writer_file::write(const void* data, size_t size) - { - size_t result = fwrite(data, 1, size, static_cast(file)); - (void)!result; // unfortunately we can't do proper error handling here - } +PUGI__FN void xml_writer_file::write(const void* data, size_t size) +{ + size_t result = fwrite(data, 1, size, static_cast(file)); + (void)!result; // unfortunately we can't do proper error handling here +} #ifndef PUGIXML_NO_STL - PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(&stream), wide_stream(0) - { - } +PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(&stream), wide_stream(0) +{ +} - PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(0), wide_stream(&stream) - { - } +PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(0), wide_stream(&stream) +{ +} - PUGI__FN void 
xml_writer_stream::write(const void* data, size_t size) - { - if (narrow_stream) - { - assert(!wide_stream); - narrow_stream->write(reinterpret_cast(data), static_cast(size)); - } - else - { - assert(wide_stream); - assert(size % sizeof(wchar_t) == 0); +PUGI__FN void xml_writer_stream::write(const void* data, size_t size) +{ + if (narrow_stream) { + assert(!wide_stream); + narrow_stream->write(reinterpret_cast(data), static_cast(size)); + } else { + assert(wide_stream); + assert(size % sizeof(wchar_t) == 0); - wide_stream->write(reinterpret_cast(data), static_cast(size / sizeof(wchar_t))); - } - } + wide_stream->write(reinterpret_cast(data), static_cast(size / sizeof(wchar_t))); + } +} #endif - PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0) - { - } - - PUGI__FN xml_tree_walker::~xml_tree_walker() - { - } +PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0) +{ +} - PUGI__FN int xml_tree_walker::depth() const - { - return _depth; - } +PUGI__FN xml_tree_walker::~xml_tree_walker() +{ +} - PUGI__FN bool xml_tree_walker::begin(xml_node&) - { - return true; - } +PUGI__FN int xml_tree_walker::depth() const +{ + return _depth; +} - PUGI__FN bool xml_tree_walker::end(xml_node&) - { - return true; - } +PUGI__FN bool xml_tree_walker::begin(xml_node&) +{ + return true; +} - PUGI__FN xml_attribute::xml_attribute(): _attr(0) - { - } +PUGI__FN bool xml_tree_walker::end(xml_node&) +{ + return true; +} - PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) - { - } +PUGI__FN xml_attribute::xml_attribute(): _attr(0) +{ +} - PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***) - { - } +PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) +{ +} - PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const - { - return _attr ? 
unspecified_bool_xml_attribute : 0; - } +PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***) +{ +} - PUGI__FN bool xml_attribute::operator!() const - { - return !_attr; - } +PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const +{ + return _attr ? unspecified_bool_xml_attribute : 0; +} - PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const - { - return (_attr == r._attr); - } - - PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const - { - return (_attr != r._attr); - } +PUGI__FN bool xml_attribute::operator!() const +{ + return !_attr; +} - PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const - { - return (_attr < r._attr); - } - - PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const - { - return (_attr > r._attr); - } - - PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const - { - return (_attr <= r._attr); - } - - PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const - { - return (_attr >= r._attr); - } +PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const +{ + return (_attr == r._attr); +} - PUGI__FN xml_attribute xml_attribute::next_attribute() const - { - return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute(); - } +PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const +{ + return (_attr != r._attr); +} - PUGI__FN xml_attribute xml_attribute::previous_attribute() const - { - return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); - } +PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const +{ + return (_attr < r._attr); +} - PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const - { - return (_attr && _attr->value) ? 
_attr->value + 0 : def; - } +PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const +{ + return (_attr > r._attr); +} - PUGI__FN int xml_attribute::as_int(int def) const - { - return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def; - } +PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const +{ + return (_attr <= r._attr); +} - PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const - { - return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def; - } +PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const +{ + return (_attr >= r._attr); +} - PUGI__FN double xml_attribute::as_double(double def) const - { - return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def; - } +PUGI__FN xml_attribute xml_attribute::next_attribute() const +{ + return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute(); +} - PUGI__FN float xml_attribute::as_float(float def) const - { - return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def; - } +PUGI__FN xml_attribute xml_attribute::previous_attribute() const +{ + return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); +} - PUGI__FN bool xml_attribute::as_bool(bool def) const - { - return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def; - } +PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const +{ + return (_attr && _attr->value) ? _attr->value + 0 : def; +} + +PUGI__FN int xml_attribute::as_int(int def) const +{ + return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def; +} + +PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const +{ + return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def; +} + +PUGI__FN double xml_attribute::as_double(double def) const +{ + return (_attr && _attr->value) ? 
impl::get_value_double(_attr->value) : def; +} + +PUGI__FN float xml_attribute::as_float(float def) const +{ + return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def; +} + +PUGI__FN bool xml_attribute::as_bool(bool def) const +{ + return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def; +} #ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN long long xml_attribute::as_llong(long long def) const - { - return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def; - } +PUGI__FN long long xml_attribute::as_llong(long long def) const +{ + return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def; +} - PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const - { - return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def; - } +PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const +{ + return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def; +} #endif - PUGI__FN bool xml_attribute::empty() const - { - return !_attr; - } +PUGI__FN bool xml_attribute::empty() const +{ + return !_attr; +} - PUGI__FN const char_t* xml_attribute::name() const - { - return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT(""); - } +PUGI__FN const char_t* xml_attribute::name() const +{ + return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT(""); +} - PUGI__FN const char_t* xml_attribute::value() const - { - return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT(""); - } +PUGI__FN const char_t* xml_attribute::value() const +{ + return (_attr && _attr->value) ? 
_attr->value + 0 : PUGIXML_TEXT(""); +} - PUGI__FN size_t xml_attribute::hash_value() const - { - return static_cast(reinterpret_cast(_attr) / sizeof(xml_attribute_struct)); - } +PUGI__FN size_t xml_attribute::hash_value() const +{ + return static_cast(reinterpret_cast(_attr) / sizeof(xml_attribute_struct)); +} - PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const - { - return _attr; - } +PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const +{ + return _attr; +} - PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs) - { - set_value(rhs); - return *this; - } - - PUGI__FN xml_attribute& xml_attribute::operator=(int rhs) - { - set_value(rhs); - return *this; - } +PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs) +{ + set_value(rhs); + return *this; +} - PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs) - { - set_value(rhs); - return *this; - } +PUGI__FN xml_attribute& xml_attribute::operator=(int rhs) +{ + set_value(rhs); + return *this; +} - PUGI__FN xml_attribute& xml_attribute::operator=(double rhs) - { - set_value(rhs); - return *this; - } - - PUGI__FN xml_attribute& xml_attribute::operator=(float rhs) - { - set_value(rhs); - return *this; - } +PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs) +{ + set_value(rhs); + return *this; +} - PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs) - { - set_value(rhs); - return *this; - } +PUGI__FN xml_attribute& xml_attribute::operator=(double rhs) +{ + set_value(rhs); + return *this; +} + +PUGI__FN xml_attribute& xml_attribute::operator=(float rhs) +{ + set_value(rhs); + return *this; +} + +PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs) +{ + set_value(rhs); + return *this; +} #ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs) - { - set_value(rhs); - return *this; - } +PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs) +{ + set_value(rhs); + return 
*this; +} - PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs) - { - set_value(rhs); - return *this; - } +PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs) +{ + set_value(rhs); + return *this; +} #endif - PUGI__FN bool xml_attribute::set_name(const char_t* rhs) - { - if (!_attr) return false; - - return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); - } - - PUGI__FN bool xml_attribute::set_value(const char_t* rhs) - { - if (!_attr) return false; +PUGI__FN bool xml_attribute::set_name(const char_t* rhs) +{ + if (!_attr) return false; - return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); - } + return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); +} - PUGI__FN bool xml_attribute::set_value(int rhs) - { - if (!_attr) return false; +PUGI__FN bool xml_attribute::set_value(const char_t* rhs) +{ + if (!_attr) return false; - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } + return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); +} - PUGI__FN bool xml_attribute::set_value(unsigned int rhs) - { - if (!_attr) return false; +PUGI__FN bool xml_attribute::set_value(int rhs) +{ + if (!_attr) return false; - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } + return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); +} - PUGI__FN bool xml_attribute::set_value(double rhs) - { - if (!_attr) return false; +PUGI__FN bool xml_attribute::set_value(unsigned int rhs) +{ + if (!_attr) return false; - return impl::set_value_convert(_attr->value, _attr->header, 
impl::xml_memory_page_value_allocated_mask, rhs); - } - - PUGI__FN bool xml_attribute::set_value(float rhs) - { - if (!_attr) return false; + return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); +} - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } +PUGI__FN bool xml_attribute::set_value(double rhs) +{ + if (!_attr) return false; - PUGI__FN bool xml_attribute::set_value(bool rhs) - { - if (!_attr) return false; + return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); +} - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } +PUGI__FN bool xml_attribute::set_value(float rhs) +{ + if (!_attr) return false; + + return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); +} + +PUGI__FN bool xml_attribute::set_value(bool rhs) +{ + if (!_attr) return false; + + return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); +} #ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN bool xml_attribute::set_value(long long rhs) - { - if (!_attr) return false; +PUGI__FN bool xml_attribute::set_value(long long rhs) +{ + if (!_attr) return false; - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } + return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); +} - PUGI__FN bool xml_attribute::set_value(unsigned long long rhs) - { - if (!_attr) return false; +PUGI__FN bool xml_attribute::set_value(unsigned long long rhs) +{ + if (!_attr) return false; - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } + return impl::set_value_convert(_attr->value, _attr->header, 
impl::xml_memory_page_value_allocated_mask, rhs); +} #endif #ifdef __BORLANDC__ - PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs) - { - return (bool)lhs && rhs; - } +PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs) +{ + return (bool)lhs && rhs; +} - PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs) - { - return (bool)lhs || rhs; - } +PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs) +{ + return (bool)lhs || rhs; +} #endif - PUGI__FN xml_node::xml_node(): _root(0) - { - } - - PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p) - { - } - - PUGI__FN static void unspecified_bool_xml_node(xml_node***) - { - } - - PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const - { - return _root ? unspecified_bool_xml_node : 0; - } - - PUGI__FN bool xml_node::operator!() const - { - return !_root; - } - - PUGI__FN xml_node::iterator xml_node::begin() const - { - return iterator(_root ? _root->first_child + 0 : 0, _root); - } - - PUGI__FN xml_node::iterator xml_node::end() const - { - return iterator(0, _root); - } - - PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const - { - return attribute_iterator(_root ? 
_root->first_attribute + 0 : 0, _root); - } - - PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const - { - return attribute_iterator(0, _root); - } - - PUGI__FN xml_object_range xml_node::children() const - { - return xml_object_range(begin(), end()); - } - - PUGI__FN xml_object_range xml_node::children(const char_t* name_) const - { - return xml_object_range(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_)); - } - - PUGI__FN xml_object_range xml_node::attributes() const - { - return xml_object_range(attributes_begin(), attributes_end()); - } - - PUGI__FN bool xml_node::operator==(const xml_node& r) const - { - return (_root == r._root); - } - - PUGI__FN bool xml_node::operator!=(const xml_node& r) const - { - return (_root != r._root); - } - - PUGI__FN bool xml_node::operator<(const xml_node& r) const - { - return (_root < r._root); - } - - PUGI__FN bool xml_node::operator>(const xml_node& r) const - { - return (_root > r._root); - } - - PUGI__FN bool xml_node::operator<=(const xml_node& r) const - { - return (_root <= r._root); - } - - PUGI__FN bool xml_node::operator>=(const xml_node& r) const - { - return (_root >= r._root); - } - - PUGI__FN bool xml_node::empty() const - { - return !_root; - } - - PUGI__FN const char_t* xml_node::name() const - { - return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT(""); - } - - PUGI__FN xml_node_type xml_node::type() const - { - return _root ? PUGI__NODETYPE(_root) : node_null; - } - - PUGI__FN const char_t* xml_node::value() const - { - return (_root && _root->value) ? 
_root->value + 0 : PUGIXML_TEXT(""); - } - - PUGI__FN xml_node xml_node::child(const char_t* name_) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequal(name_, i->name)) return xml_node(i); - - return xml_node(); - } - - PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const - { - if (!_root) return xml_attribute(); - - for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) - if (i->name && impl::strequal(name_, i->name)) - return xml_attribute(i); - - return xml_attribute(); - } - - PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) - if (i->name && impl::strequal(name_, i->name)) return xml_node(i); - - return xml_node(); - } - - PUGI__FN xml_node xml_node::next_sibling() const - { - return _root ? xml_node(_root->next_sibling) : xml_node(); - } - - PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) - if (i->name && impl::strequal(name_, i->name)) return xml_node(i); - - return xml_node(); - } - - PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const - { - xml_attribute_struct* hint = hint_._attr; - - // if hint is not an attribute of node, behavior is not defined - assert(!hint || (_root && impl::is_attribute_of(hint, _root))); - - if (!_root) return xml_attribute(); - - // optimistically search from hint up until the end - for (xml_attribute_struct* i = hint; i; i = i->next_attribute) - if (i->name && impl::strequal(name_, i->name)) - { - // update hint to maximize efficiency of searching for consecutive attributes - hint_._attr = i->next_attribute; - - return xml_attribute(i); - } - - // wrap around and search 
from the first attribute until the hint - // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails - for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute) - if (j->name && impl::strequal(name_, j->name)) - { - // update hint to maximize efficiency of searching for consecutive attributes - hint_._attr = j->next_attribute; - - return xml_attribute(j); - } - - return xml_attribute(); - } - - PUGI__FN xml_node xml_node::previous_sibling() const - { - if (!_root) return xml_node(); - - if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); - else return xml_node(); - } - - PUGI__FN xml_node xml_node::parent() const - { - return _root ? xml_node(_root->parent) : xml_node(); - } - - PUGI__FN xml_node xml_node::root() const - { - return _root ? xml_node(&impl::get_document(_root)) : xml_node(); - } - - PUGI__FN xml_text xml_node::text() const - { - return xml_text(_root); - } - - PUGI__FN const char_t* xml_node::child_value() const - { - if (!_root) return PUGIXML_TEXT(""); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (impl::is_text_node(i) && i->value) - return i->value; - - return PUGIXML_TEXT(""); - } - - PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const - { - return child(name_).child_value(); - } - - PUGI__FN xml_attribute xml_node::first_attribute() const - { - return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); - } - - PUGI__FN xml_attribute xml_node::last_attribute() const - { - return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); - } - - PUGI__FN xml_node xml_node::first_child() const - { - return _root ? xml_node(_root->first_child) : xml_node(); - } - - PUGI__FN xml_node xml_node::last_child() const - { - return _root && _root->first_child ? 
xml_node(_root->first_child->prev_sibling_c) : xml_node(); - } - - PUGI__FN bool xml_node::set_name(const char_t* rhs) - { - xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; - - if (type_ != node_element && type_ != node_pi && type_ != node_declaration) - return false; - - return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); - } - - PUGI__FN bool xml_node::set_value(const char_t* rhs) - { - xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; - - if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) - return false; - - return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); - } - - PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_) - { - if (!impl::allow_insert_attribute(type())) return xml_attribute(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); - - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); - - impl::append_attribute(a._attr, _root); - - a.set_name(name_); - - return a; - } +PUGI__FN xml_node::xml_node(): _root(0) +{ +} + +PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p) +{ +} + +PUGI__FN static void unspecified_bool_xml_node(xml_node***) +{ +} + +PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const +{ + return _root ? unspecified_bool_xml_node : 0; +} + +PUGI__FN bool xml_node::operator!() const +{ + return !_root; +} + +PUGI__FN xml_node::iterator xml_node::begin() const +{ + return iterator(_root ? _root->first_child + 0 : 0, _root); +} + +PUGI__FN xml_node::iterator xml_node::end() const +{ + return iterator(0, _root); +} + +PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const +{ + return attribute_iterator(_root ? 
_root->first_attribute + 0 : 0, _root); +} + +PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const +{ + return attribute_iterator(0, _root); +} + +PUGI__FN xml_object_range xml_node::children() const +{ + return xml_object_range(begin(), end()); +} + +PUGI__FN xml_object_range xml_node::children(const char_t* name_) const +{ + return xml_object_range(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_)); +} + +PUGI__FN xml_object_range xml_node::attributes() const +{ + return xml_object_range(attributes_begin(), attributes_end()); +} + +PUGI__FN bool xml_node::operator==(const xml_node& r) const +{ + return (_root == r._root); +} + +PUGI__FN bool xml_node::operator!=(const xml_node& r) const +{ + return (_root != r._root); +} + +PUGI__FN bool xml_node::operator<(const xml_node& r) const +{ + return (_root < r._root); +} + +PUGI__FN bool xml_node::operator>(const xml_node& r) const +{ + return (_root > r._root); +} + +PUGI__FN bool xml_node::operator<=(const xml_node& r) const +{ + return (_root <= r._root); +} + +PUGI__FN bool xml_node::operator>=(const xml_node& r) const +{ + return (_root >= r._root); +} + +PUGI__FN bool xml_node::empty() const +{ + return !_root; +} + +PUGI__FN const char_t* xml_node::name() const +{ + return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT(""); +} + +PUGI__FN xml_node_type xml_node::type() const +{ + return _root ? PUGI__NODETYPE(_root) : node_null; +} + +PUGI__FN const char_t* xml_node::value() const +{ + return (_root && _root->value) ? 
_root->value + 0 : PUGIXML_TEXT(""); +} + +PUGI__FN xml_node xml_node::child(const char_t* name_) const +{ + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequal(name_, i->name)) return xml_node(i); + + return xml_node(); +} + +PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const +{ + if (!_root) return xml_attribute(); + + for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) + if (i->name && impl::strequal(name_, i->name)) + return xml_attribute(i); + + return xml_attribute(); +} + +PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const +{ + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) + if (i->name && impl::strequal(name_, i->name)) return xml_node(i); + + return xml_node(); +} + +PUGI__FN xml_node xml_node::next_sibling() const +{ + return _root ? xml_node(_root->next_sibling) : xml_node(); +} + +PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const +{ + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) + if (i->name && impl::strequal(name_, i->name)) return xml_node(i); + + return xml_node(); +} + +PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const +{ + xml_attribute_struct* hint = hint_._attr; + + // if hint is not an attribute of node, behavior is not defined + assert(!hint || (_root && impl::is_attribute_of(hint, _root))); + + if (!_root) return xml_attribute(); + + // optimistically search from hint up until the end + for (xml_attribute_struct* i = hint; i; i = i->next_attribute) + if (i->name && impl::strequal(name_, i->name)) { + // update hint to maximize efficiency of searching for consecutive attributes + hint_._attr = i->next_attribute; + + return xml_attribute(i); + } - PUGI__FN xml_attribute 
xml_node::prepend_attribute(const char_t* name_) - { - if (!impl::allow_insert_attribute(type())) return xml_attribute(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); + // wrap around and search from the first attribute until the hint + // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails + for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute) + if (j->name && impl::strequal(name_, j->name)) { + // update hint to maximize efficiency of searching for consecutive attributes + hint_._attr = j->next_attribute; - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); + return xml_attribute(j); + } - impl::prepend_attribute(a._attr, _root); + return xml_attribute(); +} - a.set_name(name_); +PUGI__FN xml_node xml_node::previous_sibling() const +{ + if (!_root) return xml_node(); - return a; - } + if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); + else return xml_node(); +} - PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr) - { - if (!impl::allow_insert_attribute(type())) return xml_attribute(); - if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); +PUGI__FN xml_node xml_node::parent() const +{ + return _root ? xml_node(_root->parent) : xml_node(); +} - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); +PUGI__FN xml_node xml_node::root() const +{ + return _root ? 
xml_node(&impl::get_document(_root)) : xml_node(); +} - impl::insert_attribute_after(a._attr, attr._attr, _root); +PUGI__FN xml_text xml_node::text() const +{ + return xml_text(_root); +} - a.set_name(name_); +PUGI__FN const char_t* xml_node::child_value() const +{ + if (!_root) return PUGIXML_TEXT(""); - return a; - } + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (impl::is_text_node(i) && i->value) + return i->value; - PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr) - { - if (!impl::allow_insert_attribute(type())) return xml_attribute(); - if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); + return PUGIXML_TEXT(""); +} - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); +PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const +{ + return child(name_).child_value(); +} - impl::insert_attribute_before(a._attr, attr._attr, _root); +PUGI__FN xml_attribute xml_node::first_attribute() const +{ + return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); +} - a.set_name(name_); +PUGI__FN xml_attribute xml_node::last_attribute() const +{ + return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); +} - return a; - } +PUGI__FN xml_node xml_node::first_child() const +{ + return _root ? xml_node(_root->first_child) : xml_node(); +} - PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto) - { - if (!proto) return xml_attribute(); - if (!impl::allow_insert_attribute(type())) return xml_attribute(); +PUGI__FN xml_node xml_node::last_child() const +{ + return _root && _root->first_child ? 
xml_node(_root->first_child->prev_sibling_c) : xml_node(); +} - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); +PUGI__FN bool xml_node::set_name(const char_t* rhs) +{ + xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); + if (type_ != node_element && type_ != node_pi && type_ != node_declaration) + return false; - impl::append_attribute(a._attr, _root); - impl::node_copy_attribute(a._attr, proto._attr); + return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); +} - return a; - } +PUGI__FN bool xml_node::set_value(const char_t* rhs) +{ + xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; - PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto) - { - if (!proto) return xml_attribute(); - if (!impl::allow_insert_attribute(type())) return xml_attribute(); + if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) + return false; - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); + return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); +} - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); +PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_) +{ + if (!impl::allow_insert_attribute(type())) return xml_attribute(); - impl::prepend_attribute(a._attr, _root); - impl::node_copy_attribute(a._attr, proto._attr); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); - return a; - } + xml_attribute a(impl::allocate_attribute(alloc)); + if (!a) return xml_attribute(); - PUGI__FN xml_attribute xml_node::insert_copy_after(const 
xml_attribute& proto, const xml_attribute& attr) - { - if (!proto) return xml_attribute(); - if (!impl::allow_insert_attribute(type())) return xml_attribute(); - if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); + impl::append_attribute(a._attr, _root); - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); + a.set_name(name_); - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); + return a; +} - impl::insert_attribute_after(a._attr, attr._attr, _root); - impl::node_copy_attribute(a._attr, proto._attr); +PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_) +{ + if (!impl::allow_insert_attribute(type())) return xml_attribute(); - return a; - } + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); - PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) - { - if (!proto) return xml_attribute(); - if (!impl::allow_insert_attribute(type())) return xml_attribute(); - if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); + xml_attribute a(impl::allocate_attribute(alloc)); + if (!a) return xml_attribute(); - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); + impl::prepend_attribute(a._attr, _root); - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); + a.set_name(name_); - impl::insert_attribute_before(a._attr, attr._attr, _root); - impl::node_copy_attribute(a._attr, proto._attr); + return a; +} - return a; - } +PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr) +{ + if (!impl::allow_insert_attribute(type())) return xml_attribute(); + if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - PUGI__FN xml_node xml_node::append_child(xml_node_type 
type_) - { - if (!impl::allow_insert_child(type(), type_)) return xml_node(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); + xml_attribute a(impl::allocate_attribute(alloc)); + if (!a) return xml_attribute(); - impl::append_node(n._root, _root); + impl::insert_attribute_after(a._attr, attr._attr, _root); - if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); + a.set_name(name_); - return n; - } + return a; +} - PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_) - { - if (!impl::allow_insert_child(type(), type_)) return xml_node(); +PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr) +{ + if (!impl::allow_insert_attribute(type())) return xml_attribute(); + if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); - impl::prepend_node(n._root, _root); - - if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); + xml_attribute a(impl::allocate_attribute(alloc)); + if (!a) return xml_attribute(); - return n; - } + impl::insert_attribute_before(a._attr, attr._attr, _root); - PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node) - { - if (!impl::allow_insert_child(type(), type_)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); + a.set_name(name_); - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - xml_node 
n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); + return a; +} - impl::insert_node_before(n._root, node._root); +PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto) +{ + if (!proto) return xml_attribute(); + if (!impl::allow_insert_attribute(type())) return xml_attribute(); - if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); - return n; - } + xml_attribute a(impl::allocate_attribute(alloc)); + if (!a) return xml_attribute(); - PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node) - { - if (!impl::allow_insert_child(type(), type_)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); + impl::append_attribute(a._attr, _root); + impl::node_copy_attribute(a._attr, proto._attr); - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); + return a; +} - impl::insert_node_after(n._root, node._root); +PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto) +{ + if (!proto) return xml_attribute(); + if (!impl::allow_insert_attribute(type())) return xml_attribute(); - if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); - return n; - } + xml_attribute a(impl::allocate_attribute(alloc)); + if (!a) return xml_attribute(); - PUGI__FN xml_node xml_node::append_child(const char_t* name_) - { - xml_node result = append_child(node_element); + impl::prepend_attribute(a._attr, _root); + impl::node_copy_attribute(a._attr, proto._attr); - result.set_name(name_); + return a; +} - return result; - } +PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) 
+{ + if (!proto) return xml_attribute(); + if (!impl::allow_insert_attribute(type())) return xml_attribute(); + if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - PUGI__FN xml_node xml_node::prepend_child(const char_t* name_) - { - xml_node result = prepend_child(node_element); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); - result.set_name(name_); + xml_attribute a(impl::allocate_attribute(alloc)); + if (!a) return xml_attribute(); - return result; - } + impl::insert_attribute_after(a._attr, attr._attr, _root); + impl::node_copy_attribute(a._attr, proto._attr); - PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node) - { - xml_node result = insert_child_after(node_element, node); + return a; +} - result.set_name(name_); +PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) +{ + if (!proto) return xml_attribute(); + if (!impl::allow_insert_attribute(type())) return xml_attribute(); + if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - return result; - } + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_attribute(); - PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node) - { - xml_node result = insert_child_before(node_element, node); + xml_attribute a(impl::allocate_attribute(alloc)); + if (!a) return xml_attribute(); - result.set_name(name_); + impl::insert_attribute_before(a._attr, attr._attr, _root); + impl::node_copy_attribute(a._attr, proto._attr); - return result; - } + return a; +} - PUGI__FN xml_node xml_node::append_copy(const xml_node& proto) - { - xml_node_type type_ = proto.type(); - if (!impl::allow_insert_child(type(), type_)) return xml_node(); +PUGI__FN xml_node xml_node::append_child(xml_node_type type_) +{ + if (!impl::allow_insert_child(type(), type_)) return 
xml_node(); - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); + xml_node n(impl::allocate_node(alloc, type_)); + if (!n) return xml_node(); - impl::append_node(n._root, _root); - impl::node_copy_tree(n._root, proto._root); + impl::append_node(n._root, _root); - return n; - } + if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto) - { - xml_node_type type_ = proto.type(); - if (!impl::allow_insert_child(type(), type_)) return xml_node(); + return n; +} - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); +PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_) +{ + if (!impl::allow_insert_child(type(), type_)) return xml_node(); - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); - impl::prepend_node(n._root, _root); - impl::node_copy_tree(n._root, proto._root); + xml_node n(impl::allocate_node(alloc, type_)); + if (!n) return xml_node(); - return n; - } + impl::prepend_node(n._root, _root); - PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) - { - xml_node_type type_ = proto.type(); - if (!impl::allow_insert_child(type(), type_)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); + if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); + return n; +} - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); +PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, 
const xml_node& node) +{ + if (!impl::allow_insert_child(type(), type_)) return xml_node(); + if (!node._root || node._root->parent != _root) return xml_node(); - impl::insert_node_after(n._root, node._root); - impl::node_copy_tree(n._root, proto._root); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); - return n; - } + xml_node n(impl::allocate_node(alloc, type_)); + if (!n) return xml_node(); - PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) - { - xml_node_type type_ = proto.type(); - if (!impl::allow_insert_child(type(), type_)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); + impl::insert_node_before(n._root, node._root); - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); + if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); + return n; +} - impl::insert_node_before(n._root, node._root); - impl::node_copy_tree(n._root, proto._root); +PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node) +{ + if (!impl::allow_insert_child(type(), type_)) return xml_node(); + if (!node._root || node._root->parent != _root) return xml_node(); - return n; - } + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); - PUGI__FN xml_node xml_node::append_move(const xml_node& moved) - { - if (!impl::allow_move(*this, moved)) return xml_node(); + xml_node n(impl::allocate_node(alloc, type_)); + if (!n) return xml_node(); - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); + impl::insert_node_after(n._root, node._root); - // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers - impl::get_document(_root).header |= 
impl::xml_memory_page_contents_shared_mask; + if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - impl::remove_node(moved._root); - impl::append_node(moved._root, _root); + return n; +} - return moved; - } +PUGI__FN xml_node xml_node::append_child(const char_t* name_) +{ + xml_node result = append_child(node_element); - PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved) - { - if (!impl::allow_move(*this, moved)) return xml_node(); + result.set_name(name_); - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); + return result; +} - // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers - impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; +PUGI__FN xml_node xml_node::prepend_child(const char_t* name_) +{ + xml_node result = prepend_child(node_element); - impl::remove_node(moved._root); - impl::prepend_node(moved._root, _root); + result.set_name(name_); - return moved; - } + return result; +} - PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node) - { - if (!impl::allow_move(*this, moved)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); - if (moved._root == node._root) return xml_node(); +PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node) +{ + xml_node result = insert_child_after(node_element, node); - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); + result.set_name(name_); - // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers - impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; + return result; +} - impl::remove_node(moved._root); - impl::insert_node_after(moved._root, node._root); +PUGI__FN xml_node 
xml_node::insert_child_before(const char_t* name_, const xml_node& node) +{ + xml_node result = insert_child_before(node_element, node); - return moved; - } + result.set_name(name_); - PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node) - { - if (!impl::allow_move(*this, moved)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); - if (moved._root == node._root) return xml_node(); + return result; +} - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); +PUGI__FN xml_node xml_node::append_copy(const xml_node& proto) +{ + xml_node_type type_ = proto.type(); + if (!impl::allow_insert_child(type(), type_)) return xml_node(); - // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers - impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); - impl::remove_node(moved._root); - impl::insert_node_before(moved._root, node._root); + xml_node n(impl::allocate_node(alloc, type_)); + if (!n) return xml_node(); - return moved; - } + impl::append_node(n._root, _root); + impl::node_copy_tree(n._root, proto._root); - PUGI__FN bool xml_node::remove_attribute(const char_t* name_) - { - return remove_attribute(attribute(name_)); - } + return n; +} - PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a) - { - if (!_root || !a._attr) return false; - if (!impl::is_attribute_of(a._attr, _root)) return false; +PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto) +{ + xml_node_type type_ = proto.type(); + if (!impl::allow_insert_child(type(), type_)) return xml_node(); - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return false; + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return 
xml_node(); - impl::remove_attribute(a._attr, _root); - impl::destroy_attribute(a._attr, alloc); + xml_node n(impl::allocate_node(alloc, type_)); + if (!n) return xml_node(); - return true; - } + impl::prepend_node(n._root, _root); + impl::node_copy_tree(n._root, proto._root); - PUGI__FN bool xml_node::remove_child(const char_t* name_) - { - return remove_child(child(name_)); - } + return n; +} - PUGI__FN bool xml_node::remove_child(const xml_node& n) - { - if (!_root || !n._root || n._root->parent != _root) return false; +PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) +{ + xml_node_type type_ = proto.type(); + if (!impl::allow_insert_child(type(), type_)) return xml_node(); + if (!node._root || node._root->parent != _root) return xml_node(); - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return false; + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); - impl::remove_node(n._root); - impl::destroy_node(n._root, alloc); + xml_node n(impl::allocate_node(alloc, type_)); + if (!n) return xml_node(); - return true; - } + impl::insert_node_after(n._root, node._root); + impl::node_copy_tree(n._root, proto._root); - PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) - { - // append_buffer is only valid for elements/documents - if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root); + return n; +} - // get document node - impl::xml_document_struct* doc = &impl::get_document(_root); +PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) +{ + xml_node_type type_ = proto.type(); + if (!impl::allow_insert_child(type(), type_)) return xml_node(); + if (!node._root || node._root->parent != _root) return xml_node(); - // disable document_buffer_order optimization since in a 
document with multiple buffers comparing buffer pointers does not make sense - doc->header |= impl::xml_memory_page_contents_shared_mask; - - // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later) - impl::xml_memory_page* page = 0; - impl::xml_extra_buffer* extra = static_cast(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page)); - (void)page; - - if (!extra) return impl::make_parse_result(status_out_of_memory); + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); - // add extra buffer to the list - extra->buffer = 0; - extra->next = doc->extra_buffers; - doc->extra_buffers = extra; - - // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level - impl::name_null_sentry sentry(_root); - - return impl::load_buffer_impl(doc, _root, const_cast(contents), size, options, encoding, false, false, &extra->buffer); - } - - PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequal(name_, i->name)) - { - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) - return xml_node(i); - } - - return xml_node(); - } - - PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? 
a->value + 0 : PUGIXML_TEXT(""))) - return xml_node(i); - - return xml_node(); - } + xml_node n(impl::allocate_node(alloc, type_)); + if (!n) return xml_node(); + + impl::insert_node_before(n._root, node._root); + impl::node_copy_tree(n._root, proto._root); + + return n; +} + +PUGI__FN xml_node xml_node::append_move(const xml_node& moved) +{ + if (!impl::allow_move(*this, moved)) return xml_node(); + + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + + // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers + impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; + + impl::remove_node(moved._root); + impl::append_node(moved._root, _root); + + return moved; +} + +PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved) +{ + if (!impl::allow_move(*this, moved)) return xml_node(); + + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + + // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers + impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; + + impl::remove_node(moved._root); + impl::prepend_node(moved._root, _root); + + return moved; +} + +PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node) +{ + if (!impl::allow_move(*this, moved)) return xml_node(); + if (!node._root || node._root->parent != _root) return xml_node(); + if (moved._root == node._root) return xml_node(); + + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + + // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers + impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; + + impl::remove_node(moved._root); + 
impl::insert_node_after(moved._root, node._root); + + return moved; +} + +PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node) +{ + if (!impl::allow_move(*this, moved)) return xml_node(); + if (!node._root || node._root->parent != _root) return xml_node(); + if (moved._root == node._root) return xml_node(); + + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return xml_node(); + + // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers + impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; + + impl::remove_node(moved._root); + impl::insert_node_before(moved._root, node._root); + + return moved; +} + +PUGI__FN bool xml_node::remove_attribute(const char_t* name_) +{ + return remove_attribute(attribute(name_)); +} + +PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a) +{ + if (!_root || !a._attr) return false; + if (!impl::is_attribute_of(a._attr, _root)) return false; + + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return false; + + impl::remove_attribute(a._attr, _root); + impl::destroy_attribute(a._attr, alloc); + + return true; +} + +PUGI__FN bool xml_node::remove_child(const char_t* name_) +{ + return remove_child(child(name_)); +} + +PUGI__FN bool xml_node::remove_child(const xml_node& n) +{ + if (!_root || !n._root || n._root->parent != _root) return false; + + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return false; + + impl::remove_node(n._root); + impl::destroy_node(n._root, alloc); + + return true; +} + +PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) +{ + // append_buffer is only valid for elements/documents + if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root); 
+ + // get document node + impl::xml_document_struct* doc = &impl::get_document(_root); + + // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense + doc->header |= impl::xml_memory_page_contents_shared_mask; + + // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later) + impl::xml_memory_page* page = 0; + impl::xml_extra_buffer* extra = static_cast(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page)); + (void)page; + + if (!extra) return impl::make_parse_result(status_out_of_memory); + + // add extra buffer to the list + extra->buffer = 0; + extra->next = doc->extra_buffers; + doc->extra_buffers = extra; + + // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level + impl::name_null_sentry sentry(_root); + + return impl::load_buffer_impl(doc, _root, const_cast(contents), size, options, encoding, false, false, &extra->buffer); +} + +PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const +{ + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (i->name && impl::strequal(name_, i->name)) { + for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) + if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) + return xml_node(i); + } + + return xml_node(); +} + +PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const +{ + if (!_root) return xml_node(); + + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) + if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? 
a->value + 0 : PUGIXML_TEXT(""))) + return xml_node(i); + + return xml_node(); +} #ifndef PUGIXML_NO_STL - PUGI__FN string_t xml_node::path(char_t delimiter) const - { - if (!_root) return string_t(); +PUGI__FN string_t xml_node::path(char_t delimiter) const +{ + if (!_root) return string_t(); - size_t offset = 0; + size_t offset = 0; - for (xml_node_struct* i = _root; i; i = i->parent) - { - offset += (i != _root); - offset += i->name ? impl::strlength(i->name) : 0; - } + for (xml_node_struct* i = _root; i; i = i->parent) { + offset += (i != _root); + offset += i->name ? impl::strlength(i->name) : 0; + } - string_t result; - result.resize(offset); + string_t result; + result.resize(offset); - for (xml_node_struct* j = _root; j; j = j->parent) - { - if (j != _root) - result[--offset] = delimiter; + for (xml_node_struct* j = _root; j; j = j->parent) { + if (j != _root) + result[--offset] = delimiter; - if (j->name && *j->name) - { - size_t length = impl::strlength(j->name); + if (j->name && *j->name) { + size_t length = impl::strlength(j->name); - offset -= length; - memcpy(&result[offset], j->name, length * sizeof(char_t)); - } - } + offset -= length; + memcpy(&result[offset], j->name, length * sizeof(char_t)); + } + } - assert(offset == 0); + assert(offset == 0); - return result; - } + return result; +} #endif - PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const - { - xml_node found = *this; // Current search context. +PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const +{ + xml_node found = *this; // Current search context. - if (!_root || !path_ || !path_[0]) return found; + if (!_root || !path_ || !path_[0]) return found; - if (path_[0] == delimiter) - { - // Absolute path; e.g. '/foo/bar' - found = found.root(); - ++path_; - } + if (path_[0] == delimiter) { + // Absolute path; e.g. 
'/foo/bar' + found = found.root(); + ++path_; + } - const char_t* path_segment = path_; + const char_t* path_segment = path_; - while (*path_segment == delimiter) ++path_segment; + while (*path_segment == delimiter) ++path_segment; - const char_t* path_segment_end = path_segment; + const char_t* path_segment_end = path_segment; - while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; + while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; - if (path_segment == path_segment_end) return found; + if (path_segment == path_segment_end) return found; - const char_t* next_segment = path_segment_end; + const char_t* next_segment = path_segment_end; - while (*next_segment == delimiter) ++next_segment; + while (*next_segment == delimiter) ++next_segment; - if (*path_segment == '.' && path_segment + 1 == path_segment_end) - return found.first_element_by_path(next_segment, delimiter); - else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) - return found.parent().first_element_by_path(next_segment, delimiter); - else - { - for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) - { - if (j->name && impl::strequalrange(j->name, path_segment, static_cast(path_segment_end - path_segment))) - { - xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); + if (*path_segment == '.' && path_segment + 1 == path_segment_end) + return found.first_element_by_path(next_segment, delimiter); + else if (*path_segment == '.' && *(path_segment+1) == '.' 
&& path_segment + 2 == path_segment_end) + return found.parent().first_element_by_path(next_segment, delimiter); + else { + for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) { + if (j->name && impl::strequalrange(j->name, path_segment, static_cast(path_segment_end - path_segment))) { + xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); - if (subsearch) return subsearch; - } - } + if (subsearch) return subsearch; + } + } - return xml_node(); - } - } + return xml_node(); + } +} - PUGI__FN bool xml_node::traverse(xml_tree_walker& walker) - { - walker._depth = -1; - - xml_node arg_begin = *this; - if (!walker.begin(arg_begin)) return false; +PUGI__FN bool xml_node::traverse(xml_tree_walker& walker) +{ + walker._depth = -1; - xml_node cur = first_child(); - - if (cur) - { - ++walker._depth; + xml_node arg_begin = *this; + if (!walker.begin(arg_begin)) return false; - do - { - xml_node arg_for_each = cur; - if (!walker.for_each(arg_for_each)) - return false; - - if (cur.first_child()) - { - ++walker._depth; - cur = cur.first_child(); - } - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - // Borland C++ workaround - while (!cur.next_sibling() && cur != *this && !cur.parent().empty()) - { - --walker._depth; - cur = cur.parent(); - } - - if (cur != *this) - cur = cur.next_sibling(); - } - } - while (cur && cur != *this); - } + xml_node cur = first_child(); - assert(walker._depth == -1); + if (cur) { + ++walker._depth; - xml_node arg_end = *this; - return walker.end(arg_end); - } + do { + xml_node arg_for_each = cur; + if (!walker.for_each(arg_for_each)) + return false; - PUGI__FN size_t xml_node::hash_value() const - { - return static_cast(reinterpret_cast(_root) / sizeof(xml_node_struct)); - } + if (cur.first_child()) { + ++walker._depth; + cur = cur.first_child(); + } else if (cur.next_sibling()) + cur = cur.next_sibling(); + else { + // Borland C++ workaround + while (!cur.next_sibling() && cur 
!= *this && !cur.parent().empty()) { + --walker._depth; + cur = cur.parent(); + } - PUGI__FN xml_node_struct* xml_node::internal_object() const - { - return _root; - } + if (cur != *this) + cur = cur.next_sibling(); + } + } while (cur && cur != *this); + } - PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const - { - if (!_root) return; + assert(walker._depth == -1); - impl::xml_buffered_writer buffered_writer(writer, encoding); + xml_node arg_end = *this; + return walker.end(arg_end); +} - impl::node_output(buffered_writer, _root, indent, flags, depth); +PUGI__FN size_t xml_node::hash_value() const +{ + return static_cast(reinterpret_cast(_root) / sizeof(xml_node_struct)); +} - buffered_writer.flush(); - } +PUGI__FN xml_node_struct* xml_node::internal_object() const +{ + return _root; +} + +PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const +{ + if (!_root) return; + + impl::xml_buffered_writer buffered_writer(writer, encoding); + + impl::node_output(buffered_writer, _root, indent, flags, depth); + + buffered_writer.flush(); +} #ifndef PUGIXML_NO_STL - PUGI__FN void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const - { - xml_writer_stream writer(stream); +PUGI__FN void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const +{ + xml_writer_stream writer(stream); - print(writer, indent, flags, encoding, depth); - } + print(writer, indent, flags, encoding, depth); +} - PUGI__FN void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const - { - xml_writer_stream writer(stream); +PUGI__FN void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int 
flags, unsigned int depth) const +{ + xml_writer_stream writer(stream); - print(writer, indent, flags, encoding_wchar, depth); - } + print(writer, indent, flags, encoding_wchar, depth); +} #endif - PUGI__FN ptrdiff_t xml_node::offset_debug() const - { - if (!_root) return -1; +PUGI__FN ptrdiff_t xml_node::offset_debug() const +{ + if (!_root) return -1; - impl::xml_document_struct& doc = impl::get_document(_root); + impl::xml_document_struct& doc = impl::get_document(_root); - // we can determine the offset reliably only if there is exactly once parse buffer - if (!doc.buffer || doc.extra_buffers) return -1; + // we can determine the offset reliably only if there is exactly once parse buffer + if (!doc.buffer || doc.extra_buffers) return -1; - switch (type()) - { - case node_document: - return 0; + switch (type()) { + case node_document: + return 0; - case node_element: - case node_declaration: - case node_pi: - return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1; + case node_element: + case node_declaration: + case node_pi: + return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1; - case node_pcdata: - case node_cdata: - case node_comment: - case node_doctype: - return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1; + case node_pcdata: + case node_cdata: + case node_comment: + case node_doctype: + return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? 
_root->value - doc.buffer : -1; - default: - return -1; - } - } + default: + return -1; + } +} #ifdef __BORLANDC__ - PUGI__FN bool operator&&(const xml_node& lhs, bool rhs) - { - return (bool)lhs && rhs; - } +PUGI__FN bool operator&&(const xml_node& lhs, bool rhs) +{ + return (bool)lhs && rhs; +} - PUGI__FN bool operator||(const xml_node& lhs, bool rhs) - { - return (bool)lhs || rhs; - } +PUGI__FN bool operator||(const xml_node& lhs, bool rhs) +{ + return (bool)lhs || rhs; +} #endif - PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root) - { - } +PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root) +{ +} - PUGI__FN xml_node_struct* xml_text::_data() const - { - if (!_root || impl::is_text_node(_root)) return _root; +PUGI__FN xml_node_struct* xml_text::_data() const +{ + if (!_root || impl::is_text_node(_root)) return _root; - for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling) - if (impl::is_text_node(node)) - return node; + for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling) + if (impl::is_text_node(node)) + return node; - return 0; - } + return 0; +} - PUGI__FN xml_node_struct* xml_text::_data_new() - { - xml_node_struct* d = _data(); - if (d) return d; +PUGI__FN xml_node_struct* xml_text::_data_new() +{ + xml_node_struct* d = _data(); + if (d) return d; - return xml_node(_root).append_child(node_pcdata).internal_object(); - } + return xml_node(_root).append_child(node_pcdata).internal_object(); +} - PUGI__FN xml_text::xml_text(): _root(0) - { - } +PUGI__FN xml_text::xml_text(): _root(0) +{ +} - PUGI__FN static void unspecified_bool_xml_text(xml_text***) - { - } +PUGI__FN static void unspecified_bool_xml_text(xml_text***) +{ +} - PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const - { - return _data() ? unspecified_bool_xml_text : 0; - } +PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const +{ + return _data() ? 
unspecified_bool_xml_text : 0; +} - PUGI__FN bool xml_text::operator!() const - { - return !_data(); - } +PUGI__FN bool xml_text::operator!() const +{ + return !_data(); +} - PUGI__FN bool xml_text::empty() const - { - return _data() == 0; - } +PUGI__FN bool xml_text::empty() const +{ + return _data() == 0; +} - PUGI__FN const char_t* xml_text::get() const - { - xml_node_struct* d = _data(); +PUGI__FN const char_t* xml_text::get() const +{ + xml_node_struct* d = _data(); - return (d && d->value) ? d->value + 0 : PUGIXML_TEXT(""); - } + return (d && d->value) ? d->value + 0 : PUGIXML_TEXT(""); +} - PUGI__FN const char_t* xml_text::as_string(const char_t* def) const - { - xml_node_struct* d = _data(); +PUGI__FN const char_t* xml_text::as_string(const char_t* def) const +{ + xml_node_struct* d = _data(); - return (d && d->value) ? d->value + 0 : def; - } + return (d && d->value) ? d->value + 0 : def; +} - PUGI__FN int xml_text::as_int(int def) const - { - xml_node_struct* d = _data(); +PUGI__FN int xml_text::as_int(int def) const +{ + xml_node_struct* d = _data(); - return (d && d->value) ? impl::get_value_int(d->value) : def; - } + return (d && d->value) ? impl::get_value_int(d->value) : def; +} - PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const - { - xml_node_struct* d = _data(); +PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const +{ + xml_node_struct* d = _data(); - return (d && d->value) ? impl::get_value_uint(d->value) : def; - } + return (d && d->value) ? impl::get_value_uint(d->value) : def; +} - PUGI__FN double xml_text::as_double(double def) const - { - xml_node_struct* d = _data(); +PUGI__FN double xml_text::as_double(double def) const +{ + xml_node_struct* d = _data(); - return (d && d->value) ? impl::get_value_double(d->value) : def; - } + return (d && d->value) ? 
impl::get_value_double(d->value) : def; +} - PUGI__FN float xml_text::as_float(float def) const - { - xml_node_struct* d = _data(); +PUGI__FN float xml_text::as_float(float def) const +{ + xml_node_struct* d = _data(); - return (d && d->value) ? impl::get_value_float(d->value) : def; - } + return (d && d->value) ? impl::get_value_float(d->value) : def; +} - PUGI__FN bool xml_text::as_bool(bool def) const - { - xml_node_struct* d = _data(); +PUGI__FN bool xml_text::as_bool(bool def) const +{ + xml_node_struct* d = _data(); - return (d && d->value) ? impl::get_value_bool(d->value) : def; - } + return (d && d->value) ? impl::get_value_bool(d->value) : def; +} #ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN long long xml_text::as_llong(long long def) const - { - xml_node_struct* d = _data(); +PUGI__FN long long xml_text::as_llong(long long def) const +{ + xml_node_struct* d = _data(); - return (d && d->value) ? impl::get_value_llong(d->value) : def; - } + return (d && d->value) ? impl::get_value_llong(d->value) : def; +} - PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const - { - xml_node_struct* d = _data(); +PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const +{ + xml_node_struct* d = _data(); - return (d && d->value) ? impl::get_value_ullong(d->value) : def; - } + return (d && d->value) ? impl::get_value_ullong(d->value) : def; +} #endif - PUGI__FN bool xml_text::set(const char_t* rhs) - { - xml_node_struct* dn = _data_new(); +PUGI__FN bool xml_text::set(const char_t* rhs) +{ + xml_node_struct* dn = _data_new(); - return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false; - } + return dn ? 
impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false; +} - PUGI__FN bool xml_text::set(int rhs) - { - xml_node_struct* dn = _data_new(); +PUGI__FN bool xml_text::set(int rhs) +{ + xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } + return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; +} - PUGI__FN bool xml_text::set(unsigned int rhs) - { - xml_node_struct* dn = _data_new(); +PUGI__FN bool xml_text::set(unsigned int rhs) +{ + xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } + return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; +} - PUGI__FN bool xml_text::set(float rhs) - { - xml_node_struct* dn = _data_new(); +PUGI__FN bool xml_text::set(float rhs) +{ + xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } + return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; +} - PUGI__FN bool xml_text::set(double rhs) - { - xml_node_struct* dn = _data_new(); +PUGI__FN bool xml_text::set(double rhs) +{ + xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } + return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; +} - PUGI__FN bool xml_text::set(bool rhs) - { - xml_node_struct* dn = _data_new(); +PUGI__FN bool xml_text::set(bool rhs) +{ + xml_node_struct* dn = _data_new(); - return dn ? 
impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } + return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; +} #ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN bool xml_text::set(long long rhs) - { - xml_node_struct* dn = _data_new(); +PUGI__FN bool xml_text::set(long long rhs) +{ + xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } + return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; +} - PUGI__FN bool xml_text::set(unsigned long long rhs) - { - xml_node_struct* dn = _data_new(); +PUGI__FN bool xml_text::set(unsigned long long rhs) +{ + xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } + return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; +} #endif - PUGI__FN xml_text& xml_text::operator=(const char_t* rhs) - { - set(rhs); - return *this; - } +PUGI__FN xml_text& xml_text::operator=(const char_t* rhs) +{ + set(rhs); + return *this; +} - PUGI__FN xml_text& xml_text::operator=(int rhs) - { - set(rhs); - return *this; - } +PUGI__FN xml_text& xml_text::operator=(int rhs) +{ + set(rhs); + return *this; +} - PUGI__FN xml_text& xml_text::operator=(unsigned int rhs) - { - set(rhs); - return *this; - } +PUGI__FN xml_text& xml_text::operator=(unsigned int rhs) +{ + set(rhs); + return *this; +} - PUGI__FN xml_text& xml_text::operator=(double rhs) - { - set(rhs); - return *this; - } +PUGI__FN xml_text& xml_text::operator=(double rhs) +{ + set(rhs); + return *this; +} - PUGI__FN xml_text& xml_text::operator=(float rhs) - { - set(rhs); - return *this; - } +PUGI__FN xml_text& xml_text::operator=(float rhs) +{ + set(rhs); + return 
*this; +} - PUGI__FN xml_text& xml_text::operator=(bool rhs) - { - set(rhs); - return *this; - } +PUGI__FN xml_text& xml_text::operator=(bool rhs) +{ + set(rhs); + return *this; +} #ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN xml_text& xml_text::operator=(long long rhs) - { - set(rhs); - return *this; - } +PUGI__FN xml_text& xml_text::operator=(long long rhs) +{ + set(rhs); + return *this; +} - PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs) - { - set(rhs); - return *this; - } +PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs) +{ + set(rhs); + return *this; +} #endif - PUGI__FN xml_node xml_text::data() const - { - return xml_node(_data()); - } +PUGI__FN xml_node xml_text::data() const +{ + return xml_node(_data()); +} #ifdef __BORLANDC__ - PUGI__FN bool operator&&(const xml_text& lhs, bool rhs) - { - return (bool)lhs && rhs; - } +PUGI__FN bool operator&&(const xml_text& lhs, bool rhs) +{ + return (bool)lhs && rhs; +} - PUGI__FN bool operator||(const xml_text& lhs, bool rhs) - { - return (bool)lhs || rhs; - } +PUGI__FN bool operator||(const xml_text& lhs, bool rhs) +{ + return (bool)lhs || rhs; +} #endif - PUGI__FN xml_node_iterator::xml_node_iterator() - { - } - - PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) - { - } - - PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) - { - } - - PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const - { - return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; - } - - PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const - { - return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; - } - - PUGI__FN xml_node& xml_node_iterator::operator*() const - { - assert(_wrap._root); - return _wrap; - } - - PUGI__FN xml_node* xml_node_iterator::operator->() const - { - assert(_wrap._root); - return 
const_cast(&_wrap); // BCC32 workaround - } - - PUGI__FN const xml_node_iterator& xml_node_iterator::operator++() - { - assert(_wrap._root); - _wrap._root = _wrap._root->next_sibling; - return *this; - } - - PUGI__FN xml_node_iterator xml_node_iterator::operator++(int) - { - xml_node_iterator temp = *this; - ++*this; - return temp; - } - - PUGI__FN const xml_node_iterator& xml_node_iterator::operator--() - { - _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child(); - return *this; - } - - PUGI__FN xml_node_iterator xml_node_iterator::operator--(int) - { - xml_node_iterator temp = *this; - --*this; - return temp; - } - - PUGI__FN xml_attribute_iterator::xml_attribute_iterator() - { - } - - PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) - { - } - - PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) - { - } - - PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const - { - return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; - } - - PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const - { - return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; - } - - PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const - { - assert(_wrap._attr); - return _wrap; - } - - PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const - { - assert(_wrap._attr); - return const_cast(&_wrap); // BCC32 workaround - } - - PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++() - { - assert(_wrap._attr); - _wrap._attr = _wrap._attr->next_attribute; - return *this; - } - - PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int) - { - xml_attribute_iterator temp = *this; - ++*this; - return temp; - } - - PUGI__FN const 
xml_attribute_iterator& xml_attribute_iterator::operator--() - { - _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute(); - return *this; - } - - PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int) - { - xml_attribute_iterator temp = *this; - --*this; - return temp; - } - - PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0) - { - } - - PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name) - { - } - - PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name) - { - } - - PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const - { - return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; - } - - PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const - { - return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; - } - - PUGI__FN xml_node& xml_named_node_iterator::operator*() const - { - assert(_wrap._root); - return _wrap; - } - - PUGI__FN xml_node* xml_named_node_iterator::operator->() const - { - assert(_wrap._root); - return const_cast(&_wrap); // BCC32 workaround - } - - PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++() - { - assert(_wrap._root); - _wrap = _wrap.next_sibling(_name); - return *this; - } - - PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int) - { - xml_named_node_iterator temp = *this; - ++*this; - return temp; - } - - PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--() - { - if (_wrap._root) - _wrap = _wrap.previous_sibling(_name); - else - { - _wrap = _parent.last_child(); - - if (!impl::strequal(_wrap.name(), _name)) - _wrap = _wrap.previous_sibling(_name); - } - - return *this; - } - - PUGI__FN 
xml_named_node_iterator xml_named_node_iterator::operator--(int) - { - xml_named_node_iterator temp = *this; - --*this; - return temp; - } - - PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto) - { - } - - PUGI__FN xml_parse_result::operator bool() const - { - return status == status_ok; - } - - PUGI__FN const char* xml_parse_result::description() const - { - switch (status) - { - case status_ok: return "No error"; - - case status_file_not_found: return "File was not found"; - case status_io_error: return "Error reading from file/stream"; - case status_out_of_memory: return "Could not allocate memory"; - case status_internal_error: return "Internal error occurred"; - - case status_unrecognized_tag: return "Could not determine tag type"; - - case status_bad_pi: return "Error parsing document declaration/processing instruction"; - case status_bad_comment: return "Error parsing comment"; - case status_bad_cdata: return "Error parsing CDATA section"; - case status_bad_doctype: return "Error parsing document type declaration"; - case status_bad_pcdata: return "Error parsing PCDATA section"; - case status_bad_start_element: return "Error parsing start element tag"; - case status_bad_attribute: return "Error parsing element attribute"; - case status_bad_end_element: return "Error parsing end element tag"; - case status_end_element_mismatch: return "Start-end tags mismatch"; - - case status_append_invalid_root: return "Unable to append nodes: root is not an element or document"; - - case status_no_document_element: return "No document element found"; - - default: return "Unknown error"; - } - } - - PUGI__FN xml_document::xml_document(): _buffer(0) - { - create(); - } - - PUGI__FN xml_document::~xml_document() - { - destroy(); - } - - PUGI__FN void xml_document::reset() - { - destroy(); - create(); - } - - PUGI__FN void xml_document::reset(const xml_document& proto) - { - reset(); - - for (xml_node cur = 
proto.first_child(); cur; cur = cur.next_sibling()) - append_copy(cur); - } - - PUGI__FN void xml_document::create() - { - assert(!_root); - - #ifdef PUGIXML_COMPACT - const size_t page_offset = sizeof(uint32_t); - #else - const size_t page_offset = 0; - #endif - - // initialize sentinel page - PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment - sizeof(void*) + page_offset <= sizeof(_memory)); - - // align upwards to page boundary - void* page_memory = reinterpret_cast((reinterpret_cast(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1)); - - // prepare page structure - impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory); - assert(page); - - page->busy_size = impl::xml_memory_page_size; - - // setup first page marker - #ifdef PUGIXML_COMPACT - // round-trip through void* to avoid 'cast increases required alignment of target type' warning - page->compact_page_marker = reinterpret_cast(static_cast(reinterpret_cast(page) + sizeof(impl::xml_memory_page))); - *page->compact_page_marker = sizeof(impl::xml_memory_page); - #endif - - // allocate new root - _root = new (reinterpret_cast(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page); - _root->prev_sibling_c = _root; - - // setup sentinel page - page->allocator = static_cast(_root); - - // verify the document allocation - assert(reinterpret_cast(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory)); - } - - PUGI__FN void xml_document::destroy() - { - assert(_root); - - // destroy static storage - if (_buffer) - { - impl::xml_memory::deallocate(_buffer); - _buffer = 0; - } - - // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator) - for (impl::xml_extra_buffer* extra = static_cast(_root)->extra_buffers; extra; extra = extra->next) - { - if (extra->buffer) 
impl::xml_memory::deallocate(extra->buffer); - } - - // destroy dynamic storage, leave sentinel page (it's in static memory) - impl::xml_memory_page* root_page = PUGI__GETPAGE(_root); - assert(root_page && !root_page->prev); - assert(reinterpret_cast(root_page) >= _memory && reinterpret_cast(root_page) < _memory + sizeof(_memory)); - - for (impl::xml_memory_page* page = root_page->next; page; ) - { - impl::xml_memory_page* next = page->next; - - impl::xml_allocator::deallocate_page(page); +PUGI__FN xml_node_iterator::xml_node_iterator() +{ +} + +PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) +{ +} + +PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) +{ +} + +PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const +{ + return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; +} + +PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const +{ + return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; +} + +PUGI__FN xml_node& xml_node_iterator::operator*() const +{ + assert(_wrap._root); + return _wrap; +} + +PUGI__FN xml_node* xml_node_iterator::operator->() const +{ + assert(_wrap._root); + return const_cast(&_wrap); // BCC32 workaround +} + +PUGI__FN const xml_node_iterator& xml_node_iterator::operator++() +{ + assert(_wrap._root); + _wrap._root = _wrap._root->next_sibling; + return *this; +} + +PUGI__FN xml_node_iterator xml_node_iterator::operator++(int) +{ + xml_node_iterator temp = *this; + ++*this; + return temp; +} + +PUGI__FN const xml_node_iterator& xml_node_iterator::operator--() +{ + _wrap = _wrap._root ? 
_wrap.previous_sibling() : _parent.last_child(); + return *this; +} + +PUGI__FN xml_node_iterator xml_node_iterator::operator--(int) +{ + xml_node_iterator temp = *this; + --*this; + return temp; +} + +PUGI__FN xml_attribute_iterator::xml_attribute_iterator() +{ +} + +PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) +{ +} + +PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) +{ +} + +PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const +{ + return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; +} + +PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const +{ + return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; +} + +PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const +{ + assert(_wrap._attr); + return _wrap; +} + +PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const +{ + assert(_wrap._attr); + return const_cast(&_wrap); // BCC32 workaround +} + +PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++() +{ + assert(_wrap._attr); + _wrap._attr = _wrap._attr->next_attribute; + return *this; +} + +PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int) +{ + xml_attribute_iterator temp = *this; + ++*this; + return temp; +} + +PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--() +{ + _wrap = _wrap._attr ? 
_wrap.previous_attribute() : _parent.last_attribute(); + return *this; +} + +PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int) +{ + xml_attribute_iterator temp = *this; + --*this; + return temp; +} + +PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0) +{ +} + +PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name) +{ +} + +PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name) +{ +} + +PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const +{ + return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; +} + +PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const +{ + return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; +} + +PUGI__FN xml_node& xml_named_node_iterator::operator*() const +{ + assert(_wrap._root); + return _wrap; +} + +PUGI__FN xml_node* xml_named_node_iterator::operator->() const +{ + assert(_wrap._root); + return const_cast(&_wrap); // BCC32 workaround +} + +PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++() +{ + assert(_wrap._root); + _wrap = _wrap.next_sibling(_name); + return *this; +} + +PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int) +{ + xml_named_node_iterator temp = *this; + ++*this; + return temp; +} + +PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--() +{ + if (_wrap._root) + _wrap = _wrap.previous_sibling(_name); + else { + _wrap = _parent.last_child(); + + if (!impl::strequal(_wrap.name(), _name)) + _wrap = _wrap.previous_sibling(_name); + } + + return *this; +} + +PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int) +{ + xml_named_node_iterator temp = *this; + --*this; + return 
temp; +} + +PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto) +{ +} + +PUGI__FN xml_parse_result::operator bool() const +{ + return status == status_ok; +} + +PUGI__FN const char* xml_parse_result::description() const +{ + switch (status) { + case status_ok: + return "No error"; + + case status_file_not_found: + return "File was not found"; + case status_io_error: + return "Error reading from file/stream"; + case status_out_of_memory: + return "Could not allocate memory"; + case status_internal_error: + return "Internal error occurred"; + + case status_unrecognized_tag: + return "Could not determine tag type"; + + case status_bad_pi: + return "Error parsing document declaration/processing instruction"; + case status_bad_comment: + return "Error parsing comment"; + case status_bad_cdata: + return "Error parsing CDATA section"; + case status_bad_doctype: + return "Error parsing document type declaration"; + case status_bad_pcdata: + return "Error parsing PCDATA section"; + case status_bad_start_element: + return "Error parsing start element tag"; + case status_bad_attribute: + return "Error parsing element attribute"; + case status_bad_end_element: + return "Error parsing end element tag"; + case status_end_element_mismatch: + return "Start-end tags mismatch"; + + case status_append_invalid_root: + return "Unable to append nodes: root is not an element or document"; + + case status_no_document_element: + return "No document element found"; + + default: + return "Unknown error"; + } +} + +PUGI__FN xml_document::xml_document(): _buffer(0) +{ + create(); +} + +PUGI__FN xml_document::~xml_document() +{ + destroy(); +} + +PUGI__FN void xml_document::reset() +{ + destroy(); + create(); +} + +PUGI__FN void xml_document::reset(const xml_document& proto) +{ + reset(); + + for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling()) + append_copy(cur); +} + +PUGI__FN void xml_document::create() +{ + 
assert(!_root); + +#ifdef PUGIXML_COMPACT + const size_t page_offset = sizeof(uint32_t); +#else + const size_t page_offset = 0; +#endif + + // initialize sentinel page + PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment - sizeof(void*) + page_offset <= sizeof(_memory)); + + // align upwards to page boundary + void* page_memory = reinterpret_cast((reinterpret_cast(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1)); + + // prepare page structure + impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory); + assert(page); + + page->busy_size = impl::xml_memory_page_size; + + // setup first page marker +#ifdef PUGIXML_COMPACT + // round-trip through void* to avoid 'cast increases required alignment of target type' warning + page->compact_page_marker = reinterpret_cast(static_cast(reinterpret_cast(page) + sizeof(impl::xml_memory_page))); + *page->compact_page_marker = sizeof(impl::xml_memory_page); +#endif + + // allocate new root + _root = new (reinterpret_cast(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page); + _root->prev_sibling_c = _root; + + // setup sentinel page + page->allocator = static_cast(_root); + + // verify the document allocation + assert(reinterpret_cast(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory)); +} + +PUGI__FN void xml_document::destroy() +{ + assert(_root); + + // destroy static storage + if (_buffer) { + impl::xml_memory::deallocate(_buffer); + _buffer = 0; + } + + // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator) + for (impl::xml_extra_buffer* extra = static_cast(_root)->extra_buffers; extra; extra = extra->next) { + if (extra->buffer) impl::xml_memory::deallocate(extra->buffer); + } + + // destroy dynamic storage, leave sentinel page (it's in static memory) + impl::xml_memory_page* root_page 
= PUGI__GETPAGE(_root); + assert(root_page && !root_page->prev); + assert(reinterpret_cast(root_page) >= _memory && reinterpret_cast(root_page) < _memory + sizeof(_memory)); + + for (impl::xml_memory_page* page = root_page->next; page; ) { + impl::xml_memory_page* next = page->next; + + impl::xml_allocator::deallocate_page(page); - page = next; - } - - #ifdef PUGIXML_COMPACT - // destroy hash table - static_cast(_root)->hash.clear(); - #endif - - _root = 0; - } + page = next; + } + +#ifdef PUGIXML_COMPACT + // destroy hash table + static_cast(_root)->hash.clear(); +#endif + + _root = 0; +} #ifndef PUGIXML_NO_STL - PUGI__FN xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options, xml_encoding encoding) - { - reset(); +PUGI__FN xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options, xml_encoding encoding) +{ + reset(); - return impl::load_stream_impl(static_cast(_root), stream, options, encoding, &_buffer); - } + return impl::load_stream_impl(static_cast(_root), stream, options, encoding, &_buffer); +} - PUGI__FN xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options) - { - reset(); +PUGI__FN xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options) +{ + reset(); - return impl::load_stream_impl(static_cast(_root), stream, options, encoding_wchar, &_buffer); - } + return impl::load_stream_impl(static_cast(_root), stream, options, encoding_wchar, &_buffer); +} #endif - PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options) - { - // Force native encoding (skip autodetection) - #ifdef PUGIXML_WCHAR_MODE - xml_encoding encoding = encoding_wchar; - #else - xml_encoding encoding = encoding_utf8; - #endif +PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options) +{ + // Force native encoding (skip autodetection) +#ifdef PUGIXML_WCHAR_MODE + xml_encoding encoding = 
encoding_wchar; +#else + xml_encoding encoding = encoding_utf8; +#endif - return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding); - } + return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding); +} - PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options) - { - return load_string(contents, options); - } +PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options) +{ + return load_string(contents, options); +} - PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding) - { - reset(); +PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding) +{ + reset(); - using impl::auto_deleter; // MSVC7 workaround - auto_deleter file(fopen(path_, "rb"), fclose); + using impl::auto_deleter; // MSVC7 workaround + auto_deleter file(fopen(path_, "rb"), fclose); - return impl::load_file_impl(static_cast(_root), file.data, options, encoding, &_buffer); - } + return impl::load_file_impl(static_cast(_root), file.data, options, encoding, &_buffer); +} - PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding) - { - reset(); +PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding) +{ + reset(); - using impl::auto_deleter; // MSVC7 workaround - auto_deleter file(impl::open_file_wide(path_, L"rb"), fclose); + using impl::auto_deleter; // MSVC7 workaround + auto_deleter file(impl::open_file_wide(path_, L"rb"), fclose); - return impl::load_file_impl(static_cast(_root), file.data, options, encoding, &_buffer); - } + return impl::load_file_impl(static_cast(_root), file.data, options, encoding, &_buffer); +} - PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding 
encoding) - { - reset(); +PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) +{ + reset(); - return impl::load_buffer_impl(static_cast(_root), _root, const_cast(contents), size, options, encoding, false, false, &_buffer); - } + return impl::load_buffer_impl(static_cast(_root), _root, const_cast(contents), size, options, encoding, false, false, &_buffer); +} - PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) - { - reset(); +PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) +{ + reset(); - return impl::load_buffer_impl(static_cast(_root), _root, contents, size, options, encoding, true, false, &_buffer); - } + return impl::load_buffer_impl(static_cast(_root), _root, contents, size, options, encoding, true, false, &_buffer); +} - PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) - { - reset(); +PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) +{ + reset(); - return impl::load_buffer_impl(static_cast(_root), _root, contents, size, options, encoding, true, true, &_buffer); - } + return impl::load_buffer_impl(static_cast(_root), _root, contents, size, options, encoding, true, true, &_buffer); +} - PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const - { - impl::xml_buffered_writer buffered_writer(writer, encoding); +PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const +{ + impl::xml_buffered_writer buffered_writer(writer, encoding); - if ((flags & format_write_bom) && encoding != encoding_latin1) - { - // BOM 
always represents the codepoint U+FEFF, so just write it in native encoding - #ifdef PUGIXML_WCHAR_MODE - unsigned int bom = 0xfeff; - buffered_writer.write(static_cast(bom)); - #else - buffered_writer.write('\xef', '\xbb', '\xbf'); - #endif - } + if ((flags & format_write_bom) && encoding != encoding_latin1) { + // BOM always represents the codepoint U+FEFF, so just write it in native encoding +#ifdef PUGIXML_WCHAR_MODE + unsigned int bom = 0xfeff; + buffered_writer.write(static_cast(bom)); +#else + buffered_writer.write('\xef', '\xbb', '\xbf'); +#endif + } - if (!(flags & format_no_declaration) && !impl::has_declaration(_root)) - { - buffered_writer.write_string(PUGIXML_TEXT("'); - if (!(flags & format_raw)) buffered_writer.write('\n'); - } + if (!(flags & format_no_declaration) && !impl::has_declaration(_root)) { + buffered_writer.write_string(PUGIXML_TEXT("'); + if (!(flags & format_raw)) buffered_writer.write('\n'); + } - impl::node_output(buffered_writer, _root, indent, flags, 0); + impl::node_output(buffered_writer, _root, indent, flags, 0); - buffered_writer.flush(); - } + buffered_writer.flush(); +} #ifndef PUGIXML_NO_STL - PUGI__FN void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const - { - xml_writer_stream writer(stream); +PUGI__FN void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const +{ + xml_writer_stream writer(stream); - save(writer, indent, flags, encoding); - } + save(writer, indent, flags, encoding); +} - PUGI__FN void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags) const - { - xml_writer_stream writer(stream); +PUGI__FN void xml_document::save(std::basic_ostream >& stream, const char_t* indent, unsigned int flags) const +{ + xml_writer_stream writer(stream); - save(writer, indent, flags, encoding_wchar); - } + save(writer, indent, flags, encoding_wchar); +} #endif 
- PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const - { - using impl::auto_deleter; // MSVC7 workaround - auto_deleter file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), fclose); +PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const +{ + using impl::auto_deleter; // MSVC7 workaround + auto_deleter file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), fclose); - return impl::save_file_impl(*this, file.data, indent, flags, encoding); - } + return impl::save_file_impl(*this, file.data, indent, flags, encoding); +} - PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const - { - using impl::auto_deleter; // MSVC7 workaround - auto_deleter file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), fclose); +PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const +{ + using impl::auto_deleter; // MSVC7 workaround + auto_deleter file(impl::open_file_wide(path_, (flags & format_save_file_text) ? 
L"w" : L"wb"), fclose); - return impl::save_file_impl(*this, file.data, indent, flags, encoding); - } + return impl::save_file_impl(*this, file.data, indent, flags, encoding); +} - PUGI__FN xml_node xml_document::document_element() const - { - assert(_root); +PUGI__FN xml_node xml_document::document_element() const +{ + assert(_root); - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (PUGI__NODETYPE(i) == node_element) - return xml_node(i); + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) + if (PUGI__NODETYPE(i) == node_element) + return xml_node(i); - return xml_node(); - } + return xml_node(); +} #ifndef PUGIXML_NO_STL - PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) - { - assert(str); +PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) +{ + assert(str); - return impl::as_utf8_impl(str, impl::strlength_wide(str)); - } + return impl::as_utf8_impl(str, impl::strlength_wide(str)); +} - PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string& str) - { - return impl::as_utf8_impl(str.c_str(), str.size()); - } - - PUGI__FN std::basic_string PUGIXML_FUNCTION as_wide(const char* str) - { - assert(str); +PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string& str) +{ + return impl::as_utf8_impl(str.c_str(), str.size()); +} - return impl::as_wide_impl(str, strlen(str)); - } - - PUGI__FN std::basic_string PUGIXML_FUNCTION as_wide(const std::string& str) - { - return impl::as_wide_impl(str.c_str(), str.size()); - } +PUGI__FN std::basic_string PUGIXML_FUNCTION as_wide(const char* str) +{ + assert(str); + + return impl::as_wide_impl(str, strlen(str)); +} + +PUGI__FN std::basic_string PUGIXML_FUNCTION as_wide(const std::string& str) +{ + return impl::as_wide_impl(str.c_str(), str.size()); +} #endif - PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) - { - impl::xml_memory::allocate = 
allocate; - impl::xml_memory::deallocate = deallocate; - } +PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) +{ + impl::xml_memory::allocate = allocate; + impl::xml_memory::deallocate = deallocate; +} - PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function() - { - return impl::xml_memory::allocate; - } +PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function() +{ + return impl::xml_memory::allocate; +} - PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() - { - return impl::xml_memory::deallocate; - } +PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() +{ + return impl::xml_memory::deallocate; +} } #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) namespace std { - // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) - PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) - { - return std::bidirectional_iterator_tag(); - } +// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) +PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) +{ + return std::bidirectional_iterator_tag(); +} - PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) - { - return std::bidirectional_iterator_tag(); - } +PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) +{ + return std::bidirectional_iterator_tag(); +} - PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&) - { - return std::bidirectional_iterator_tag(); - } +PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&) +{ + return std::bidirectional_iterator_tag(); +} } #endif #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) namespace 
std { - // Workarounds for (non-standard) iterator category detection - PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) - { - return std::bidirectional_iterator_tag(); - } +// Workarounds for (non-standard) iterator category detection +PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) +{ + return std::bidirectional_iterator_tag(); +} - PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&) - { - return std::bidirectional_iterator_tag(); - } +PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&) +{ + return std::bidirectional_iterator_tag(); +} - PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&) - { - return std::bidirectional_iterator_tag(); - } +PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&) +{ + return std::bidirectional_iterator_tag(); +} } #endif #ifndef PUGIXML_NO_XPATH // STL replacements PUGI__NS_BEGIN - struct equal_to - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs == rhs; - } - }; +struct equal_to { + template bool operator()(const T& lhs, const T& rhs) const { + return lhs == rhs; + } +}; - struct not_equal_to - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs != rhs; - } - }; +struct not_equal_to { + template bool operator()(const T& lhs, const T& rhs) const { + return lhs != rhs; + } +}; - struct less - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs < rhs; - } - }; +struct less { + template bool operator()(const T& lhs, const T& rhs) const { + return lhs < rhs; + } +}; - struct less_equal - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs <= rhs; - } - }; +struct less_equal { + template bool operator()(const T& lhs, const T& rhs) const { + return lhs <= 
rhs; + } +}; - template void swap(T& lhs, T& rhs) - { - T temp = lhs; - lhs = rhs; - rhs = temp; - } +template void swap(T& lhs, T& rhs) +{ + T temp = lhs; + lhs = rhs; + rhs = temp; +} - template I min_element(I begin, I end, const Pred& pred) - { - I result = begin; +template I min_element(I begin, I end, const Pred& pred) +{ + I result = begin; - for (I it = begin + 1; it != end; ++it) - if (pred(*it, *result)) - result = it; + for (I it = begin + 1; it != end; ++it) + if (pred(*it, *result)) + result = it; - return result; - } + return result; +} - template void reverse(I begin, I end) - { - while (end - begin > 1) swap(*begin++, *--end); - } +template void reverse(I begin, I end) +{ + while (end - begin > 1) swap(*begin++, *--end); +} - template I unique(I begin, I end) - { - // fast skip head - while (end - begin > 1 && *begin != *(begin + 1)) begin++; +template I unique(I begin, I end) +{ + // fast skip head + while (end - begin > 1 && *begin != *(begin + 1)) begin++; - if (begin == end) return begin; + if (begin == end) return begin; - // last written element - I write = begin++; + // last written element + I write = begin++; - // merge unique elements - while (begin != end) - { - if (*begin != *write) - *++write = *begin++; - else - begin++; - } + // merge unique elements + while (begin != end) { + if (*begin != *write) + *++write = *begin++; + else + begin++; + } - // past-the-end (write points to live element) - return write + 1; - } + // past-the-end (write points to live element) + return write + 1; +} - template void copy_backwards(I begin, I end, I target) - { - while (begin != end) *--target = *--end; - } +template void copy_backwards(I begin, I end, I target) +{ + while (begin != end) *--target = *--end; +} - template void insertion_sort(I begin, I end, const Pred& pred, T*) - { - assert(begin != end); +template void insertion_sort(I begin, I end, const Pred& pred, T*) +{ + assert(begin != end); - for (I it = begin + 1; it != end; ++it) - { - T val 
= *it; + for (I it = begin + 1; it != end; ++it) { + T val = *it; - if (pred(val, *begin)) - { - // move to front - copy_backwards(begin, it, it + 1); - *begin = val; - } - else - { - I hole = it; + if (pred(val, *begin)) { + // move to front + copy_backwards(begin, it, it + 1); + *begin = val; + } else { + I hole = it; - // move hole backwards - while (pred(val, *(hole - 1))) - { - *hole = *(hole - 1); - hole--; - } + // move hole backwards + while (pred(val, *(hole - 1))) { + *hole = *(hole - 1); + hole--; + } - // fill hole with element - *hole = val; - } - } - } + // fill hole with element + *hole = val; + } + } +} - // std variant for elements with == - template void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend) - { - I eqbeg = middle, eqend = middle + 1; +// std variant for elements with == +template void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend) +{ + I eqbeg = middle, eqend = middle + 1; - // expand equal range - while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg; - while (eqend != end && *eqend == *eqbeg) ++eqend; + // expand equal range + while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg; + while (eqend != end && *eqend == *eqbeg) ++eqend; - // process outer elements - I ltend = eqbeg, gtbeg = eqend; + // process outer elements + I ltend = eqbeg, gtbeg = eqend; - for (;;) - { - // find the element from the right side that belongs to the left one - for (; gtbeg != end; ++gtbeg) - if (!pred(*eqbeg, *gtbeg)) - { - if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++); - else break; - } + for (;;) { + // find the element from the right side that belongs to the left one + for (; gtbeg != end; ++gtbeg) + if (!pred(*eqbeg, *gtbeg)) { + if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++); + else break; + } - // find the element from the left side that belongs to the right one - for (; ltend != begin; --ltend) - if (!pred(*(ltend - 1), *eqbeg)) - { - if (*eqbeg == *(ltend - 1)) swap(*(ltend - 
1), *--eqbeg); - else break; - } + // find the element from the left side that belongs to the right one + for (; ltend != begin; --ltend) + if (!pred(*(ltend - 1), *eqbeg)) { + if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg); + else break; + } - // scanned all elements - if (gtbeg == end && ltend == begin) - { - *out_eqbeg = eqbeg; - *out_eqend = eqend; - return; - } + // scanned all elements + if (gtbeg == end && ltend == begin) { + *out_eqbeg = eqbeg; + *out_eqend = eqend; + return; + } - // make room for elements by moving equal area - if (gtbeg == end) - { - if (--ltend != --eqbeg) swap(*ltend, *eqbeg); - swap(*eqbeg, *--eqend); - } - else if (ltend == begin) - { - if (eqend != gtbeg) swap(*eqbeg, *eqend); - ++eqend; - swap(*gtbeg++, *eqbeg++); - } - else swap(*gtbeg++, *--ltend); - } - } + // make room for elements by moving equal area + if (gtbeg == end) { + if (--ltend != --eqbeg) swap(*ltend, *eqbeg); + swap(*eqbeg, *--eqend); + } else if (ltend == begin) { + if (eqend != gtbeg) swap(*eqbeg, *eqend); + ++eqend; + swap(*gtbeg++, *eqbeg++); + } else swap(*gtbeg++, *--ltend); + } +} - template void median3(I first, I middle, I last, const Pred& pred) - { - if (pred(*middle, *first)) swap(*middle, *first); - if (pred(*last, *middle)) swap(*last, *middle); - if (pred(*middle, *first)) swap(*middle, *first); - } +template void median3(I first, I middle, I last, const Pred& pred) +{ + if (pred(*middle, *first)) swap(*middle, *first); + if (pred(*last, *middle)) swap(*last, *middle); + if (pred(*middle, *first)) swap(*middle, *first); +} - template void median(I first, I middle, I last, const Pred& pred) - { - if (last - first <= 40) - { - // median of three for small chunks - median3(first, middle, last, pred); - } - else - { - // median of nine - size_t step = (last - first + 1) / 8; +template void median(I first, I middle, I last, const Pred& pred) +{ + if (last - first <= 40) { + // median of three for small chunks + median3(first, middle, last, pred); 
+ } else { + // median of nine + size_t step = (last - first + 1) / 8; - median3(first, first + step, first + 2 * step, pred); - median3(middle - step, middle, middle + step, pred); - median3(last - 2 * step, last - step, last, pred); - median3(first + step, middle, last - step, pred); - } - } + median3(first, first + step, first + 2 * step, pred); + median3(middle - step, middle, middle + step, pred); + median3(last - 2 * step, last - step, last, pred); + median3(first + step, middle, last - step, pred); + } +} - template void sort(I begin, I end, const Pred& pred) - { - // sort large chunks - while (end - begin > 32) - { - // find median element - I middle = begin + (end - begin) / 2; - median(begin, middle, end - 1, pred); +template void sort(I begin, I end, const Pred& pred) +{ + // sort large chunks + while (end - begin > 32) { + // find median element + I middle = begin + (end - begin) / 2; + median(begin, middle, end - 1, pred); - // partition in three chunks (< = >) - I eqbeg, eqend; - partition(begin, middle, end, pred, &eqbeg, &eqend); + // partition in three chunks (< = >) + I eqbeg, eqend; + partition(begin, middle, end, pred, &eqbeg, &eqend); - // loop on larger half - if (eqbeg - begin > end - eqend) - { - sort(eqend, end, pred); - end = eqbeg; - } - else - { - sort(begin, eqbeg, pred); - begin = eqend; - } - } + // loop on larger half + if (eqbeg - begin > end - eqend) { + sort(eqend, end, pred); + end = eqbeg; + } else { + sort(begin, eqbeg, pred); + begin = eqend; + } + } - // insertion sort small chunk - if (begin != end) insertion_sort(begin, end, pred, &*begin); - } + // insertion sort small chunk + if (begin != end) insertion_sort(begin, end, pred, &*begin); +} PUGI__NS_END // Allocator used for AST and evaluation stacks PUGI__NS_BEGIN - static const size_t xpath_memory_page_size = - #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE - PUGIXML_MEMORY_XPATH_PAGE_SIZE - #else - 4096 - #endif - ; +static const size_t xpath_memory_page_size = +#ifdef 
PUGIXML_MEMORY_XPATH_PAGE_SIZE + PUGIXML_MEMORY_XPATH_PAGE_SIZE +#else + 4096 +#endif + ; - static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*); +static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*); - struct xpath_memory_block - { - xpath_memory_block* next; - size_t capacity; +struct xpath_memory_block { + xpath_memory_block* next; + size_t capacity; - union - { - char data[xpath_memory_page_size]; - double alignment; - }; - }; - - class xpath_allocator - { - xpath_memory_block* _root; - size_t _root_size; + union { + char data[xpath_memory_page_size]; + double alignment; + }; +}; - public: - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf* error_handler; - #endif +class xpath_allocator +{ + xpath_memory_block* _root; + size_t _root_size; - xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size) - { - #ifdef PUGIXML_NO_EXCEPTIONS - error_handler = 0; - #endif - } - - void* allocate_nothrow(size_t size) - { - // round size up to block alignment boundary - size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); +public: +#ifdef PUGIXML_NO_EXCEPTIONS + jmp_buf* error_handler; +#endif - if (_root_size + size <= _root->capacity) - { - void* buf = &_root->data[0] + _root_size; - _root_size += size; - return buf; - } - else - { - // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests - size_t block_capacity_base = sizeof(_root->data); - size_t block_capacity_req = size + block_capacity_base / 4; - size_t block_capacity = (block_capacity_base > block_capacity_req) ? 
block_capacity_base : block_capacity_req; + xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size) { +#ifdef PUGIXML_NO_EXCEPTIONS + error_handler = 0; +#endif + } - size_t block_size = block_capacity + offsetof(xpath_memory_block, data); + void* allocate_nothrow(size_t size) { + // round size up to block alignment boundary + size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); - xpath_memory_block* block = static_cast(xml_memory::allocate(block_size)); - if (!block) return 0; - - block->next = _root; - block->capacity = block_capacity; - - _root = block; - _root_size = size; - - return block->data; - } - } + if (_root_size + size <= _root->capacity) { + void* buf = &_root->data[0] + _root_size; + _root_size += size; + return buf; + } else { + // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests + size_t block_capacity_base = sizeof(_root->data); + size_t block_capacity_req = size + block_capacity_base / 4; + size_t block_capacity = (block_capacity_base > block_capacity_req) ? 
block_capacity_base : block_capacity_req; - void* allocate(size_t size) - { - void* result = allocate_nothrow(size); + size_t block_size = block_capacity + offsetof(xpath_memory_block, data); - if (!result) - { - #ifdef PUGIXML_NO_EXCEPTIONS - assert(error_handler); - longjmp(*error_handler, 1); - #else - throw std::bad_alloc(); - #endif - } + xpath_memory_block* block = static_cast(xml_memory::allocate(block_size)); + if (!block) return 0; - return result; - } + block->next = _root; + block->capacity = block_capacity; - void* reallocate(void* ptr, size_t old_size, size_t new_size) - { - // round size up to block alignment boundary - old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); - new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); + _root = block; + _root_size = size; - // we can only reallocate the last object - assert(ptr == 0 || static_cast(ptr) + old_size == &_root->data[0] + _root_size); + return block->data; + } + } - // adjust root size so that we have not allocated the object at all - bool only_object = (_root_size == old_size); + void* allocate(size_t size) { + void* result = allocate_nothrow(size); - if (ptr) _root_size -= old_size; + if (!result) { +#ifdef PUGIXML_NO_EXCEPTIONS + assert(error_handler); + longjmp(*error_handler, 1); +#else + throw std::bad_alloc(); +#endif + } - // allocate a new version (this will obviously reuse the memory if possible) - void* result = allocate(new_size); - assert(result); + return result; + } - // we have a new block - if (result != ptr && ptr) - { - // copy old data - assert(new_size >= old_size); - memcpy(result, ptr, old_size); + void* reallocate(void* ptr, size_t old_size, size_t new_size) { + // round size up to block alignment boundary + old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); + new_size = (new_size + xpath_memory_block_alignment - 1) & 
~(xpath_memory_block_alignment - 1); - // free the previous page if it had no other objects - if (only_object) - { - assert(_root->data == result); - assert(_root->next); + // we can only reallocate the last object + assert(ptr == 0 || static_cast(ptr) + old_size == &_root->data[0] + _root_size); - xpath_memory_block* next = _root->next->next; + // adjust root size so that we have not allocated the object at all + bool only_object = (_root_size == old_size); - if (next) - { - // deallocate the whole page, unless it was the first one - xml_memory::deallocate(_root->next); - _root->next = next; - } - } - } + if (ptr) _root_size -= old_size; - return result; - } + // allocate a new version (this will obviously reuse the memory if possible) + void* result = allocate(new_size); + assert(result); - void revert(const xpath_allocator& state) - { - // free all new pages - xpath_memory_block* cur = _root; + // we have a new block + if (result != ptr && ptr) { + // copy old data + assert(new_size >= old_size); + memcpy(result, ptr, old_size); - while (cur != state._root) - { - xpath_memory_block* next = cur->next; + // free the previous page if it had no other objects + if (only_object) { + assert(_root->data == result); + assert(_root->next); - xml_memory::deallocate(cur); + xpath_memory_block* next = _root->next->next; - cur = next; - } + if (next) { + // deallocate the whole page, unless it was the first one + xml_memory::deallocate(_root->next); + _root->next = next; + } + } + } - // restore state - _root = state._root; - _root_size = state._root_size; - } + return result; + } - void release() - { - xpath_memory_block* cur = _root; - assert(cur); + void revert(const xpath_allocator& state) { + // free all new pages + xpath_memory_block* cur = _root; - while (cur->next) - { - xpath_memory_block* next = cur->next; + while (cur != state._root) { + xpath_memory_block* next = cur->next; - xml_memory::deallocate(cur); + xml_memory::deallocate(cur); - cur = next; - } - } - }; + 
cur = next; + } - struct xpath_allocator_capture - { - xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) - { - } + // restore state + _root = state._root; + _root_size = state._root_size; + } - ~xpath_allocator_capture() - { - _target->revert(_state); - } + void release() { + xpath_memory_block* cur = _root; + assert(cur); - xpath_allocator* _target; - xpath_allocator _state; - }; + while (cur->next) { + xpath_memory_block* next = cur->next; - struct xpath_stack - { - xpath_allocator* result; - xpath_allocator* temp; - }; + xml_memory::deallocate(cur); - struct xpath_stack_data - { - xpath_memory_block blocks[2]; - xpath_allocator result; - xpath_allocator temp; - xpath_stack stack; + cur = next; + } + } +}; - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf error_handler; - #endif +struct xpath_allocator_capture { + xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) { + } - xpath_stack_data(): result(blocks + 0), temp(blocks + 1) - { - blocks[0].next = blocks[1].next = 0; - blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); + ~xpath_allocator_capture() { + _target->revert(_state); + } - stack.result = &result; - stack.temp = &temp; + xpath_allocator* _target; + xpath_allocator _state; +}; - #ifdef PUGIXML_NO_EXCEPTIONS - result.error_handler = temp.error_handler = &error_handler; - #endif - } +struct xpath_stack { + xpath_allocator* result; + xpath_allocator* temp; +}; - ~xpath_stack_data() - { - result.release(); - temp.release(); - } - }; +struct xpath_stack_data { + xpath_memory_block blocks[2]; + xpath_allocator result; + xpath_allocator temp; + xpath_stack stack; + +#ifdef PUGIXML_NO_EXCEPTIONS + jmp_buf error_handler; +#endif + + xpath_stack_data(): result(blocks + 0), temp(blocks + 1) { + blocks[0].next = blocks[1].next = 0; + blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); + + stack.result = &result; + stack.temp = &temp; + +#ifdef PUGIXML_NO_EXCEPTIONS + result.error_handler 
= temp.error_handler = &error_handler; +#endif + } + + ~xpath_stack_data() { + result.release(); + temp.release(); + } +}; PUGI__NS_END // String class PUGI__NS_BEGIN - class xpath_string - { - const char_t* _buffer; - bool _uses_heap; - size_t _length_heap; +class xpath_string +{ + const char_t* _buffer; + bool _uses_heap; + size_t _length_heap; - static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) - { - char_t* result = static_cast(alloc->allocate((length + 1) * sizeof(char_t))); - assert(result); + static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) { + char_t* result = static_cast(alloc->allocate((length + 1) * sizeof(char_t))); + assert(result); - memcpy(result, string, length * sizeof(char_t)); - result[length] = 0; + memcpy(result, string, length * sizeof(char_t)); + result[length] = 0; - return result; - } + return result; + } - xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) - { - } + xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) { + } - public: - static xpath_string from_const(const char_t* str) - { - return xpath_string(str, false, 0); - } +public: + static xpath_string from_const(const char_t* str) { + return xpath_string(str, false, 0); + } - static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end) - { - assert(begin <= end && *end == 0); + static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end) { + assert(begin <= end && *end == 0); - return xpath_string(begin, true, static_cast(end - begin)); - } + return xpath_string(begin, true, static_cast(end - begin)); + } - static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc) - { - assert(begin <= end); + static xpath_string from_heap(const char_t* begin, 
const char_t* end, xpath_allocator* alloc) { + assert(begin <= end); - size_t length = static_cast(end - begin); + size_t length = static_cast(end - begin); - return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length); - } + return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length); + } - xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) - { - } + xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) { + } - void append(const xpath_string& o, xpath_allocator* alloc) - { - // skip empty sources - if (!*o._buffer) return; + void append(const xpath_string& o, xpath_allocator* alloc) { + // skip empty sources + if (!*o._buffer) return; - // fast append for constant empty target and constant source - if (!*_buffer && !_uses_heap && !o._uses_heap) - { - _buffer = o._buffer; - } - else - { - // need to make heap copy - size_t target_length = length(); - size_t source_length = o.length(); - size_t result_length = target_length + source_length; + // fast append for constant empty target and constant source + if (!*_buffer && !_uses_heap && !o._uses_heap) { + _buffer = o._buffer; + } else { + // need to make heap copy + size_t target_length = length(); + size_t source_length = o.length(); + size_t result_length = target_length + source_length; - // allocate new buffer - char_t* result = static_cast(alloc->reallocate(_uses_heap ? const_cast(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); - assert(result); + // allocate new buffer + char_t* result = static_cast(alloc->reallocate(_uses_heap ? 
const_cast(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); + assert(result); - // append first string to the new buffer in case there was no reallocation - if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); + // append first string to the new buffer in case there was no reallocation + if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); - // append second string to the new buffer - memcpy(result + target_length, o._buffer, source_length * sizeof(char_t)); - result[result_length] = 0; + // append second string to the new buffer + memcpy(result + target_length, o._buffer, source_length * sizeof(char_t)); + result[result_length] = 0; - // finalize - _buffer = result; - _uses_heap = true; - _length_heap = result_length; - } - } + // finalize + _buffer = result; + _uses_heap = true; + _length_heap = result_length; + } + } - const char_t* c_str() const - { - return _buffer; - } + const char_t* c_str() const { + return _buffer; + } - size_t length() const - { - return _uses_heap ? _length_heap : strlength(_buffer); - } - - char_t* data(xpath_allocator* alloc) - { - // make private heap copy - if (!_uses_heap) - { - size_t length_ = strlength(_buffer); + size_t length() const { + return _uses_heap ? 
_length_heap : strlength(_buffer); + } - _buffer = duplicate_string(_buffer, length_, alloc); - _uses_heap = true; - _length_heap = length_; - } + char_t* data(xpath_allocator* alloc) { + // make private heap copy + if (!_uses_heap) { + size_t length_ = strlength(_buffer); - return const_cast(_buffer); - } + _buffer = duplicate_string(_buffer, length_, alloc); + _uses_heap = true; + _length_heap = length_; + } - bool empty() const - { - return *_buffer == 0; - } + return const_cast(_buffer); + } - bool operator==(const xpath_string& o) const - { - return strequal(_buffer, o._buffer); - } + bool empty() const { + return *_buffer == 0; + } - bool operator!=(const xpath_string& o) const - { - return !strequal(_buffer, o._buffer); - } + bool operator==(const xpath_string& o) const { + return strequal(_buffer, o._buffer); + } - bool uses_heap() const - { - return _uses_heap; - } - }; + bool operator!=(const xpath_string& o) const { + return !strequal(_buffer, o._buffer); + } + + bool uses_heap() const { + return _uses_heap; + } +}; PUGI__NS_END PUGI__NS_BEGIN - PUGI__FN bool starts_with(const char_t* string, const char_t* pattern) - { - while (*pattern && *string == *pattern) - { - string++; - pattern++; - } +PUGI__FN bool starts_with(const char_t* string, const char_t* pattern) +{ + while (*pattern && *string == *pattern) { + string++; + pattern++; + } - return *pattern == 0; - } + return *pattern == 0; +} - PUGI__FN const char_t* find_char(const char_t* s, char_t c) - { - #ifdef PUGIXML_WCHAR_MODE - return wcschr(s, c); - #else - return strchr(s, c); - #endif - } - - PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p) - { - #ifdef PUGIXML_WCHAR_MODE - // MSVC6 wcsstr bug workaround (if s is empty it always returns 0) - return (*p == 0) ? s : wcsstr(s, p); - #else - return strstr(s, p); - #endif - } - - // Converts symbol to lower case, if it is an ASCII one - PUGI__FN char_t tolower_ascii(char_t ch) - { - return static_cast(ch - 'A') < 26 ? 
static_cast(ch | ' ') : ch; - } - - PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc) - { - if (na.attribute()) - return xpath_string::from_const(na.attribute().value()); - else - { - xml_node n = na.node(); - - switch (n.type()) - { - case node_pcdata: - case node_cdata: - case node_comment: - case node_pi: - return xpath_string::from_const(n.value()); - - case node_document: - case node_element: - { - xpath_string result; - - xml_node cur = n.first_child(); - - while (cur && cur != n) - { - if (cur.type() == node_pcdata || cur.type() == node_cdata) - result.append(xpath_string::from_const(cur.value()), alloc); - - if (cur.first_child()) - cur = cur.first_child(); - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - while (!cur.next_sibling() && cur != n) - cur = cur.parent(); - - if (cur != n) cur = cur.next_sibling(); - } - } - - return result; - } - - default: - return xpath_string(); - } - } - } - - PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn) - { - assert(ln->parent == rn->parent); - - // there is no common ancestor (the shared parent is null), nodes are from different documents - if (!ln->parent) return ln < rn; - - // determine sibling order - xml_node_struct* ls = ln; - xml_node_struct* rs = rn; - - while (ls && rs) - { - if (ls == rn) return true; - if (rs == ln) return false; - - ls = ls->next_sibling; - rs = rs->next_sibling; - } - - // if rn sibling chain ended ln must be before rn - return !rs; - } - - PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) - { - // find common ancestor at the same depth, if any - xml_node_struct* lp = ln; - xml_node_struct* rp = rn; - - while (lp && rp && lp->parent != rp->parent) - { - lp = lp->parent; - rp = rp->parent; - } - - // parents are the same! 
- if (lp && rp) return node_is_before_sibling(lp, rp); - - // nodes are at different depths, need to normalize heights - bool left_higher = !lp; - - while (lp) - { - lp = lp->parent; - ln = ln->parent; - } - - while (rp) - { - rp = rp->parent; - rn = rn->parent; - } - - // one node is the ancestor of the other - if (ln == rn) return left_higher; - - // find common ancestor... again - while (ln->parent != rn->parent) - { - ln = ln->parent; - rn = rn->parent; - } - - return node_is_before_sibling(ln, rn); - } - - PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node) - { - while (node && node != parent) node = node->parent; - - return parent && node == parent; - } - - PUGI__FN const void* document_buffer_order(const xpath_node& xnode) - { - xml_node_struct* node = xnode.node().internal_object(); - - if (node) - { - if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) - { - if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; - if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; - } - - return 0; - } - - xml_attribute_struct* attr = xnode.attribute().internal_object(); - - if (attr) - { - if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0) - { - if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name; - if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value; - } - - return 0; - } - - return 0; - } - - struct document_order_comparator - { - bool operator()(const xpath_node& lhs, const xpath_node& rhs) const - { - // optimized document order based check - const void* lo = document_buffer_order(lhs); - const void* ro = document_buffer_order(rhs); - - if (lo && ro) return lo < ro; - - // slow comparison - xml_node ln = lhs.node(), rn = rhs.node(); - - // compare attributes - if (lhs.attribute() && 
rhs.attribute()) - { - // shared parent - if (lhs.parent() == rhs.parent()) - { - // determine sibling order - for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) - if (a == rhs.attribute()) - return true; - - return false; - } - - // compare attribute parents - ln = lhs.parent(); - rn = rhs.parent(); - } - else if (lhs.attribute()) - { - // attributes go after the parent element - if (lhs.parent() == rhs.node()) return false; - - ln = lhs.parent(); - } - else if (rhs.attribute()) - { - // attributes go after the parent element - if (rhs.parent() == lhs.node()) return true; - - rn = rhs.parent(); - } - - if (ln == rn) return false; - - if (!ln || !rn) return ln < rn; - - return node_is_before(ln.internal_object(), rn.internal_object()); - } - }; - - struct duplicate_comparator - { - bool operator()(const xpath_node& lhs, const xpath_node& rhs) const - { - if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true; - else return rhs.attribute() ? false : lhs.node() < rhs.node(); - } - }; - - PUGI__FN double gen_nan() - { - #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) - union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1]; - u[0].i = 0x7fc00000; - return u[0].f; - #else - // fallback - const volatile double zero = 0.0; - return zero / zero; - #endif - } - - PUGI__FN bool is_nan(double value) - { - #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) - return !!_isnan(value); - #elif defined(fpclassify) && defined(FP_NAN) - return fpclassify(value) == FP_NAN; - #else - // fallback - const volatile double v = value; - return v != v; - #endif - } - - PUGI__FN const char_t* convert_number_to_string_special(double value) - { - #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) - if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; - if (_isnan(value)) return PUGIXML_TEXT("NaN"); - return value > 0 ? 
PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); - #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) - switch (fpclassify(value)) - { - case FP_NAN: - return PUGIXML_TEXT("NaN"); - - case FP_INFINITE: - return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); - - case FP_ZERO: - return PUGIXML_TEXT("0"); - - default: - return 0; - } - #else - // fallback - const volatile double v = value; - - if (v == 0) return PUGIXML_TEXT("0"); - if (v != v) return PUGIXML_TEXT("NaN"); - if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); - return 0; - #endif - } - - PUGI__FN bool convert_number_to_boolean(double value) - { - return (value != 0 && !is_nan(value)); - } - - PUGI__FN void truncate_zeros(char* begin, char* end) - { - while (begin != end && end[-1] == '0') end--; - - *end = 0; - } - - // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent -#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) - PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent) - { - // get base values - int sign, exponent; - _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign); - - // truncate redundant zeros - truncate_zeros(buffer, buffer + strlen(buffer)); - - // fill results - *out_mantissa = buffer; - *out_exponent = exponent; - } +PUGI__FN const char_t* find_char(const char_t* s, char_t c) +{ +#ifdef PUGIXML_WCHAR_MODE + return wcschr(s, c); #else - PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent) - { - // get a scientific notation value with IEEE DBL_DIG decimals - sprintf(buffer, "%.*e", DBL_DIG, value); - assert(strlen(buffer) < buffer_size); - (void)!buffer_size; + return strchr(s, c); +#endif +} - // get the exponent (possibly negative) - char* 
exponent_string = strchr(buffer, 'e'); - assert(exponent_string); +PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p) +{ +#ifdef PUGIXML_WCHAR_MODE + // MSVC6 wcsstr bug workaround (if s is empty it always returns 0) + return (*p == 0) ? s : wcsstr(s, p); +#else + return strstr(s, p); +#endif +} - int exponent = atoi(exponent_string + 1); +// Converts symbol to lower case, if it is an ASCII one +PUGI__FN char_t tolower_ascii(char_t ch) +{ + return static_cast(ch - 'A') < 26 ? static_cast(ch | ' ') : ch; +} - // extract mantissa string: skip sign - char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer; - assert(mantissa[0] != '0' && mantissa[1] == '.'); +PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc) +{ + if (na.attribute()) + return xpath_string::from_const(na.attribute().value()); + else { + xml_node n = na.node(); - // divide mantissa by 10 to eliminate integer part - mantissa[1] = mantissa[0]; - mantissa++; - exponent++; + switch (n.type()) { + case node_pcdata: + case node_cdata: + case node_comment: + case node_pi: + return xpath_string::from_const(n.value()); - // remove extra mantissa digits and zero-terminate mantissa - truncate_zeros(mantissa, exponent_string); + case node_document: + case node_element: { + xpath_string result; - // fill results - *out_mantissa = mantissa; - *out_exponent = exponent; - } + xml_node cur = n.first_child(); + + while (cur && cur != n) { + if (cur.type() == node_pcdata || cur.type() == node_cdata) + result.append(xpath_string::from_const(cur.value()), alloc); + + if (cur.first_child()) + cur = cur.first_child(); + else if (cur.next_sibling()) + cur = cur.next_sibling(); + else { + while (!cur.next_sibling() && cur != n) + cur = cur.parent(); + + if (cur != n) cur = cur.next_sibling(); + } + } + + return result; + } + + default: + return xpath_string(); + } + } +} + +PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn) +{ + assert(ln->parent == 
rn->parent); + + // there is no common ancestor (the shared parent is null), nodes are from different documents + if (!ln->parent) return ln < rn; + + // determine sibling order + xml_node_struct* ls = ln; + xml_node_struct* rs = rn; + + while (ls && rs) { + if (ls == rn) return true; + if (rs == ln) return false; + + ls = ls->next_sibling; + rs = rs->next_sibling; + } + + // if rn sibling chain ended ln must be before rn + return !rs; +} + +PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) +{ + // find common ancestor at the same depth, if any + xml_node_struct* lp = ln; + xml_node_struct* rp = rn; + + while (lp && rp && lp->parent != rp->parent) { + lp = lp->parent; + rp = rp->parent; + } + + // parents are the same! + if (lp && rp) return node_is_before_sibling(lp, rp); + + // nodes are at different depths, need to normalize heights + bool left_higher = !lp; + + while (lp) { + lp = lp->parent; + ln = ln->parent; + } + + while (rp) { + rp = rp->parent; + rn = rn->parent; + } + + // one node is the ancestor of the other + if (ln == rn) return left_higher; + + // find common ancestor... 
again + while (ln->parent != rn->parent) { + ln = ln->parent; + rn = rn->parent; + } + + return node_is_before_sibling(ln, rn); +} + +PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node) +{ + while (node && node != parent) node = node->parent; + + return parent && node == parent; +} + +PUGI__FN const void* document_buffer_order(const xpath_node& xnode) +{ + xml_node_struct* node = xnode.node().internal_object(); + + if (node) { + if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) { + if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; + if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; + } + + return 0; + } + + xml_attribute_struct* attr = xnode.attribute().internal_object(); + + if (attr) { + if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0) { + if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name; + if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value; + } + + return 0; + } + + return 0; +} + +struct document_order_comparator { + bool operator()(const xpath_node& lhs, const xpath_node& rhs) const { + // optimized document order based check + const void* lo = document_buffer_order(lhs); + const void* ro = document_buffer_order(rhs); + + if (lo && ro) return lo < ro; + + // slow comparison + xml_node ln = lhs.node(), rn = rhs.node(); + + // compare attributes + if (lhs.attribute() && rhs.attribute()) { + // shared parent + if (lhs.parent() == rhs.parent()) { + // determine sibling order + for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) + if (a == rhs.attribute()) + return true; + + return false; + } + + // compare attribute parents + ln = lhs.parent(); + rn = rhs.parent(); + } else if (lhs.attribute()) { + // attributes go after the parent element + if 
(lhs.parent() == rhs.node()) return false; + + ln = lhs.parent(); + } else if (rhs.attribute()) { + // attributes go after the parent element + if (rhs.parent() == lhs.node()) return true; + + rn = rhs.parent(); + } + + if (ln == rn) return false; + + if (!ln || !rn) return ln < rn; + + return node_is_before(ln.internal_object(), rn.internal_object()); + } +}; + +struct duplicate_comparator { + bool operator()(const xpath_node& lhs, const xpath_node& rhs) const { + if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true; + else return rhs.attribute() ? false : lhs.node() < rhs.node(); + } +}; + +PUGI__FN double gen_nan() +{ +#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) + union { + float f; + uint32_t i; + } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1]; + u[0].i = 0x7fc00000; + return u[0].f; +#else + // fallback + const volatile double zero = 0.0; + return zero / zero; +#endif +} + +PUGI__FN bool is_nan(double value) +{ +#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) + return !!_isnan(value); +#elif defined(fpclassify) && defined(FP_NAN) + return fpclassify(value) == FP_NAN; +#else + // fallback + const volatile double v = value; + return v != v; +#endif +} + +PUGI__FN const char_t* convert_number_to_string_special(double value) +{ +#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) + if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; + if (_isnan(value)) return PUGIXML_TEXT("NaN"); + return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); +#elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) + switch (fpclassify(value)) { + case FP_NAN: + return PUGIXML_TEXT("NaN"); + + case FP_INFINITE: + return value > 0 ? 
PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); + + case FP_ZERO: + return PUGIXML_TEXT("0"); + + default: + return 0; + } +#else + // fallback + const volatile double v = value; + + if (v == 0) return PUGIXML_TEXT("0"); + if (v != v) return PUGIXML_TEXT("NaN"); + if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); + return 0; +#endif +} + +PUGI__FN bool convert_number_to_boolean(double value) +{ + return (value != 0 && !is_nan(value)); +} + +PUGI__FN void truncate_zeros(char* begin, char* end) +{ + while (begin != end && end[-1] == '0') end--; + + *end = 0; +} + +// gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent +#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) +PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent) +{ + // get base values + int sign, exponent; + _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign); + + // truncate redundant zeros + truncate_zeros(buffer, buffer + strlen(buffer)); + + // fill results + *out_mantissa = buffer; + *out_exponent = exponent; +} +#else +PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent) +{ + // get a scientific notation value with IEEE DBL_DIG decimals + sprintf(buffer, "%.*e", DBL_DIG, value); + assert(strlen(buffer) < buffer_size); + (void)!buffer_size; + + // get the exponent (possibly negative) + char* exponent_string = strchr(buffer, 'e'); + assert(exponent_string); + + int exponent = atoi(exponent_string + 1); + + // extract mantissa string: skip sign + char* mantissa = buffer[0] == '-' ? 
buffer + 1 : buffer; + assert(mantissa[0] != '0' && mantissa[1] == '.'); + + // divide mantissa by 10 to eliminate integer part + mantissa[1] = mantissa[0]; + mantissa++; + exponent++; + + // remove extra mantissa digits and zero-terminate mantissa + truncate_zeros(mantissa, exponent_string); + + // fill results + *out_mantissa = mantissa; + *out_exponent = exponent; +} #endif - PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc) - { - // try special number conversion - const char_t* special = convert_number_to_string_special(value); - if (special) return xpath_string::from_const(special); - - // get mantissa + exponent form - char mantissa_buffer[32]; - - char* mantissa; - int exponent; - convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent); - - // allocate a buffer of suitable length for the number - size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; - char_t* result = static_cast(alloc->allocate(sizeof(char_t) * result_size)); - assert(result); - - // make the number! - char_t* s = result; - - // sign - if (value < 0) *s++ = '-'; - - // integer part - if (exponent <= 0) - { - *s++ = '0'; - } - else - { - while (exponent > 0) - { - assert(*mantissa == 0 || static_cast(static_cast(*mantissa) - '0') <= 9); - *s++ = *mantissa ? 
*mantissa++ : '0'; - exponent--; - } - } - - // fractional part - if (*mantissa) - { - // decimal point - *s++ = '.'; - - // extra zeroes from negative exponent - while (exponent < 0) - { - *s++ = '0'; - exponent++; - } - - // extra mantissa digits - while (*mantissa) - { - assert(static_cast(*mantissa - '0') <= 9); - *s++ = *mantissa++; - } - } - - // zero-terminate - assert(s < result + result_size); - *s = 0; - - return xpath_string::from_heap_preallocated(result, s); - } - - PUGI__FN bool check_string_to_number_format(const char_t* string) - { - // parse leading whitespace - while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; - - // parse sign - if (*string == '-') ++string; - - if (!*string) return false; - - // if there is no integer part, there should be a decimal part with at least one digit - if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false; - - // parse integer part - while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; - - // parse decimal part - if (*string == '.') - { - ++string; - - while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; - } - - // parse trailing whitespace - while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; - - return *string == 0; - } - - PUGI__FN double convert_string_to_number(const char_t* string) - { - // check string format - if (!check_string_to_number_format(string)) return gen_nan(); - - // parse string - #ifdef PUGIXML_WCHAR_MODE - return wcstod(string, 0); - #else - return strtod(string, 0); - #endif - } - - PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result) - { - size_t length = static_cast(end - begin); - char_t* scratch = buffer; - - if (length >= sizeof(buffer) / sizeof(buffer[0])) - { - // need to make dummy on-heap copy - scratch = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!scratch) return false; - } - - // copy string to 
zero-terminated buffer and perform conversion - memcpy(scratch, begin, length * sizeof(char_t)); - scratch[length] = 0; - - *out_result = convert_string_to_number(scratch); - - // free dummy buffer - if (scratch != buffer) xml_memory::deallocate(scratch); - - return true; - } - - PUGI__FN double round_nearest(double value) - { - return floor(value + 0.5); - } - - PUGI__FN double round_nearest_nzero(double value) - { - // same as round_nearest, but returns -0 for [-0.5, -0] - // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) - return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); - } - - PUGI__FN const char_t* qualified_name(const xpath_node& node) - { - return node.attribute() ? node.attribute().name() : node.node().name(); - } - - PUGI__FN const char_t* local_name(const xpath_node& node) - { - const char_t* name = qualified_name(node); - const char_t* p = find_char(name, ':'); - - return p ? p + 1 : name; - } - - struct namespace_uri_predicate - { - const char_t* prefix; - size_t prefix_length; - - namespace_uri_predicate(const char_t* name) - { - const char_t* pos = find_char(name, ':'); - - prefix = pos ? name : 0; - prefix_length = pos ? static_cast(pos - name) : 0; - } - - bool operator()(xml_attribute a) const - { - const char_t* name = a.name(); - - if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false; - - return prefix ? 
name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0; - } - }; - - PUGI__FN const char_t* namespace_uri(xml_node node) - { - namespace_uri_predicate pred = node.name(); - - xml_node p = node; - - while (p) - { - xml_attribute a = p.find_attribute(pred); - - if (a) return a.value(); - - p = p.parent(); - } - - return PUGIXML_TEXT(""); - } - - PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent) - { - namespace_uri_predicate pred = attr.name(); - - // Default namespace does not apply to attributes - if (!pred.prefix) return PUGIXML_TEXT(""); - - xml_node p = parent; - - while (p) - { - xml_attribute a = p.find_attribute(pred); - - if (a) return a.value(); - - p = p.parent(); - } - - return PUGIXML_TEXT(""); - } - - PUGI__FN const char_t* namespace_uri(const xpath_node& node) - { - return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node()); - } - - PUGI__FN char_t* normalize_space(char_t* buffer) - { - char_t* write = buffer; - - for (char_t* it = buffer; *it; ) - { - char_t ch = *it++; - - if (PUGI__IS_CHARTYPE(ch, ct_space)) - { - // replace whitespace sequence with single space - while (PUGI__IS_CHARTYPE(*it, ct_space)) it++; - - // avoid leading spaces - if (write != buffer) *write++ = ' '; - } - else *write++ = ch; - } - - // remove trailing space - if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--; - - // zero-terminate - *write = 0; - - return write; - } - - PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length) - { - char_t* write = buffer; - - while (*buffer) - { - PUGI__DMC_VOLATILE char_t ch = *buffer++; - - const char_t* pos = find_char(from, ch); - - if (!pos) - *write++ = ch; // do not process - else if (static_cast(pos - from) < to_length) - *write++ = to[pos - from]; // replace - } - - // zero-terminate - *write = 0; - - return write; - } - - PUGI__FN unsigned char* 
translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to) - { - unsigned char table[128] = {0}; - - while (*from) - { - unsigned int fc = static_cast(*from); - unsigned int tc = static_cast(*to); - - if (fc >= 128 || tc >= 128) - return 0; - - // code=128 means "skip character" - if (!table[fc]) - table[fc] = static_cast(tc ? tc : 128); - - from++; - if (tc) to++; - } - - for (int i = 0; i < 128; ++i) - if (!table[i]) - table[i] = static_cast(i); - - void* result = alloc->allocate_nothrow(sizeof(table)); - - if (result) - { - memcpy(result, table, sizeof(table)); - } - - return static_cast(result); - } - - PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table) - { - char_t* write = buffer; - - while (*buffer) - { - char_t ch = *buffer++; - unsigned int index = static_cast(ch); - - if (index < 128) - { - unsigned char code = table[index]; - - // code=128 means "skip character" (table size is 128 so 128 can be a special value) - // this code skips these characters without extra branches - *write = static_cast(code); - write += 1 - (code >> 7); - } - else - { - *write++ = ch; - } - } - - // zero-terminate - *write = 0; - - return write; - } - - inline bool is_xpath_attribute(const char_t* name) - { - return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')); - } - - struct xpath_variable_boolean: xpath_variable - { - xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false) - { - } - - bool value; - char_t name[1]; - }; - - struct xpath_variable_number: xpath_variable - { - xpath_variable_number(): xpath_variable(xpath_type_number), value(0) - { - } - - double value; - char_t name[1]; - }; - - struct xpath_variable_string: xpath_variable - { - xpath_variable_string(): xpath_variable(xpath_type_string), value(0) - { - } - - ~xpath_variable_string() - { - if (value) xml_memory::deallocate(value); - } - - char_t* value; - char_t name[1]; - }; - - struct 
xpath_variable_node_set: xpath_variable - { - xpath_variable_node_set(): xpath_variable(xpath_type_node_set) - { - } - - xpath_node_set value; - char_t name[1]; - }; - - static const xpath_node_set dummy_node_set; - - PUGI__FN unsigned int hash_string(const char_t* str) - { - // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time) - unsigned int result = 0; - - while (*str) - { - result += static_cast(*str++); - result += result << 10; - result ^= result >> 6; - } - - result += result << 3; - result ^= result >> 11; - result += result << 15; - - return result; - } - - template PUGI__FN T* new_xpath_variable(const char_t* name) - { - size_t length = strlength(name); - if (length == 0) return 0; // empty variable names are invalid - - // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters - void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t)); - if (!memory) return 0; - - T* result = new (memory) T(); - - memcpy(result->name, name, (length + 1) * sizeof(char_t)); - - return result; - } - - PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name) - { - switch (type) - { - case xpath_type_node_set: - return new_xpath_variable(name); - - case xpath_type_number: - return new_xpath_variable(name); - - case xpath_type_string: - return new_xpath_variable(name); - - case xpath_type_boolean: - return new_xpath_variable(name); - - default: - return 0; - } - } - - template PUGI__FN void delete_xpath_variable(T* var) - { - var->~T(); - xml_memory::deallocate(var); - } - - PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var) - { - switch (type) - { - case xpath_type_node_set: - delete_xpath_variable(static_cast(var)); - break; - - case xpath_type_number: - delete_xpath_variable(static_cast(var)); - break; - - case xpath_type_string: - delete_xpath_variable(static_cast(var)); - break; - - case xpath_type_boolean: 
- delete_xpath_variable(static_cast(var)); - break; - - default: - assert(!"Invalid variable type"); - } - } - - PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs) - { - switch (rhs->type()) - { - case xpath_type_node_set: - return lhs->set(static_cast(rhs)->value); - - case xpath_type_number: - return lhs->set(static_cast(rhs)->value); - - case xpath_type_string: - return lhs->set(static_cast(rhs)->value); - - case xpath_type_boolean: - return lhs->set(static_cast(rhs)->value); - - default: - assert(!"Invalid variable type"); - return false; - } - } - - PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result) - { - size_t length = static_cast(end - begin); - char_t* scratch = buffer; - - if (length >= sizeof(buffer) / sizeof(buffer[0])) - { - // need to make dummy on-heap copy - scratch = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!scratch) return false; - } - - // copy string to zero-terminated buffer and perform lookup - memcpy(scratch, begin, length * sizeof(char_t)); - scratch[length] = 0; - - *out_result = set->get(scratch); - - // free dummy buffer - if (scratch != buffer) xml_memory::deallocate(scratch); - - return true; - } +PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc) +{ + // try special number conversion + const char_t* special = convert_number_to_string_special(value); + if (special) return xpath_string::from_const(special); + + // get mantissa + exponent form + char mantissa_buffer[32]; + + char* mantissa; + int exponent; + convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent); + + // allocate a buffer of suitable length for the number + size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? 
exponent : -exponent) + 4; + char_t* result = static_cast(alloc->allocate(sizeof(char_t) * result_size)); + assert(result); + + // make the number! + char_t* s = result; + + // sign + if (value < 0) *s++ = '-'; + + // integer part + if (exponent <= 0) { + *s++ = '0'; + } else { + while (exponent > 0) { + assert(*mantissa == 0 || static_cast(static_cast(*mantissa) - '0') <= 9); + *s++ = *mantissa ? *mantissa++ : '0'; + exponent--; + } + } + + // fractional part + if (*mantissa) { + // decimal point + *s++ = '.'; + + // extra zeroes from negative exponent + while (exponent < 0) { + *s++ = '0'; + exponent++; + } + + // extra mantissa digits + while (*mantissa) { + assert(static_cast(*mantissa - '0') <= 9); + *s++ = *mantissa++; + } + } + + // zero-terminate + assert(s < result + result_size); + *s = 0; + + return xpath_string::from_heap_preallocated(result, s); +} + +PUGI__FN bool check_string_to_number_format(const char_t* string) +{ + // parse leading whitespace + while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; + + // parse sign + if (*string == '-') ++string; + + if (!*string) return false; + + // if there is no integer part, there should be a decimal part with at least one digit + if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' 
|| !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false; + + // parse integer part + while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; + + // parse decimal part + if (*string == '.') { + ++string; + + while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; + } + + // parse trailing whitespace + while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; + + return *string == 0; +} + +PUGI__FN double convert_string_to_number(const char_t* string) +{ + // check string format + if (!check_string_to_number_format(string)) return gen_nan(); + + // parse string +#ifdef PUGIXML_WCHAR_MODE + return wcstod(string, 0); +#else + return strtod(string, 0); +#endif +} + +PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result) +{ + size_t length = static_cast(end - begin); + char_t* scratch = buffer; + + if (length >= sizeof(buffer) / sizeof(buffer[0])) { + // need to make dummy on-heap copy + scratch = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); + if (!scratch) return false; + } + + // copy string to zero-terminated buffer and perform conversion + memcpy(scratch, begin, length * sizeof(char_t)); + scratch[length] = 0; + + *out_result = convert_string_to_number(scratch); + + // free dummy buffer + if (scratch != buffer) xml_memory::deallocate(scratch); + + return true; +} + +PUGI__FN double round_nearest(double value) +{ + return floor(value + 0.5); +} + +PUGI__FN double round_nearest_nzero(double value) +{ + // same as round_nearest, but returns -0 for [-0.5, -0] + // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) + return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); +} + +PUGI__FN const char_t* qualified_name(const xpath_node& node) +{ + return node.attribute() ? 
node.attribute().name() : node.node().name(); +} + +PUGI__FN const char_t* local_name(const xpath_node& node) +{ + const char_t* name = qualified_name(node); + const char_t* p = find_char(name, ':'); + + return p ? p + 1 : name; +} + +struct namespace_uri_predicate { + const char_t* prefix; + size_t prefix_length; + + namespace_uri_predicate(const char_t* name) { + const char_t* pos = find_char(name, ':'); + + prefix = pos ? name : 0; + prefix_length = pos ? static_cast(pos - name) : 0; + } + + bool operator()(xml_attribute a) const { + const char_t* name = a.name(); + + if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false; + + return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0; + } +}; + +PUGI__FN const char_t* namespace_uri(xml_node node) +{ + namespace_uri_predicate pred = node.name(); + + xml_node p = node; + + while (p) { + xml_attribute a = p.find_attribute(pred); + + if (a) return a.value(); + + p = p.parent(); + } + + return PUGIXML_TEXT(""); +} + +PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent) +{ + namespace_uri_predicate pred = attr.name(); + + // Default namespace does not apply to attributes + if (!pred.prefix) return PUGIXML_TEXT(""); + + xml_node p = parent; + + while (p) { + xml_attribute a = p.find_attribute(pred); + + if (a) return a.value(); + + p = p.parent(); + } + + return PUGIXML_TEXT(""); +} + +PUGI__FN const char_t* namespace_uri(const xpath_node& node) +{ + return node.attribute() ? 
namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node()); +} + +PUGI__FN char_t* normalize_space(char_t* buffer) +{ + char_t* write = buffer; + + for (char_t* it = buffer; *it; ) { + char_t ch = *it++; + + if (PUGI__IS_CHARTYPE(ch, ct_space)) { + // replace whitespace sequence with single space + while (PUGI__IS_CHARTYPE(*it, ct_space)) it++; + + // avoid leading spaces + if (write != buffer) *write++ = ' '; + } else *write++ = ch; + } + + // remove trailing space + if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--; + + // zero-terminate + *write = 0; + + return write; +} + +PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length) +{ + char_t* write = buffer; + + while (*buffer) { + PUGI__DMC_VOLATILE char_t ch = *buffer++; + + const char_t* pos = find_char(from, ch); + + if (!pos) + *write++ = ch; // do not process + else if (static_cast(pos - from) < to_length) + *write++ = to[pos - from]; // replace + } + + // zero-terminate + *write = 0; + + return write; +} + +PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to) +{ + unsigned char table[128] = {0}; + + while (*from) { + unsigned int fc = static_cast(*from); + unsigned int tc = static_cast(*to); + + if (fc >= 128 || tc >= 128) + return 0; + + // code=128 means "skip character" + if (!table[fc]) + table[fc] = static_cast(tc ? 
tc : 128); + + from++; + if (tc) to++; + } + + for (int i = 0; i < 128; ++i) + if (!table[i]) + table[i] = static_cast(i); + + void* result = alloc->allocate_nothrow(sizeof(table)); + + if (result) { + memcpy(result, table, sizeof(table)); + } + + return static_cast(result); +} + +PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table) +{ + char_t* write = buffer; + + while (*buffer) { + char_t ch = *buffer++; + unsigned int index = static_cast(ch); + + if (index < 128) { + unsigned char code = table[index]; + + // code=128 means "skip character" (table size is 128 so 128 can be a special value) + // this code skips these characters without extra branches + *write = static_cast(code); + write += 1 - (code >> 7); + } else { + *write++ = ch; + } + } + + // zero-terminate + *write = 0; + + return write; +} + +inline bool is_xpath_attribute(const char_t* name) +{ + return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')); +} + +struct xpath_variable_boolean: xpath_variable { + xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false) { + } + + bool value; + char_t name[1]; +}; + +struct xpath_variable_number: xpath_variable { + xpath_variable_number(): xpath_variable(xpath_type_number), value(0) { + } + + double value; + char_t name[1]; +}; + +struct xpath_variable_string: xpath_variable { + xpath_variable_string(): xpath_variable(xpath_type_string), value(0) { + } + + ~xpath_variable_string() { + if (value) xml_memory::deallocate(value); + } + + char_t* value; + char_t name[1]; +}; + +struct xpath_variable_node_set: xpath_variable { + xpath_variable_node_set(): xpath_variable(xpath_type_node_set) { + } + + xpath_node_set value; + char_t name[1]; +}; + +static const xpath_node_set dummy_node_set; + +PUGI__FN unsigned int hash_string(const char_t* str) +{ + // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time) + unsigned int result = 0; + + while (*str) { + 
result += static_cast(*str++); + result += result << 10; + result ^= result >> 6; + } + + result += result << 3; + result ^= result >> 11; + result += result << 15; + + return result; +} + +template PUGI__FN T* new_xpath_variable(const char_t* name) +{ + size_t length = strlength(name); + if (length == 0) return 0; // empty variable names are invalid + + // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters + void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t)); + if (!memory) return 0; + + T* result = new (memory) T(); + + memcpy(result->name, name, (length + 1) * sizeof(char_t)); + + return result; +} + +PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name) +{ + switch (type) { + case xpath_type_node_set: + return new_xpath_variable(name); + + case xpath_type_number: + return new_xpath_variable(name); + + case xpath_type_string: + return new_xpath_variable(name); + + case xpath_type_boolean: + return new_xpath_variable(name); + + default: + return 0; + } +} + +template PUGI__FN void delete_xpath_variable(T* var) +{ + var->~T(); + xml_memory::deallocate(var); +} + +PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var) +{ + switch (type) { + case xpath_type_node_set: + delete_xpath_variable(static_cast(var)); + break; + + case xpath_type_number: + delete_xpath_variable(static_cast(var)); + break; + + case xpath_type_string: + delete_xpath_variable(static_cast(var)); + break; + + case xpath_type_boolean: + delete_xpath_variable(static_cast(var)); + break; + + default: + assert(!"Invalid variable type"); + } +} + +PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs) +{ + switch (rhs->type()) { + case xpath_type_node_set: + return lhs->set(static_cast(rhs)->value); + + case xpath_type_number: + return lhs->set(static_cast(rhs)->value); + + case xpath_type_string: + return lhs->set(static_cast(rhs)->value); + 
+ case xpath_type_boolean: + return lhs->set(static_cast(rhs)->value); + + default: + assert(!"Invalid variable type"); + return false; + } +} + +PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result) +{ + size_t length = static_cast(end - begin); + char_t* scratch = buffer; + + if (length >= sizeof(buffer) / sizeof(buffer[0])) { + // need to make dummy on-heap copy + scratch = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); + if (!scratch) return false; + } + + // copy string to zero-terminated buffer and perform lookup + memcpy(scratch, begin, length * sizeof(char_t)); + scratch[length] = 0; + + *out_result = set->get(scratch); + + // free dummy buffer + if (scratch != buffer) xml_memory::deallocate(scratch); + + return true; +} PUGI__NS_END // Internal node set class PUGI__NS_BEGIN - PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end) - { - if (end - begin < 2) - return xpath_node_set::type_sorted; +PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end) +{ + if (end - begin < 2) + return xpath_node_set::type_sorted; - document_order_comparator cmp; + document_order_comparator cmp; - bool first = cmp(begin[0], begin[1]); + bool first = cmp(begin[0], begin[1]); - for (const xpath_node* it = begin + 1; it + 1 < end; ++it) - if (cmp(it[0], it[1]) != first) - return xpath_node_set::type_unsorted; + for (const xpath_node* it = begin + 1; it + 1 < end; ++it) + if (cmp(it[0], it[1]) != first) + return xpath_node_set::type_unsorted; - return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse; - } + return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse; +} - PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev) - { - xpath_node_set::type_t order = rev ? 
xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; +PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev) +{ + xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; - if (type == xpath_node_set::type_unsorted) - { - xpath_node_set::type_t sorted = xpath_get_order(begin, end); + if (type == xpath_node_set::type_unsorted) { + xpath_node_set::type_t sorted = xpath_get_order(begin, end); - if (sorted == xpath_node_set::type_unsorted) - { - sort(begin, end, document_order_comparator()); + if (sorted == xpath_node_set::type_unsorted) { + sort(begin, end, document_order_comparator()); - type = xpath_node_set::type_sorted; - } - else - type = sorted; - } - - if (type != order) reverse(begin, end); - - return order; - } + type = xpath_node_set::type_sorted; + } else + type = sorted; + } - PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type) - { - if (begin == end) return xpath_node(); + if (type != order) reverse(begin, end); - switch (type) - { - case xpath_node_set::type_sorted: - return *begin; + return order; +} - case xpath_node_set::type_sorted_reverse: - return *(end - 1); +PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type) +{ + if (begin == end) return xpath_node(); - case xpath_node_set::type_unsorted: - return *min_element(begin, end, document_order_comparator()); + switch (type) { + case xpath_node_set::type_sorted: + return *begin; - default: - assert(!"Invalid node set type"); - return xpath_node(); - } - } + case xpath_node_set::type_sorted_reverse: + return *(end - 1); - class xpath_node_set_raw - { - xpath_node_set::type_t _type; + case xpath_node_set::type_unsorted: + return *min_element(begin, end, document_order_comparator()); - xpath_node* _begin; - xpath_node* _end; - xpath_node* _eos; + default: + 
assert(!"Invalid node set type"); + return xpath_node(); + } +} - public: - xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) - { - } +class xpath_node_set_raw +{ + xpath_node_set::type_t _type; - xpath_node* begin() const - { - return _begin; - } + xpath_node* _begin; + xpath_node* _end; + xpath_node* _eos; - xpath_node* end() const - { - return _end; - } +public: + xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) { + } - bool empty() const - { - return _begin == _end; - } + xpath_node* begin() const { + return _begin; + } - size_t size() const - { - return static_cast(_end - _begin); - } + xpath_node* end() const { + return _end; + } - xpath_node first() const - { - return xpath_first(_begin, _end, _type); - } + bool empty() const { + return _begin == _end; + } - void push_back_grow(const xpath_node& node, xpath_allocator* alloc); + size_t size() const { + return static_cast(_end - _begin); + } - void push_back(const xpath_node& node, xpath_allocator* alloc) - { - if (_end != _eos) - *_end++ = node; - else - push_back_grow(node, alloc); - } + xpath_node first() const { + return xpath_first(_begin, _end, _type); + } - void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) - { - if (begin_ == end_) return; + void push_back_grow(const xpath_node& node, xpath_allocator* alloc); - size_t size_ = static_cast(_end - _begin); - size_t capacity = static_cast(_eos - _begin); - size_t count = static_cast(end_ - begin_); + void push_back(const xpath_node& node, xpath_allocator* alloc) { + if (_end != _eos) + *_end++ = node; + else + push_back_grow(node, alloc); + } - if (size_ + count > capacity) - { - // reallocate the old array or allocate a new one - xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); - assert(data); + void append(const xpath_node* begin_, const xpath_node* end_, 
xpath_allocator* alloc) { + if (begin_ == end_) return; - // finalize - _begin = data; - _end = data + size_; - _eos = data + size_ + count; - } + size_t size_ = static_cast(_end - _begin); + size_t capacity = static_cast(_eos - _begin); + size_t count = static_cast(end_ - begin_); - memcpy(_end, begin_, count * sizeof(xpath_node)); - _end += count; - } + if (size_ + count > capacity) { + // reallocate the old array or allocate a new one + xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); + assert(data); - void sort_do() - { - _type = xpath_sort(_begin, _end, _type, false); - } + // finalize + _begin = data; + _end = data + size_; + _eos = data + size_ + count; + } - void truncate(xpath_node* pos) - { - assert(_begin <= pos && pos <= _end); + memcpy(_end, begin_, count * sizeof(xpath_node)); + _end += count; + } - _end = pos; - } + void sort_do() { + _type = xpath_sort(_begin, _end, _type, false); + } - void remove_duplicates() - { - if (_type == xpath_node_set::type_unsorted) - sort(_begin, _end, duplicate_comparator()); - - _end = unique(_begin, _end); - } + void truncate(xpath_node* pos) { + assert(_begin <= pos && pos <= _end); - xpath_node_set::type_t type() const - { - return _type; - } + _end = pos; + } - void set_type(xpath_node_set::type_t value) - { - _type = value; - } - }; + void remove_duplicates() { + if (_type == xpath_node_set::type_unsorted) + sort(_begin, _end, duplicate_comparator()); - PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc) - { - size_t capacity = static_cast(_eos - _begin); + _end = unique(_begin, _end); + } - // get new capacity (1.5x rule) - size_t new_capacity = capacity + capacity / 2 + 1; + xpath_node_set::type_t type() const { + return _type; + } - // reallocate the old array or allocate a new one - xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), 
new_capacity * sizeof(xpath_node))); - assert(data); + void set_type(xpath_node_set::type_t value) { + _type = value; + } +}; - // finalize - _begin = data; - _end = data + capacity; - _eos = data + new_capacity; +PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc) +{ + size_t capacity = static_cast(_eos - _begin); - // push - *_end++ = node; - } + // get new capacity (1.5x rule) + size_t new_capacity = capacity + capacity / 2 + 1; + + // reallocate the old array or allocate a new one + xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); + assert(data); + + // finalize + _begin = data; + _end = data + capacity; + _eos = data + new_capacity; + + // push + *_end++ = node; +} PUGI__NS_END PUGI__NS_BEGIN - struct xpath_context - { - xpath_node n; - size_t position, size; - - xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_) - { - } - }; - - enum lexeme_t - { - lex_none = 0, - lex_equal, - lex_not_equal, - lex_less, - lex_greater, - lex_less_or_equal, - lex_greater_or_equal, - lex_plus, - lex_minus, - lex_multiply, - lex_union, - lex_var_ref, - lex_open_brace, - lex_close_brace, - lex_quoted_string, - lex_number, - lex_slash, - lex_double_slash, - lex_open_square_brace, - lex_close_square_brace, - lex_string, - lex_comma, - lex_axis_attribute, - lex_dot, - lex_double_dot, - lex_double_colon, - lex_eof - }; - - struct xpath_lexer_string - { - const char_t* begin; - const char_t* end; - - xpath_lexer_string(): begin(0), end(0) - { - } - - bool operator==(const char_t* other) const - { - size_t length = static_cast(end - begin); - - return strequalrange(other, begin, length); - } - }; - - class xpath_lexer - { - const char_t* _cur; - const char_t* _cur_lexeme_pos; - xpath_lexer_string _cur_lexeme_contents; - - lexeme_t _cur_lexeme; - - public: - explicit xpath_lexer(const char_t* query): 
_cur(query) - { - next(); - } - - const char_t* state() const - { - return _cur; - } - - void next() - { - const char_t* cur = _cur; - - while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur; - - // save lexeme position for error reporting - _cur_lexeme_pos = cur; - - switch (*cur) - { - case 0: - _cur_lexeme = lex_eof; - break; - - case '>': - if (*(cur+1) == '=') - { - cur += 2; - _cur_lexeme = lex_greater_or_equal; - } - else - { - cur += 1; - _cur_lexeme = lex_greater; - } - break; - - case '<': - if (*(cur+1) == '=') - { - cur += 2; - _cur_lexeme = lex_less_or_equal; - } - else - { - cur += 1; - _cur_lexeme = lex_less; - } - break; - - case '!': - if (*(cur+1) == '=') - { - cur += 2; - _cur_lexeme = lex_not_equal; - } - else - { - _cur_lexeme = lex_none; - } - break; - - case '=': - cur += 1; - _cur_lexeme = lex_equal; - - break; - - case '+': - cur += 1; - _cur_lexeme = lex_plus; - - break; - - case '-': - cur += 1; - _cur_lexeme = lex_minus; - - break; - - case '*': - cur += 1; - _cur_lexeme = lex_multiply; - - break; - - case '|': - cur += 1; - _cur_lexeme = lex_union; - - break; - - case '$': - cur += 1; - - if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) - { - _cur_lexeme_contents.begin = cur; - - while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; - - if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname - { - cur++; // : - - while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; - } - - _cur_lexeme_contents.end = cur; - - _cur_lexeme = lex_var_ref; - } - else - { - _cur_lexeme = lex_none; - } - - break; - - case '(': - cur += 1; - _cur_lexeme = lex_open_brace; - - break; - - case ')': - cur += 1; - _cur_lexeme = lex_close_brace; - - break; - - case '[': - cur += 1; - _cur_lexeme = lex_open_square_brace; - - break; - - case ']': - cur += 1; - _cur_lexeme = lex_close_square_brace; - - break; - - case ',': - cur += 1; - _cur_lexeme = lex_comma; - - break; - - case '/': - if (*(cur+1) == '/') - { - cur += 2; - _cur_lexeme = lex_double_slash; - } - 
else - { - cur += 1; - _cur_lexeme = lex_slash; - } - break; - - case '.': - if (*(cur+1) == '.') - { - cur += 2; - _cur_lexeme = lex_double_dot; - } - else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit)) - { - _cur_lexeme_contents.begin = cur; // . - - ++cur; - - while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; - - _cur_lexeme_contents.end = cur; - - _cur_lexeme = lex_number; - } - else - { - cur += 1; - _cur_lexeme = lex_dot; - } - break; - - case '@': - cur += 1; - _cur_lexeme = lex_axis_attribute; - - break; - - case '"': - case '\'': - { - char_t terminator = *cur; - - ++cur; - - _cur_lexeme_contents.begin = cur; - while (*cur && *cur != terminator) cur++; - _cur_lexeme_contents.end = cur; - - if (!*cur) - _cur_lexeme = lex_none; - else - { - cur += 1; - _cur_lexeme = lex_quoted_string; - } - - break; - } - - case ':': - if (*(cur+1) == ':') - { - cur += 2; - _cur_lexeme = lex_double_colon; - } - else - { - _cur_lexeme = lex_none; - } - break; - - default: - if (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) - { - _cur_lexeme_contents.begin = cur; - - while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; - - if (*cur == '.') - { - cur++; - - while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; - } - - _cur_lexeme_contents.end = cur; - - _cur_lexeme = lex_number; - } - else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) - { - _cur_lexeme_contents.begin = cur; - - while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; - - if (cur[0] == ':') - { - if (cur[1] == '*') // namespace test ncname:* - { - cur += 2; // :* - } - else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname - { - cur++; // : - - while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; - } - } - - _cur_lexeme_contents.end = cur; - - _cur_lexeme = lex_string; - } - else - { - _cur_lexeme = lex_none; - } - } - - _cur = cur; - } - - lexeme_t current() const - { - return _cur_lexeme; - } - - const char_t* current_pos() const - { - return _cur_lexeme_pos; - } - - const xpath_lexer_string& contents() const - 
{ - assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string); - - return _cur_lexeme_contents; - } - }; - - enum ast_type_t - { - ast_unknown, - ast_op_or, // left or right - ast_op_and, // left and right - ast_op_equal, // left = right - ast_op_not_equal, // left != right - ast_op_less, // left < right - ast_op_greater, // left > right - ast_op_less_or_equal, // left <= right - ast_op_greater_or_equal, // left >= right - ast_op_add, // left + right - ast_op_subtract, // left - right - ast_op_multiply, // left * right - ast_op_divide, // left / right - ast_op_mod, // left % right - ast_op_negate, // left - right - ast_op_union, // left | right - ast_predicate, // apply predicate to set; next points to next predicate - ast_filter, // select * from left where right - ast_string_constant, // string constant - ast_number_constant, // number constant - ast_variable, // variable - ast_func_last, // last() - ast_func_position, // position() - ast_func_count, // count(left) - ast_func_id, // id(left) - ast_func_local_name_0, // local-name() - ast_func_local_name_1, // local-name(left) - ast_func_namespace_uri_0, // namespace-uri() - ast_func_namespace_uri_1, // namespace-uri(left) - ast_func_name_0, // name() - ast_func_name_1, // name(left) - ast_func_string_0, // string() - ast_func_string_1, // string(left) - ast_func_concat, // concat(left, right, siblings) - ast_func_starts_with, // starts_with(left, right) - ast_func_contains, // contains(left, right) - ast_func_substring_before, // substring-before(left, right) - ast_func_substring_after, // substring-after(left, right) - ast_func_substring_2, // substring(left, right) - ast_func_substring_3, // substring(left, right, third) - ast_func_string_length_0, // string-length() - ast_func_string_length_1, // string-length(left) - ast_func_normalize_space_0, // normalize-space() - ast_func_normalize_space_1, // normalize-space(left) - 
ast_func_translate, // translate(left, right, third) - ast_func_boolean, // boolean(left) - ast_func_not, // not(left) - ast_func_true, // true() - ast_func_false, // false() - ast_func_lang, // lang(left) - ast_func_number_0, // number() - ast_func_number_1, // number(left) - ast_func_sum, // sum(left) - ast_func_floor, // floor(left) - ast_func_ceiling, // ceiling(left) - ast_func_round, // round(left) - ast_step, // process set left with step - ast_step_root, // select root node - - ast_opt_translate_table, // translate(left, right, third) where right/third are constants - ast_opt_compare_attribute // @name = 'string' - }; - - enum axis_t - { - axis_ancestor, - axis_ancestor_or_self, - axis_attribute, - axis_child, - axis_descendant, - axis_descendant_or_self, - axis_following, - axis_following_sibling, - axis_namespace, - axis_parent, - axis_preceding, - axis_preceding_sibling, - axis_self - }; - - enum nodetest_t - { - nodetest_none, - nodetest_name, - nodetest_type_node, - nodetest_type_comment, - nodetest_type_pi, - nodetest_type_text, - nodetest_pi, - nodetest_all, - nodetest_all_in_namespace - }; - - enum predicate_t - { - predicate_default, - predicate_posinv, - predicate_constant, - predicate_constant_one - }; - - enum nodeset_eval_t - { - nodeset_eval_all, - nodeset_eval_any, - nodeset_eval_first - }; - - template struct axis_to_type - { - static const axis_t axis; - }; - - template const axis_t axis_to_type::axis = N; - - class xpath_ast_node - { - private: - // node type - char _type; - char _rettype; - - // for ast_step - char _axis; - - // for ast_step/ast_predicate/ast_filter - char _test; - - // tree node structure - xpath_ast_node* _left; - xpath_ast_node* _right; - xpath_ast_node* _next; - - union - { - // value for ast_string_constant - const char_t* string; - // value for ast_number_constant - double number; - // variable for ast_variable - xpath_variable* variable; - // node test for ast_step (node name/namespace/node type/pi target) - const 
char_t* nodetest; - // table for ast_opt_translate_table - const unsigned char* table; - } _data; - - xpath_ast_node(const xpath_ast_node&); - xpath_ast_node& operator=(const xpath_ast_node&); - - template static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) - { - xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); - - if (lt != xpath_type_node_set && rt != xpath_type_node_set) - { - if (lt == xpath_type_boolean || rt == xpath_type_boolean) - return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); - else if (lt == xpath_type_number || rt == xpath_type_number) - return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); - else if (lt == xpath_type_string || rt == xpath_type_string) - { - xpath_allocator_capture cr(stack.result); - - xpath_string ls = lhs->eval_string(c, stack); - xpath_string rs = rhs->eval_string(c, stack); - - return comp(ls, rs); - } - } - else if (lt == xpath_type_node_set && rt == xpath_type_node_set) - { - xpath_allocator_capture cr(stack.result); - - xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); - - for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) - for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) - { - xpath_allocator_capture cri(stack.result); - - if (comp(string_value(*li, stack.result), string_value(*ri, stack.result))) - return true; - } - - return false; - } - else - { - if (lt == xpath_type_node_set) - { - swap(lhs, rhs); - swap(lt, rt); - } - - if (lt == xpath_type_boolean) - return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); - else if (lt == xpath_type_number) - { - xpath_allocator_capture cr(stack.result); - - double l = lhs->eval_number(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); - - for (const xpath_node* ri = rs.begin(); ri != rs.end(); 
++ri) - { - xpath_allocator_capture cri(stack.result); - - if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) - return true; - } - - return false; - } - else if (lt == xpath_type_string) - { - xpath_allocator_capture cr(stack.result); - - xpath_string l = lhs->eval_string(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); - - for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) - { - xpath_allocator_capture cri(stack.result); - - if (comp(l, string_value(*ri, stack.result))) - return true; - } - - return false; - } - } - - assert(!"Wrong types"); - return false; - } - - static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval) - { - return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any; - } - - template static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) - { - xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); - - if (lt != xpath_type_node_set && rt != xpath_type_node_set) - return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); - else if (lt == xpath_type_node_set && rt == xpath_type_node_set) - { - xpath_allocator_capture cr(stack.result); - - xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); - - for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) - { - xpath_allocator_capture cri(stack.result); - - double l = convert_string_to_number(string_value(*li, stack.result).c_str()); - - for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) - { - xpath_allocator_capture crii(stack.result); - - if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) - return true; - } - } - - return false; - } - else if (lt != xpath_type_node_set && rt == xpath_type_node_set) - { - xpath_allocator_capture cr(stack.result); - 
- double l = lhs->eval_number(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); - - for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) - { - xpath_allocator_capture cri(stack.result); - - if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) - return true; - } - - return false; - } - else if (lt == xpath_type_node_set && rt != xpath_type_node_set) - { - xpath_allocator_capture cr(stack.result); +struct xpath_context { + xpath_node n; + size_t position, size; + + xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_) { + } +}; + +enum lexeme_t { + lex_none = 0, + lex_equal, + lex_not_equal, + lex_less, + lex_greater, + lex_less_or_equal, + lex_greater_or_equal, + lex_plus, + lex_minus, + lex_multiply, + lex_union, + lex_var_ref, + lex_open_brace, + lex_close_brace, + lex_quoted_string, + lex_number, + lex_slash, + lex_double_slash, + lex_open_square_brace, + lex_close_square_brace, + lex_string, + lex_comma, + lex_axis_attribute, + lex_dot, + lex_double_dot, + lex_double_colon, + lex_eof +}; + +struct xpath_lexer_string { + const char_t* begin; + const char_t* end; + + xpath_lexer_string(): begin(0), end(0) { + } + + bool operator==(const char_t* other) const { + size_t length = static_cast(end - begin); + + return strequalrange(other, begin, length); + } +}; + +class xpath_lexer +{ + const char_t* _cur; + const char_t* _cur_lexeme_pos; + xpath_lexer_string _cur_lexeme_contents; + + lexeme_t _cur_lexeme; + +public: + explicit xpath_lexer(const char_t* query): _cur(query) { + next(); + } + + const char_t* state() const { + return _cur; + } + + void next() { + const char_t* cur = _cur; + + while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur; + + // save lexeme position for error reporting + _cur_lexeme_pos = cur; + + switch (*cur) { + case 0: + _cur_lexeme = lex_eof; + break; + + case '>': + if (*(cur+1) == '=') { + cur += 2; + _cur_lexeme = 
lex_greater_or_equal; + } else { + cur += 1; + _cur_lexeme = lex_greater; + } + break; + + case '<': + if (*(cur+1) == '=') { + cur += 2; + _cur_lexeme = lex_less_or_equal; + } else { + cur += 1; + _cur_lexeme = lex_less; + } + break; + + case '!': + if (*(cur+1) == '=') { + cur += 2; + _cur_lexeme = lex_not_equal; + } else { + _cur_lexeme = lex_none; + } + break; + + case '=': + cur += 1; + _cur_lexeme = lex_equal; + + break; + + case '+': + cur += 1; + _cur_lexeme = lex_plus; + + break; + + case '-': + cur += 1; + _cur_lexeme = lex_minus; - xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); - double r = rhs->eval_number(c, stack); + break; - for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) - { - xpath_allocator_capture cri(stack.result); + case '*': + cur += 1; + _cur_lexeme = lex_multiply; - if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r)) - return true; - } + break; - return false; - } - else - { - assert(!"Wrong types"); - return false; - } - } + case '|': + cur += 1; + _cur_lexeme = lex_union; - static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) - { - assert(ns.size() >= first); - assert(expr->rettype() != xpath_type_number); + break; - size_t i = 1; - size_t size = ns.size() - first; + case '$': + cur += 1; - xpath_node* last = ns.begin() + first; + if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) { + _cur_lexeme_contents.begin = cur; - // remove_if... 
or well, sort of - for (xpath_node* it = last; it != ns.end(); ++it, ++i) - { - xpath_context c(*it, i, size); + while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; - if (expr->eval_boolean(c, stack)) - { - *last++ = *it; + if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) { // qname + cur++; // : - if (once) break; - } - } - - ns.truncate(last); - } - - static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) - { - assert(ns.size() >= first); - assert(expr->rettype() == xpath_type_number); - - size_t i = 1; - size_t size = ns.size() - first; - - xpath_node* last = ns.begin() + first; - - // remove_if... or well, sort of - for (xpath_node* it = last; it != ns.end(); ++it, ++i) - { - xpath_context c(*it, i, size); - - if (expr->eval_number(c, stack) == i) - { - *last++ = *it; - - if (once) break; - } - } - - ns.truncate(last); - } - - static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) - { - assert(ns.size() >= first); - assert(expr->rettype() == xpath_type_number); - - size_t size = ns.size() - first; - - xpath_node* last = ns.begin() + first; - - xpath_context c(xpath_node(), 1, size); - - double er = expr->eval_number(c, stack); - - if (er >= 1.0 && er <= size) - { - size_t eri = static_cast(er); - - if (er == eri) - { - xpath_node r = last[eri - 1]; - - *last++ = r; - } - } - - ns.truncate(last); - } - - void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once) - { - if (ns.size() == first) return; - - assert(_type == ast_filter || _type == ast_predicate); - - if (_test == predicate_constant || _test == predicate_constant_one) - apply_predicate_number_const(ns, first, _right, stack); - else if (_right->rettype() == xpath_type_number) - apply_predicate_number(ns, first, _right, stack, once); - else - apply_predicate_boolean(ns, first, _right, stack, once); - } - - 
void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval) - { - if (ns.size() == first) return; - - bool last_once = eval_once(ns.type(), eval); - - for (xpath_ast_node* pred = _right; pred; pred = pred->_next) - pred->apply_predicate(ns, first, stack, !pred->_next && last_once); - } - - bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc) - { - assert(a); - - const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT(""); - - switch (_test) - { - case nodetest_name: - if (strequal(name, _data.nodetest) && is_xpath_attribute(name)) - { - ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); - return true; - } - break; - - case nodetest_type_node: - case nodetest_all: - if (is_xpath_attribute(name)) - { - ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); - return true; - } - break; - - case nodetest_all_in_namespace: - if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) - { - ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); - return true; - } - break; - - default: - ; - } - - return false; - } - - bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc) - { - assert(n); - - xml_node_type type = PUGI__NODETYPE(n); - - switch (_test) - { - case nodetest_name: - if (type == node_element && n->name && strequal(n->name, _data.nodetest)) - { - ns.push_back(xml_node(n), alloc); - return true; - } - break; - - case nodetest_type_node: - ns.push_back(xml_node(n), alloc); - return true; - - case nodetest_type_comment: - if (type == node_comment) - { - ns.push_back(xml_node(n), alloc); - return true; - } - break; - - case nodetest_type_text: - if (type == node_pcdata || type == node_cdata) - { - ns.push_back(xml_node(n), alloc); - return true; - } - break; - - case nodetest_type_pi: - if (type == node_pi) - { - ns.push_back(xml_node(n), alloc); - return true; - } - break; - - 
case nodetest_pi: - if (type == node_pi && n->name && strequal(n->name, _data.nodetest)) - { - ns.push_back(xml_node(n), alloc); - return true; - } - break; - - case nodetest_all: - if (type == node_element) - { - ns.push_back(xml_node(n), alloc); - return true; - } - break; - - case nodetest_all_in_namespace: - if (type == node_element && n->name && starts_with(n->name, _data.nodetest)) - { - ns.push_back(xml_node(n), alloc); - return true; - } - break; - - default: - assert(!"Unknown axis"); - } - - return false; - } - - template void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T) - { - const axis_t axis = T::axis; - - switch (axis) - { - case axis_attribute: - { - for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute) - if (step_push(ns, a, n, alloc) & once) - return; - - break; - } - - case axis_child: - { - for (xml_node_struct* c = n->first_child; c; c = c->next_sibling) - if (step_push(ns, c, alloc) & once) - return; - - break; - } - - case axis_descendant: - case axis_descendant_or_self: - { - if (axis == axis_descendant_or_self) - if (step_push(ns, n, alloc) & once) - return; - - xml_node_struct* cur = n->first_child; - - while (cur) - { - if (step_push(ns, cur, alloc) & once) - return; - - if (cur->first_child) - cur = cur->first_child; - else - { - while (!cur->next_sibling) - { - cur = cur->parent; - - if (cur == n) return; - } - - cur = cur->next_sibling; - } - } - - break; - } - - case axis_following_sibling: - { - for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling) - if (step_push(ns, c, alloc) & once) - return; - - break; - } - - case axis_preceding_sibling: - { - for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c) - if (step_push(ns, c, alloc) & once) - return; - - break; - } - - case axis_following: - { - xml_node_struct* cur = n; - - // exit from this node so that we don't include descendants - while (!cur->next_sibling) - { - cur = 
cur->parent; - - if (!cur) return; - } - - cur = cur->next_sibling; - - while (cur) - { - if (step_push(ns, cur, alloc) & once) - return; - - if (cur->first_child) - cur = cur->first_child; - else - { - while (!cur->next_sibling) - { - cur = cur->parent; - - if (!cur) return; - } - - cur = cur->next_sibling; - } - } - - break; - } - - case axis_preceding: - { - xml_node_struct* cur = n; - - // exit from this node so that we don't include descendants - while (!cur->prev_sibling_c->next_sibling) - { - cur = cur->parent; - - if (!cur) return; - } - - cur = cur->prev_sibling_c; - - while (cur) - { - if (cur->first_child) - cur = cur->first_child->prev_sibling_c; - else - { - // leaf node, can't be ancestor - if (step_push(ns, cur, alloc) & once) - return; - - while (!cur->prev_sibling_c->next_sibling) - { - cur = cur->parent; - - if (!cur) return; - - if (!node_is_ancestor(cur, n)) - if (step_push(ns, cur, alloc) & once) - return; - } - - cur = cur->prev_sibling_c; - } - } - - break; - } - - case axis_ancestor: - case axis_ancestor_or_self: - { - if (axis == axis_ancestor_or_self) - if (step_push(ns, n, alloc) & once) - return; - - xml_node_struct* cur = n->parent; - - while (cur) - { - if (step_push(ns, cur, alloc) & once) - return; - - cur = cur->parent; - } - - break; - } - - case axis_self: - { - step_push(ns, n, alloc); - - break; - } - - case axis_parent: - { - if (n->parent) - step_push(ns, n->parent, alloc); - - break; - } - - default: - assert(!"Unimplemented axis"); - } - } - - template void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) - { - const axis_t axis = T::axis; - - switch (axis) - { - case axis_ancestor: - case axis_ancestor_or_self: - { - if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test - if (step_push(ns, a, p, alloc) & once) - return; - - xml_node_struct* cur = p; - - while (cur) - { - if (step_push(ns, 
cur, alloc) & once) - return; - - cur = cur->parent; - } - - break; - } - - case axis_descendant_or_self: - case axis_self: - { - if (_test == nodetest_type_node) // reject attributes based on principal node type test - step_push(ns, a, p, alloc); - - break; - } - - case axis_following: - { - xml_node_struct* cur = p; - - while (cur) - { - if (cur->first_child) - cur = cur->first_child; - else - { - while (!cur->next_sibling) - { - cur = cur->parent; - - if (!cur) return; - } - - cur = cur->next_sibling; - } - - if (step_push(ns, cur, alloc) & once) - return; - } - - break; - } - - case axis_parent: - { - step_push(ns, p, alloc); - - break; - } - - case axis_preceding: - { - // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding - step_fill(ns, p, alloc, once, v); - break; - } - - default: - assert(!"Unimplemented axis"); - } - } - - template void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v) - { - const axis_t axis = T::axis; - const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); - - if (xn.node()) - step_fill(ns, xn.node().internal_object(), alloc, once, v); - else if (axis_has_attributes && xn.attribute() && xn.parent()) - step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v); - } - - template xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) - { - const axis_t axis = T::axis; - const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); - const xpath_node_set::type_t axis_type = axis_reverse ? 
xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; - - bool once = - (axis == axis_attribute && _test == nodetest_name) || - (!_right && eval_once(axis_type, eval)) || - (_right && !_right->_next && _right->_test == predicate_constant_one); - - xpath_node_set_raw ns; - ns.set_type(axis_type); - - if (_left) - { - xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); - - // self axis preserves the original order - if (axis == axis_self) ns.set_type(s.type()); - - for (const xpath_node* it = s.begin(); it != s.end(); ++it) - { - size_t size = ns.size(); - - // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes - if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); - - step_fill(ns, *it, stack.result, once, v); - if (_right) apply_predicates(ns, size, stack, eval); - } - } - else - { - step_fill(ns, c.n, stack.result, once, v); - if (_right) apply_predicates(ns, 0, stack, eval); - } - - // child, attribute and self axes always generate unique set of nodes - // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice - if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted) - ns.remove_duplicates(); - - return ns; - } - - public: - xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value): - _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) - { - assert(type == ast_string_constant); - _data.string = value; - } - - xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value): - _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) - { - assert(type == ast_number_constant); - _data.number = value; - } - - xpath_ast_node(ast_type_t type, xpath_value_type rettype_, 
xpath_variable* value): - _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) - { - assert(type == ast_variable); - _data.variable = value; - } - - xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0): - _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0) - { - } - - xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents): - _type(static_cast(type)), _rettype(xpath_type_node_set), _axis(static_cast(axis)), _test(static_cast(test)), _left(left), _right(0), _next(0) - { - assert(type == ast_step); - _data.nodetest = contents; - } - - xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test): - _type(static_cast(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast(test)), _left(left), _right(right), _next(0) - { - assert(type == ast_filter || type == ast_predicate); - } - - void set_next(xpath_ast_node* value) - { - _next = value; - } - - void set_right(xpath_ast_node* value) - { - _right = value; - } - - bool eval_boolean(const xpath_context& c, const xpath_stack& stack) - { - switch (_type) - { - case ast_op_or: - return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack); - - case ast_op_and: - return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack); - - case ast_op_equal: - return compare_eq(_left, _right, c, stack, equal_to()); - - case ast_op_not_equal: - return compare_eq(_left, _right, c, stack, not_equal_to()); - - case ast_op_less: - return compare_rel(_left, _right, c, stack, less()); - - case ast_op_greater: - return compare_rel(_right, _left, c, stack, less()); - - case ast_op_less_or_equal: - return compare_rel(_left, _right, c, stack, less_equal()); - - case ast_op_greater_or_equal: - return compare_rel(_right, _left, c, stack, less_equal()); - - 
case ast_func_starts_with: - { - xpath_allocator_capture cr(stack.result); - - xpath_string lr = _left->eval_string(c, stack); - xpath_string rr = _right->eval_string(c, stack); - - return starts_with(lr.c_str(), rr.c_str()); - } - - case ast_func_contains: - { - xpath_allocator_capture cr(stack.result); - - xpath_string lr = _left->eval_string(c, stack); - xpath_string rr = _right->eval_string(c, stack); - - return find_substring(lr.c_str(), rr.c_str()) != 0; - } - - case ast_func_boolean: - return _left->eval_boolean(c, stack); - - case ast_func_not: - return !_left->eval_boolean(c, stack); - - case ast_func_true: - return true; - - case ast_func_false: - return false; - - case ast_func_lang: - { - if (c.n.attribute()) return false; - - xpath_allocator_capture cr(stack.result); - - xpath_string lang = _left->eval_string(c, stack); - - for (xml_node n = c.n.node(); n; n = n.parent()) - { - xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); - - if (a) - { - const char_t* value = a.value(); - - // strnicmp / strncasecmp is not portable - for (const char_t* lit = lang.c_str(); *lit; ++lit) - { - if (tolower_ascii(*lit) != tolower_ascii(*value)) return false; - ++value; - } - - return *value == 0 || *value == '-'; - } - } - - return false; - } - - case ast_opt_compare_attribute: - { - const char_t* value = (_right->_type == ast_string_constant) ? 
_right->_data.string : _right->_data.variable->get_string(); - - xml_attribute attr = c.n.node().attribute(_left->_data.nodetest); - - return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name()); - } - - case ast_variable: - { - assert(_rettype == _data.variable->type()); - - if (_rettype == xpath_type_boolean) - return _data.variable->get_boolean(); - - // fallthrough to type conversion - } - - default: - { - switch (_rettype) - { - case xpath_type_number: - return convert_number_to_boolean(eval_number(c, stack)); - - case xpath_type_string: - { - xpath_allocator_capture cr(stack.result); - - return !eval_string(c, stack).empty(); - } - - case xpath_type_node_set: - { - xpath_allocator_capture cr(stack.result); - - return !eval_node_set(c, stack, nodeset_eval_any).empty(); - } - - default: - assert(!"Wrong expression for return type boolean"); - return false; - } - } - } - } - - double eval_number(const xpath_context& c, const xpath_stack& stack) - { - switch (_type) - { - case ast_op_add: - return _left->eval_number(c, stack) + _right->eval_number(c, stack); - - case ast_op_subtract: - return _left->eval_number(c, stack) - _right->eval_number(c, stack); - - case ast_op_multiply: - return _left->eval_number(c, stack) * _right->eval_number(c, stack); - - case ast_op_divide: - return _left->eval_number(c, stack) / _right->eval_number(c, stack); - - case ast_op_mod: - return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack)); - - case ast_op_negate: - return -_left->eval_number(c, stack); - - case ast_number_constant: - return _data.number; - - case ast_func_last: - return static_cast(c.size); - - case ast_func_position: - return static_cast(c.position); - - case ast_func_count: - { - xpath_allocator_capture cr(stack.result); - - return static_cast(_left->eval_node_set(c, stack, nodeset_eval_all).size()); - } - - case ast_func_string_length_0: - { - xpath_allocator_capture cr(stack.result); - - return static_cast(string_value(c.n, 
stack.result).length()); - } - - case ast_func_string_length_1: - { - xpath_allocator_capture cr(stack.result); - - return static_cast(_left->eval_string(c, stack).length()); - } - - case ast_func_number_0: - { - xpath_allocator_capture cr(stack.result); - - return convert_string_to_number(string_value(c.n, stack.result).c_str()); - } - - case ast_func_number_1: - return _left->eval_number(c, stack); - - case ast_func_sum: - { - xpath_allocator_capture cr(stack.result); - - double r = 0; - - xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all); - - for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) - { - xpath_allocator_capture cri(stack.result); - - r += convert_string_to_number(string_value(*it, stack.result).c_str()); - } - - return r; - } - - case ast_func_floor: - { - double r = _left->eval_number(c, stack); - - return r == r ? floor(r) : r; - } - - case ast_func_ceiling: - { - double r = _left->eval_number(c, stack); - - return r == r ? ceil(r) : r; - } - - case ast_func_round: - return round_nearest_nzero(_left->eval_number(c, stack)); - - case ast_variable: - { - assert(_rettype == _data.variable->type()); - - if (_rettype == xpath_type_number) - return _data.variable->get_number(); - - // fallthrough to type conversion - } - - default: - { - switch (_rettype) - { - case xpath_type_boolean: - return eval_boolean(c, stack) ? 
1 : 0; - - case xpath_type_string: - { - xpath_allocator_capture cr(stack.result); - - return convert_string_to_number(eval_string(c, stack).c_str()); - } - - case xpath_type_node_set: - { - xpath_allocator_capture cr(stack.result); - - return convert_string_to_number(eval_string(c, stack).c_str()); - } - - default: - assert(!"Wrong expression for return type number"); - return 0; - } - - } - } - } - - xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) - { - assert(_type == ast_func_concat); - - xpath_allocator_capture ct(stack.temp); - - // count the string number - size_t count = 1; - for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++; - - // gather all strings - xpath_string static_buffer[4]; - xpath_string* buffer = static_buffer; - - // allocate on-heap for large concats - if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) - { - buffer = static_cast(stack.temp->allocate(count * sizeof(xpath_string))); - assert(buffer); - } - - // evaluate all strings to temporary stack - xpath_stack swapped_stack = {stack.temp, stack.result}; - - buffer[0] = _left->eval_string(c, swapped_stack); - - size_t pos = 1; - for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack); - assert(pos == count); - - // get total length - size_t length = 0; - for (size_t i = 0; i < count; ++i) length += buffer[i].length(); - - // create final string - char_t* result = static_cast(stack.result->allocate((length + 1) * sizeof(char_t))); - assert(result); - - char_t* ri = result; - - for (size_t j = 0; j < count; ++j) - for (const char_t* bi = buffer[j].c_str(); *bi; ++bi) - *ri++ = *bi; - - *ri = 0; - - return xpath_string::from_heap_preallocated(result, ri); - } - - xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) - { - switch (_type) - { - case ast_string_constant: - return xpath_string::from_const(_data.string); - - case ast_func_local_name_0: - { - xpath_node na = 
c.n; - - return xpath_string::from_const(local_name(na)); - } - - case ast_func_local_name_1: - { - xpath_allocator_capture cr(stack.result); - - xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); - xpath_node na = ns.first(); - - return xpath_string::from_const(local_name(na)); - } - - case ast_func_name_0: - { - xpath_node na = c.n; - - return xpath_string::from_const(qualified_name(na)); - } - - case ast_func_name_1: - { - xpath_allocator_capture cr(stack.result); - - xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); - xpath_node na = ns.first(); - - return xpath_string::from_const(qualified_name(na)); - } - - case ast_func_namespace_uri_0: - { - xpath_node na = c.n; - - return xpath_string::from_const(namespace_uri(na)); - } - - case ast_func_namespace_uri_1: - { - xpath_allocator_capture cr(stack.result); - - xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); - xpath_node na = ns.first(); - - return xpath_string::from_const(namespace_uri(na)); - } - - case ast_func_string_0: - return string_value(c.n, stack.result); - - case ast_func_string_1: - return _left->eval_string(c, stack); - - case ast_func_concat: - return eval_string_concat(c, stack); - - case ast_func_substring_before: - { - xpath_allocator_capture cr(stack.temp); - - xpath_stack swapped_stack = {stack.temp, stack.result}; - - xpath_string s = _left->eval_string(c, swapped_stack); - xpath_string p = _right->eval_string(c, swapped_stack); - - const char_t* pos = find_substring(s.c_str(), p.c_str()); - - return pos ? 
xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string(); - } - - case ast_func_substring_after: - { - xpath_allocator_capture cr(stack.temp); - - xpath_stack swapped_stack = {stack.temp, stack.result}; - - xpath_string s = _left->eval_string(c, swapped_stack); - xpath_string p = _right->eval_string(c, swapped_stack); - - const char_t* pos = find_substring(s.c_str(), p.c_str()); - if (!pos) return xpath_string(); - - const char_t* rbegin = pos + p.length(); - const char_t* rend = s.c_str() + s.length(); - - return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); - } - - case ast_func_substring_2: - { - xpath_allocator_capture cr(stack.temp); - - xpath_stack swapped_stack = {stack.temp, stack.result}; - - xpath_string s = _left->eval_string(c, swapped_stack); - size_t s_length = s.length(); - - double first = round_nearest(_right->eval_number(c, stack)); - - if (is_nan(first)) return xpath_string(); // NaN - else if (first >= s_length + 1) return xpath_string(); - - size_t pos = first < 1 ? 1 : static_cast(first); - assert(1 <= pos && pos <= s_length + 1); - - const char_t* rbegin = s.c_str() + (pos - 1); - const char_t* rend = s.c_str() + s.length(); - - return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); - } - - case ast_func_substring_3: - { - xpath_allocator_capture cr(stack.temp); - - xpath_stack swapped_stack = {stack.temp, stack.result}; - - xpath_string s = _left->eval_string(c, swapped_stack); - size_t s_length = s.length(); - - double first = round_nearest(_right->eval_number(c, stack)); - double last = first + round_nearest(_right->_next->eval_number(c, stack)); - - if (is_nan(first) || is_nan(last)) return xpath_string(); - else if (first >= s_length + 1) return xpath_string(); - else if (first >= last) return xpath_string(); - else if (last < 1) return xpath_string(); - - size_t pos = first < 1 ? 
1 : static_cast(first); - size_t end = last >= s_length + 1 ? s_length + 1 : static_cast(last); - - assert(1 <= pos && pos <= end && end <= s_length + 1); - const char_t* rbegin = s.c_str() + (pos - 1); - const char_t* rend = s.c_str() + (end - 1); - - return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result); - } - - case ast_func_normalize_space_0: - { - xpath_string s = string_value(c.n, stack.result); - - char_t* begin = s.data(stack.result); - char_t* end = normalize_space(begin); - - return xpath_string::from_heap_preallocated(begin, end); - } - - case ast_func_normalize_space_1: - { - xpath_string s = _left->eval_string(c, stack); - - char_t* begin = s.data(stack.result); - char_t* end = normalize_space(begin); - - return xpath_string::from_heap_preallocated(begin, end); - } - - case ast_func_translate: - { - xpath_allocator_capture cr(stack.temp); - - xpath_stack swapped_stack = {stack.temp, stack.result}; - - xpath_string s = _left->eval_string(c, stack); - xpath_string from = _right->eval_string(c, swapped_stack); - xpath_string to = _right->_next->eval_string(c, swapped_stack); - - char_t* begin = s.data(stack.result); - char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); - - return xpath_string::from_heap_preallocated(begin, end); - } - - case ast_opt_translate_table: - { - xpath_string s = _left->eval_string(c, stack); - - char_t* begin = s.data(stack.result); - char_t* end = translate_table(begin, _data.table); - - return xpath_string::from_heap_preallocated(begin, end); - } - - case ast_variable: - { - assert(_rettype == _data.variable->type()); - - if (_rettype == xpath_type_string) - return xpath_string::from_const(_data.variable->get_string()); - - // fallthrough to type conversion - } - - default: - { - switch (_rettype) - { - case xpath_type_boolean: - return xpath_string::from_const(eval_boolean(c, stack) ? 
PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); - - case xpath_type_number: - return convert_number_to_string(eval_number(c, stack), stack.result); - - case xpath_type_node_set: - { - xpath_allocator_capture cr(stack.temp); - - xpath_stack swapped_stack = {stack.temp, stack.result}; - - xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); - return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); - } - - default: - assert(!"Wrong expression for return type string"); - return xpath_string(); - } - } - } - } - - xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) - { - switch (_type) - { - case ast_op_union: - { - xpath_allocator_capture cr(stack.temp); - - xpath_stack swapped_stack = {stack.temp, stack.result}; - - xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval); - xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval); - - // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother - rs.set_type(xpath_node_set::type_unsorted); - - rs.append(ls.begin(), ls.end(), stack.result); - rs.remove_duplicates(); - - return rs; - } - - case ast_filter: - { - xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? 
nodeset_eval_first : nodeset_eval_all); - - // either expression is a number or it contains position() call; sort by document order - if (_test != predicate_posinv) set.sort_do(); - - bool once = eval_once(set.type(), eval); - - apply_predicate(set, 0, stack, once); - - return set; - } - - case ast_func_id: - return xpath_node_set_raw(); - - case ast_step: - { - switch (_axis) - { - case axis_ancestor: - return step_do(c, stack, eval, axis_to_type()); - - case axis_ancestor_or_self: - return step_do(c, stack, eval, axis_to_type()); - - case axis_attribute: - return step_do(c, stack, eval, axis_to_type()); - - case axis_child: - return step_do(c, stack, eval, axis_to_type()); - - case axis_descendant: - return step_do(c, stack, eval, axis_to_type()); - - case axis_descendant_or_self: - return step_do(c, stack, eval, axis_to_type()); - - case axis_following: - return step_do(c, stack, eval, axis_to_type()); - - case axis_following_sibling: - return step_do(c, stack, eval, axis_to_type()); - - case axis_namespace: - // namespaced axis is not supported - return xpath_node_set_raw(); - - case axis_parent: - return step_do(c, stack, eval, axis_to_type()); - - case axis_preceding: - return step_do(c, stack, eval, axis_to_type()); - - case axis_preceding_sibling: - return step_do(c, stack, eval, axis_to_type()); - - case axis_self: - return step_do(c, stack, eval, axis_to_type()); - - default: - assert(!"Unknown axis"); - return xpath_node_set_raw(); - } - } - - case ast_step_root: - { - assert(!_right); // root step can't have any predicates - - xpath_node_set_raw ns; - - ns.set_type(xpath_node_set::type_sorted); - - if (c.n.node()) ns.push_back(c.n.node().root(), stack.result); - else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result); - - return ns; - } - - case ast_variable: - { - assert(_rettype == _data.variable->type()); - - if (_rettype == xpath_type_node_set) - { - const xpath_node_set& s = _data.variable->get_node_set(); - - xpath_node_set_raw 
ns; - - ns.set_type(s.type()); - ns.append(s.begin(), s.end(), stack.result); - - return ns; - } - - // fallthrough to type conversion - } - - default: - assert(!"Wrong expression for return type node set"); - return xpath_node_set_raw(); - } - } - - void optimize(xpath_allocator* alloc) - { - if (_left) _left->optimize(alloc); - if (_right) _right->optimize(alloc); - if (_next) _next->optimize(alloc); - - optimize_self(alloc); - } - - void optimize_self(xpath_allocator* alloc) - { - // Rewrite [position()=expr] with [expr] - // Note that this step has to go before classification to recognize [position()=1] - if ((_type == ast_filter || _type == ast_predicate) && - _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number) - { - _right = _right->_right; - } - - // Classify filter/predicate ops to perform various optimizations during evaluation - if (_type == ast_filter || _type == ast_predicate) - { - assert(_test == predicate_default); - - if (_right->_type == ast_number_constant && _right->_data.number == 1.0) - _test = predicate_constant_one; - else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last)) - _test = predicate_constant; - else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr()) - _test = predicate_posinv; - } - - // Rewrite descendant-or-self::node()/child::foo with descendant::foo - // The former is a full form of //foo, the latter is much faster since it executes the node test immediately - // Do a similar kind of rewrite for self/descendant/descendant-or-self axes - // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1]) - if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left && - _left->_type == ast_step && _left->_axis == 
axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right && - is_posinv_step()) - { - if (_axis == axis_child || _axis == axis_descendant) - _axis = axis_descendant; - else - _axis = axis_descendant_or_self; - - _left = _left->_left; - } - - // Use optimized lookup table implementation for translate() with constant arguments - if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) - { - unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string); - - if (table) - { - _type = ast_opt_translate_table; - _data.table = table; - } - } - - // Use optimized path for @attr = 'value' or @attr = $value - if (_type == ast_op_equal && - _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right && - (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string))) - { - _type = ast_opt_compare_attribute; - } - } - - bool is_posinv_expr() const - { - switch (_type) - { - case ast_func_position: - case ast_func_last: - return false; - - case ast_string_constant: - case ast_number_constant: - case ast_variable: - return true; - - case ast_step: - case ast_step_root: - return true; - - case ast_predicate: - case ast_filter: - return true; - - default: - if (_left && !_left->is_posinv_expr()) return false; - - for (xpath_ast_node* n = _right; n; n = n->_next) - if (!n->is_posinv_expr()) return false; - - return true; - } - } - - bool is_posinv_step() const - { - assert(_type == ast_step); - - for (xpath_ast_node* n = _right; n; n = n->_next) - { - assert(n->_type == ast_predicate); - - if (n->_test != predicate_posinv) - return false; - } - - return true; - } - - xpath_value_type rettype() const - { - return static_cast(_rettype); - } - }; - - struct xpath_parser - { - xpath_allocator* _alloc; - xpath_lexer _lexer; - - const char_t* 
_query; - xpath_variable_set* _variables; - - xpath_parse_result* _result; - - char_t _scratch[32]; - - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf _error_handler; - #endif - - void throw_error(const char* message) - { - _result->error = message; - _result->offset = _lexer.current_pos() - _query; - - #ifdef PUGIXML_NO_EXCEPTIONS - longjmp(_error_handler, 1); - #else - throw xpath_exception(*_result); - #endif - } - - void throw_error_oom() - { - #ifdef PUGIXML_NO_EXCEPTIONS - throw_error("Out of memory"); - #else - throw std::bad_alloc(); - #endif - } - - void* alloc_node() - { - void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node)); - - if (!result) throw_error_oom(); - - return result; - } - - const char_t* alloc_string(const xpath_lexer_string& value) - { - if (value.begin) - { - size_t length = static_cast(value.end - value.begin); - - char_t* c = static_cast(_alloc->allocate_nothrow((length + 1) * sizeof(char_t))); - if (!c) throw_error_oom(); - assert(c); // workaround for clang static analysis - - memcpy(c, value.begin, length * sizeof(char_t)); - c[length] = 0; - - return c; - } - else return 0; - } - - xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) - { - assert(argc <= 1); - - if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); - - return new (alloc_node()) xpath_ast_node(argc == 0 ? 
type0 : type1, xpath_type_string, args[0]); - } - - xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) - { - switch (name.begin[0]) - { - case 'b': - if (name == PUGIXML_TEXT("boolean") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]); - - break; - - case 'c': - if (name == PUGIXML_TEXT("count") && argc == 1) - { - if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); - return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); - } - else if (name == PUGIXML_TEXT("contains") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); - else if (name == PUGIXML_TEXT("concat") && argc >= 2) - return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("ceiling") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]); - - break; - - case 'f': - if (name == PUGIXML_TEXT("false") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean); - else if (name == PUGIXML_TEXT("floor") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]); - - break; - - case 'i': - if (name == PUGIXML_TEXT("id") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]); - - break; - - case 'l': - if (name == PUGIXML_TEXT("last") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number); - else if (name == PUGIXML_TEXT("lang") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); - else if (name == PUGIXML_TEXT("local-name") && argc <= 1) - return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args); - - break; - - case 
'n': - if (name == PUGIXML_TEXT("name") && argc <= 1) - return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args); - else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) - return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args); - else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("not") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]); - else if (name == PUGIXML_TEXT("number") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); - - break; - - case 'p': - if (name == PUGIXML_TEXT("position") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number); - - break; - - case 'r': - if (name == PUGIXML_TEXT("round") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]); - - break; - - case 's': - if (name == PUGIXML_TEXT("string") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); - else if (name == PUGIXML_TEXT("string-length") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? 
ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); - else if (name == PUGIXML_TEXT("starts-with") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring-before") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring-after") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) - return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("sum") && argc == 1) - { - if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); - return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); - } - - break; - - case 't': - if (name == PUGIXML_TEXT("translate") && argc == 3) - return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("true") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean); - - break; - - default: - break; - } - - throw_error("Unrecognized function or wrong parameter count"); - - return 0; - } - - axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) - { - specified = true; - - switch (name.begin[0]) - { - case 'a': - if (name == PUGIXML_TEXT("ancestor")) - return axis_ancestor; - else if (name == PUGIXML_TEXT("ancestor-or-self")) - return axis_ancestor_or_self; - else if (name == PUGIXML_TEXT("attribute")) - return axis_attribute; - - break; - - case 'c': - if (name == PUGIXML_TEXT("child")) - return axis_child; - - break; - - case 'd': - if (name == 
PUGIXML_TEXT("descendant")) - return axis_descendant; - else if (name == PUGIXML_TEXT("descendant-or-self")) - return axis_descendant_or_self; - - break; - - case 'f': - if (name == PUGIXML_TEXT("following")) - return axis_following; - else if (name == PUGIXML_TEXT("following-sibling")) - return axis_following_sibling; - - break; - - case 'n': - if (name == PUGIXML_TEXT("namespace")) - return axis_namespace; - - break; - - case 'p': - if (name == PUGIXML_TEXT("parent")) - return axis_parent; - else if (name == PUGIXML_TEXT("preceding")) - return axis_preceding; - else if (name == PUGIXML_TEXT("preceding-sibling")) - return axis_preceding_sibling; - - break; - - case 's': - if (name == PUGIXML_TEXT("self")) - return axis_self; - - break; - - default: - break; - } - - specified = false; - return axis_child; - } - - nodetest_t parse_node_test_type(const xpath_lexer_string& name) - { - switch (name.begin[0]) - { - case 'c': - if (name == PUGIXML_TEXT("comment")) - return nodetest_type_comment; - - break; - - case 'n': - if (name == PUGIXML_TEXT("node")) - return nodetest_type_node; - - break; - - case 'p': - if (name == PUGIXML_TEXT("processing-instruction")) - return nodetest_type_pi; - - break; - - case 't': - if (name == PUGIXML_TEXT("text")) - return nodetest_type_text; - - break; - - default: - break; - } - - return nodetest_none; - } - - // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall - xpath_ast_node* parse_primary_expression() - { - switch (_lexer.current()) - { - case lex_var_ref: - { - xpath_lexer_string name = _lexer.contents(); - - if (!_variables) - throw_error("Unknown variable: variable set is not provided"); - - xpath_variable* var = 0; - if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) - throw_error_oom(); - - if (!var) - throw_error("Unknown variable: variable set does not contain the given name"); - - _lexer.next(); - - return new (alloc_node()) xpath_ast_node(ast_variable, 
var->type(), var); - } - - case lex_open_brace: - { - _lexer.next(); - - xpath_ast_node* n = parse_expression(); - - if (_lexer.current() != lex_close_brace) - throw_error("Unmatched braces"); - - _lexer.next(); - - return n; - } - - case lex_quoted_string: - { - const char_t* value = alloc_string(_lexer.contents()); - - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value); - _lexer.next(); - - return n; - } - - case lex_number: - { - double value = 0; - - if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) - throw_error_oom(); - - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); - _lexer.next(); - - return n; - } - - case lex_string: - { - xpath_ast_node* args[2] = {0}; - size_t argc = 0; - - xpath_lexer_string function = _lexer.contents(); - _lexer.next(); - - xpath_ast_node* last_arg = 0; - - if (_lexer.current() != lex_open_brace) - throw_error("Unrecognized function call"); - _lexer.next(); - - if (_lexer.current() != lex_close_brace) - args[argc++] = parse_expression(); - - while (_lexer.current() != lex_close_brace) - { - if (_lexer.current() != lex_comma) - throw_error("No comma between function arguments"); - _lexer.next(); - - xpath_ast_node* n = parse_expression(); - - if (argc < 2) args[argc] = n; - else last_arg->set_next(n); - - argc++; - last_arg = n; - } - - _lexer.next(); - - return parse_function(function, argc, args); - } - - default: - throw_error("Unrecognizable primary expression"); - - return 0; - } - } - - // FilterExpr ::= PrimaryExpr | FilterExpr Predicate - // Predicate ::= '[' PredicateExpr ']' - // PredicateExpr ::= Expr - xpath_ast_node* parse_filter_expression() - { - xpath_ast_node* n = parse_primary_expression(); - - while (_lexer.current() == lex_open_square_brace) - { - _lexer.next(); - - xpath_ast_node* expr = parse_expression(); - - if (n->rettype() != xpath_type_node_set) 
throw_error("Predicate has to be applied to node set"); - - n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default); - - if (_lexer.current() != lex_close_square_brace) - throw_error("Unmatched square brace"); - - _lexer.next(); - } - - return n; - } - - // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep - // AxisSpecifier ::= AxisName '::' | '@'? - // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' - // NameTest ::= '*' | NCName ':' '*' | QName - // AbbreviatedStep ::= '.' | '..' - xpath_ast_node* parse_step(xpath_ast_node* set) - { - if (set && set->rettype() != xpath_type_node_set) - throw_error("Step has to be applied to node set"); - - bool axis_specified = false; - axis_t axis = axis_child; // implied child axis - - if (_lexer.current() == lex_axis_attribute) - { - axis = axis_attribute; - axis_specified = true; - - _lexer.next(); - } - else if (_lexer.current() == lex_dot) - { - _lexer.next(); - - return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0); - } - else if (_lexer.current() == lex_double_dot) - { - _lexer.next(); - - return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0); - } - - nodetest_t nt_type = nodetest_none; - xpath_lexer_string nt_name; - - if (_lexer.current() == lex_string) - { - // node name test - nt_name = _lexer.contents(); - _lexer.next(); - - // was it an axis name? 
- if (_lexer.current() == lex_double_colon) - { - // parse axis name - if (axis_specified) throw_error("Two axis specifiers in one step"); - - axis = parse_axis_name(nt_name, axis_specified); - - if (!axis_specified) throw_error("Unknown axis"); - - // read actual node test - _lexer.next(); - - if (_lexer.current() == lex_multiply) - { - nt_type = nodetest_all; - nt_name = xpath_lexer_string(); - _lexer.next(); - } - else if (_lexer.current() == lex_string) - { - nt_name = _lexer.contents(); - _lexer.next(); - } - else throw_error("Unrecognized node test"); - } - - if (nt_type == nodetest_none) - { - // node type test or processing-instruction - if (_lexer.current() == lex_open_brace) - { - _lexer.next(); - - if (_lexer.current() == lex_close_brace) - { - _lexer.next(); - - nt_type = parse_node_test_type(nt_name); - - if (nt_type == nodetest_none) throw_error("Unrecognized node type"); - - nt_name = xpath_lexer_string(); - } - else if (nt_name == PUGIXML_TEXT("processing-instruction")) - { - if (_lexer.current() != lex_quoted_string) - throw_error("Only literals are allowed as arguments to processing-instruction()"); - - nt_type = nodetest_pi; - nt_name = _lexer.contents(); - _lexer.next(); - - if (_lexer.current() != lex_close_brace) - throw_error("Unmatched brace near processing-instruction()"); - _lexer.next(); - } - else - throw_error("Unmatched brace near node type test"); - - } - // QName or NCName:* - else - { - if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:* - { - nt_name.end--; // erase * - - nt_type = nodetest_all_in_namespace; - } - else nt_type = nodetest_name; - } - } - } - else if (_lexer.current() == lex_multiply) - { - nt_type = nodetest_all; - _lexer.next(); - } - else throw_error("Unrecognized node test"); - - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name)); - - xpath_ast_node* last = 0; - - while (_lexer.current() == 
lex_open_square_brace) - { - _lexer.next(); - - xpath_ast_node* expr = parse_expression(); - - xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default); - - if (_lexer.current() != lex_close_square_brace) - throw_error("Unmatched square brace"); - _lexer.next(); - - if (last) last->set_next(pred); - else n->set_right(pred); - - last = pred; - } - - return n; - } - - // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step - xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) - { - xpath_ast_node* n = parse_step(set); - - while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) - { - lexeme_t l = _lexer.current(); - _lexer.next(); - - if (l == lex_double_slash) - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); - - n = parse_step(n); - } - - return n; - } - - // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath - // AbsoluteLocationPath ::= '/' RelativeLocationPath? 
| '//' RelativeLocationPath - xpath_ast_node* parse_location_path() - { - if (_lexer.current() == lex_slash) - { - _lexer.next(); - - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); - - // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path - lexeme_t l = _lexer.current(); - - if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) - return parse_relative_location_path(n); - else - return n; - } - else if (_lexer.current() == lex_double_slash) - { - _lexer.next(); - - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); - - return parse_relative_location_path(n); - } - - // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1 - return parse_relative_location_path(0); - } - - // PathExpr ::= LocationPath - // | FilterExpr - // | FilterExpr '/' RelativeLocationPath - // | FilterExpr '//' RelativeLocationPath - // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr - // UnaryExpr ::= UnionExpr | '-' UnaryExpr - xpath_ast_node* parse_path_or_unary_expression() - { - // Clarification. - // PathExpr begins with either LocationPath or FilterExpr. - // FilterExpr begins with PrimaryExpr - // PrimaryExpr begins with '$' in case of it being a variable reference, - // '(' in case of it being an expression, string literal, number constant or - // function call. 
- - if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || - _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || - _lexer.current() == lex_string) - { - if (_lexer.current() == lex_string) - { - // This is either a function call, or not - if not, we shall proceed with location path - const char_t* state = _lexer.state(); - - while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; - - if (*state != '(') return parse_location_path(); - - // This looks like a function call; however this still can be a node-test. Check it. - if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path(); - } - - xpath_ast_node* n = parse_filter_expression(); - - if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) - { - lexeme_t l = _lexer.current(); - _lexer.next(); - - if (l == lex_double_slash) - { - if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set"); - - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); - } - - // select from location path - return parse_relative_location_path(n); - } - - return n; - } - else if (_lexer.current() == lex_minus) - { - _lexer.next(); - - // precedence 7+ - only parses union expressions - xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7); - - return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); - } - else - return parse_location_path(); - } - - struct binary_op_t - { - ast_type_t asttype; - xpath_value_type rettype; - int precedence; - - binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) - { - } - - binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_) - { - } - - static binary_op_t parse(xpath_lexer& lexer) - { - switch (lexer.current()) - { - case lex_string: - if (lexer.contents() == 
PUGIXML_TEXT("or")) - return binary_op_t(ast_op_or, xpath_type_boolean, 1); - else if (lexer.contents() == PUGIXML_TEXT("and")) - return binary_op_t(ast_op_and, xpath_type_boolean, 2); - else if (lexer.contents() == PUGIXML_TEXT("div")) - return binary_op_t(ast_op_divide, xpath_type_number, 6); - else if (lexer.contents() == PUGIXML_TEXT("mod")) - return binary_op_t(ast_op_mod, xpath_type_number, 6); - else - return binary_op_t(); - - case lex_equal: - return binary_op_t(ast_op_equal, xpath_type_boolean, 3); - - case lex_not_equal: - return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3); - - case lex_less: - return binary_op_t(ast_op_less, xpath_type_boolean, 4); - - case lex_greater: - return binary_op_t(ast_op_greater, xpath_type_boolean, 4); - - case lex_less_or_equal: - return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4); - - case lex_greater_or_equal: - return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4); - - case lex_plus: - return binary_op_t(ast_op_add, xpath_type_number, 5); - - case lex_minus: - return binary_op_t(ast_op_subtract, xpath_type_number, 5); - - case lex_multiply: - return binary_op_t(ast_op_multiply, xpath_type_number, 6); - - case lex_union: - return binary_op_t(ast_op_union, xpath_type_node_set, 7); - - default: - return binary_op_t(); - } - } - }; - - xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit) - { - binary_op_t op = binary_op_t::parse(_lexer); - - while (op.asttype != ast_unknown && op.precedence >= limit) - { - _lexer.next(); - - xpath_ast_node* rhs = parse_path_or_unary_expression(); - - binary_op_t nextop = binary_op_t::parse(_lexer); - - while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) - { - rhs = parse_expression_rec(rhs, nextop.precedence); - - nextop = binary_op_t::parse(_lexer); - } - - if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) - throw_error("Union operator has to be applied 
to node sets"); - - lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs); - - op = binary_op_t::parse(_lexer); - } - - return lhs; - } - - // Expr ::= OrExpr - // OrExpr ::= AndExpr | OrExpr 'or' AndExpr - // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr - // EqualityExpr ::= RelationalExpr - // | EqualityExpr '=' RelationalExpr - // | EqualityExpr '!=' RelationalExpr - // RelationalExpr ::= AdditiveExpr - // | RelationalExpr '<' AdditiveExpr - // | RelationalExpr '>' AdditiveExpr - // | RelationalExpr '<=' AdditiveExpr - // | RelationalExpr '>=' AdditiveExpr - // AdditiveExpr ::= MultiplicativeExpr - // | AdditiveExpr '+' MultiplicativeExpr - // | AdditiveExpr '-' MultiplicativeExpr - // MultiplicativeExpr ::= UnaryExpr - // | MultiplicativeExpr '*' UnaryExpr - // | MultiplicativeExpr 'div' UnaryExpr - // | MultiplicativeExpr 'mod' UnaryExpr - xpath_ast_node* parse_expression() - { - return parse_expression_rec(parse_path_or_unary_expression(), 0); - } - - xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) - { - } - - xpath_ast_node* parse() - { - xpath_ast_node* result = parse_expression(); - - if (_lexer.current() != lex_eof) - { - // there are still unparsed tokens left, error - throw_error("Incorrect query"); - } - - return result; - } - - static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) - { - xpath_parser parser(query, variables, alloc, result); - - #ifdef PUGIXML_NO_EXCEPTIONS - int error = setjmp(parser._error_handler); - - return (error == 0) ? 
parser.parse() : 0; - #else - return parser.parse(); - #endif - } - }; - - struct xpath_query_impl - { - static xpath_query_impl* create() - { - void* memory = xml_memory::allocate(sizeof(xpath_query_impl)); - if (!memory) return 0; - - return new (memory) xpath_query_impl(); - } - - static void destroy(xpath_query_impl* impl) - { - // free all allocated pages - impl->alloc.release(); - - // free allocator memory (with the first page) - xml_memory::deallocate(impl); - } - - xpath_query_impl(): root(0), alloc(&block) - { - block.next = 0; - block.capacity = sizeof(block.data); - } - - xpath_ast_node* root; - xpath_allocator alloc; - xpath_memory_block block; - }; - - PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd) - { - if (!impl) return xpath_string(); - - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_string(); - #endif - - xpath_context c(n, 1, 1); - - return impl->root->eval_string(c, sd.stack); - } - - PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) - { - if (!impl) return 0; - - if (impl->root->rettype() != xpath_type_node_set) - { - #ifdef PUGIXML_NO_EXCEPTIONS - return 0; - #else - xpath_parse_result res; - res.error = "Expression does not evaluate to node set"; - - throw xpath_exception(res); - #endif - } - - return impl->root; - } + while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; + } + + _cur_lexeme_contents.end = cur; + + _cur_lexeme = lex_var_ref; + } else { + _cur_lexeme = lex_none; + } + + break; + + case '(': + cur += 1; + _cur_lexeme = lex_open_brace; + + break; + + case ')': + cur += 1; + _cur_lexeme = lex_close_brace; + + break; + + case '[': + cur += 1; + _cur_lexeme = lex_open_square_brace; + + break; + + case ']': + cur += 1; + _cur_lexeme = lex_close_square_brace; + + break; + + case ',': + cur += 1; + _cur_lexeme = lex_comma; + + break; + + case '/': + if (*(cur+1) == '/') { + cur += 2; + _cur_lexeme = lex_double_slash; 
+ } else { + cur += 1; + _cur_lexeme = lex_slash; + } + break; + + case '.': + if (*(cur+1) == '.') { + cur += 2; + _cur_lexeme = lex_double_dot; + } else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit)) { + _cur_lexeme_contents.begin = cur; // . + + ++cur; + + while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; + + _cur_lexeme_contents.end = cur; + + _cur_lexeme = lex_number; + } else { + cur += 1; + _cur_lexeme = lex_dot; + } + break; + + case '@': + cur += 1; + _cur_lexeme = lex_axis_attribute; + + break; + + case '"': + case '\'': { + char_t terminator = *cur; + + ++cur; + + _cur_lexeme_contents.begin = cur; + while (*cur && *cur != terminator) cur++; + _cur_lexeme_contents.end = cur; + + if (!*cur) + _cur_lexeme = lex_none; + else { + cur += 1; + _cur_lexeme = lex_quoted_string; + } + + break; + } + + case ':': + if (*(cur+1) == ':') { + cur += 2; + _cur_lexeme = lex_double_colon; + } else { + _cur_lexeme = lex_none; + } + break; + + default: + if (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) { + _cur_lexeme_contents.begin = cur; + + while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; + + if (*cur == '.') { + cur++; + + while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; + } + + _cur_lexeme_contents.end = cur; + + _cur_lexeme = lex_number; + } else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) { + _cur_lexeme_contents.begin = cur; + + while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; + + if (cur[0] == ':') { + if (cur[1] == '*') { // namespace test ncname:* + cur += 2; // :* + } else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) { // namespace test qname + cur++; // : + + while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; + } + } + + _cur_lexeme_contents.end = cur; + + _cur_lexeme = lex_string; + } else { + _cur_lexeme = lex_none; + } + } + + _cur = cur; + } + + lexeme_t current() const { + return _cur_lexeme; + } + + const char_t* current_pos() const { + return _cur_lexeme_pos; + } + + const xpath_lexer_string& contents() const { + assert(_cur_lexeme == lex_var_ref || 
_cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string); + + return _cur_lexeme_contents; + } +}; + +enum ast_type_t { + ast_unknown, + ast_op_or, // left or right + ast_op_and, // left and right + ast_op_equal, // left = right + ast_op_not_equal, // left != right + ast_op_less, // left < right + ast_op_greater, // left > right + ast_op_less_or_equal, // left <= right + ast_op_greater_or_equal, // left >= right + ast_op_add, // left + right + ast_op_subtract, // left - right + ast_op_multiply, // left * right + ast_op_divide, // left / right + ast_op_mod, // left % right + ast_op_negate, // left - right + ast_op_union, // left | right + ast_predicate, // apply predicate to set; next points to next predicate + ast_filter, // select * from left where right + ast_string_constant, // string constant + ast_number_constant, // number constant + ast_variable, // variable + ast_func_last, // last() + ast_func_position, // position() + ast_func_count, // count(left) + ast_func_id, // id(left) + ast_func_local_name_0, // local-name() + ast_func_local_name_1, // local-name(left) + ast_func_namespace_uri_0, // namespace-uri() + ast_func_namespace_uri_1, // namespace-uri(left) + ast_func_name_0, // name() + ast_func_name_1, // name(left) + ast_func_string_0, // string() + ast_func_string_1, // string(left) + ast_func_concat, // concat(left, right, siblings) + ast_func_starts_with, // starts_with(left, right) + ast_func_contains, // contains(left, right) + ast_func_substring_before, // substring-before(left, right) + ast_func_substring_after, // substring-after(left, right) + ast_func_substring_2, // substring(left, right) + ast_func_substring_3, // substring(left, right, third) + ast_func_string_length_0, // string-length() + ast_func_string_length_1, // string-length(left) + ast_func_normalize_space_0, // normalize-space() + ast_func_normalize_space_1, // normalize-space(left) + ast_func_translate, // translate(left, right, third) + 
ast_func_boolean, // boolean(left) + ast_func_not, // not(left) + ast_func_true, // true() + ast_func_false, // false() + ast_func_lang, // lang(left) + ast_func_number_0, // number() + ast_func_number_1, // number(left) + ast_func_sum, // sum(left) + ast_func_floor, // floor(left) + ast_func_ceiling, // ceiling(left) + ast_func_round, // round(left) + ast_step, // process set left with step + ast_step_root, // select root node + + ast_opt_translate_table, // translate(left, right, third) where right/third are constants + ast_opt_compare_attribute // @name = 'string' +}; + +enum axis_t { + axis_ancestor, + axis_ancestor_or_self, + axis_attribute, + axis_child, + axis_descendant, + axis_descendant_or_self, + axis_following, + axis_following_sibling, + axis_namespace, + axis_parent, + axis_preceding, + axis_preceding_sibling, + axis_self +}; + +enum nodetest_t { + nodetest_none, + nodetest_name, + nodetest_type_node, + nodetest_type_comment, + nodetest_type_pi, + nodetest_type_text, + nodetest_pi, + nodetest_all, + nodetest_all_in_namespace +}; + +enum predicate_t { + predicate_default, + predicate_posinv, + predicate_constant, + predicate_constant_one +}; + +enum nodeset_eval_t { + nodeset_eval_all, + nodeset_eval_any, + nodeset_eval_first +}; + +template struct axis_to_type { + static const axis_t axis; +}; + +template const axis_t axis_to_type::axis = N; + +class xpath_ast_node +{ +private: + // node type + char _type; + char _rettype; + + // for ast_step + char _axis; + + // for ast_step/ast_predicate/ast_filter + char _test; + + // tree node structure + xpath_ast_node* _left; + xpath_ast_node* _right; + xpath_ast_node* _next; + + union { + // value for ast_string_constant + const char_t* string; + // value for ast_number_constant + double number; + // variable for ast_variable + xpath_variable* variable; + // node test for ast_step (node name/namespace/node type/pi target) + const char_t* nodetest; + // table for ast_opt_translate_table + const unsigned char* 
table; + } _data; + + xpath_ast_node(const xpath_ast_node&); + xpath_ast_node& operator=(const xpath_ast_node&); + + template static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) { + xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); + + if (lt != xpath_type_node_set && rt != xpath_type_node_set) { + if (lt == xpath_type_boolean || rt == xpath_type_boolean) + return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); + else if (lt == xpath_type_number || rt == xpath_type_number) + return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); + else if (lt == xpath_type_string || rt == xpath_type_string) { + xpath_allocator_capture cr(stack.result); + + xpath_string ls = lhs->eval_string(c, stack); + xpath_string rs = rhs->eval_string(c, stack); + + return comp(ls, rs); + } + } else if (lt == xpath_type_node_set && rt == xpath_type_node_set) { + xpath_allocator_capture cr(stack.result); + + xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); + xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); + + for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) + for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) { + xpath_allocator_capture cri(stack.result); + + if (comp(string_value(*li, stack.result), string_value(*ri, stack.result))) + return true; + } + + return false; + } else { + if (lt == xpath_type_node_set) { + swap(lhs, rhs); + swap(lt, rt); + } + + if (lt == xpath_type_boolean) + return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); + else if (lt == xpath_type_number) { + xpath_allocator_capture cr(stack.result); + + double l = lhs->eval_number(c, stack); + xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); + + for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) { + xpath_allocator_capture cri(stack.result); + + if (comp(l, 
convert_string_to_number(string_value(*ri, stack.result).c_str()))) + return true; + } + + return false; + } else if (lt == xpath_type_string) { + xpath_allocator_capture cr(stack.result); + + xpath_string l = lhs->eval_string(c, stack); + xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); + + for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) { + xpath_allocator_capture cri(stack.result); + + if (comp(l, string_value(*ri, stack.result))) + return true; + } + + return false; + } + } + + assert(!"Wrong types"); + return false; + } + + static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval) { + return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any; + } + + template static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) { + xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); + + if (lt != xpath_type_node_set && rt != xpath_type_node_set) + return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); + else if (lt == xpath_type_node_set && rt == xpath_type_node_set) { + xpath_allocator_capture cr(stack.result); + + xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); + xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); + + for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) { + xpath_allocator_capture cri(stack.result); + + double l = convert_string_to_number(string_value(*li, stack.result).c_str()); + + for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) { + xpath_allocator_capture crii(stack.result); + + if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) + return true; + } + } + + return false; + } else if (lt != xpath_type_node_set && rt == xpath_type_node_set) { + xpath_allocator_capture cr(stack.result); + + double l = lhs->eval_number(c, stack); + xpath_node_set_raw rs = rhs->eval_node_set(c, 
stack, nodeset_eval_all); + + for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) { + xpath_allocator_capture cri(stack.result); + + if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) + return true; + } + + return false; + } else if (lt == xpath_type_node_set && rt != xpath_type_node_set) { + xpath_allocator_capture cr(stack.result); + + xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); + double r = rhs->eval_number(c, stack); + + for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) { + xpath_allocator_capture cri(stack.result); + + if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r)) + return true; + } + + return false; + } else { + assert(!"Wrong types"); + return false; + } + } + + static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) { + assert(ns.size() >= first); + assert(expr->rettype() != xpath_type_number); + + size_t i = 1; + size_t size = ns.size() - first; + + xpath_node* last = ns.begin() + first; + + // remove_if... or well, sort of + for (xpath_node* it = last; it != ns.end(); ++it, ++i) { + xpath_context c(*it, i, size); + + if (expr->eval_boolean(c, stack)) { + *last++ = *it; + + if (once) break; + } + } + + ns.truncate(last); + } + + static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) { + assert(ns.size() >= first); + assert(expr->rettype() == xpath_type_number); + + size_t i = 1; + size_t size = ns.size() - first; + + xpath_node* last = ns.begin() + first; + + // remove_if... 
or well, sort of + for (xpath_node* it = last; it != ns.end(); ++it, ++i) { + xpath_context c(*it, i, size); + + if (expr->eval_number(c, stack) == i) { + *last++ = *it; + + if (once) break; + } + } + + ns.truncate(last); + } + + static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) { + assert(ns.size() >= first); + assert(expr->rettype() == xpath_type_number); + + size_t size = ns.size() - first; + + xpath_node* last = ns.begin() + first; + + xpath_context c(xpath_node(), 1, size); + + double er = expr->eval_number(c, stack); + + if (er >= 1.0 && er <= size) { + size_t eri = static_cast(er); + + if (er == eri) { + xpath_node r = last[eri - 1]; + + *last++ = r; + } + } + + ns.truncate(last); + } + + void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once) { + if (ns.size() == first) return; + + assert(_type == ast_filter || _type == ast_predicate); + + if (_test == predicate_constant || _test == predicate_constant_one) + apply_predicate_number_const(ns, first, _right, stack); + else if (_right->rettype() == xpath_type_number) + apply_predicate_number(ns, first, _right, stack, once); + else + apply_predicate_boolean(ns, first, _right, stack, once); + } + + void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval) { + if (ns.size() == first) return; + + bool last_once = eval_once(ns.type(), eval); + + for (xpath_ast_node* pred = _right; pred; pred = pred->_next) + pred->apply_predicate(ns, first, stack, !pred->_next && last_once); + } + + bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc) { + assert(a); + + const char_t* name = a->name ? 
a->name + 0 : PUGIXML_TEXT(""); + + switch (_test) { + case nodetest_name: + if (strequal(name, _data.nodetest) && is_xpath_attribute(name)) { + ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); + return true; + } + break; + + case nodetest_type_node: + case nodetest_all: + if (is_xpath_attribute(name)) { + ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); + return true; + } + break; + + case nodetest_all_in_namespace: + if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) { + ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); + return true; + } + break; + + default: + ; + } + + return false; + } + + bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc) { + assert(n); + + xml_node_type type = PUGI__NODETYPE(n); + + switch (_test) { + case nodetest_name: + if (type == node_element && n->name && strequal(n->name, _data.nodetest)) { + ns.push_back(xml_node(n), alloc); + return true; + } + break; + + case nodetest_type_node: + ns.push_back(xml_node(n), alloc); + return true; + + case nodetest_type_comment: + if (type == node_comment) { + ns.push_back(xml_node(n), alloc); + return true; + } + break; + + case nodetest_type_text: + if (type == node_pcdata || type == node_cdata) { + ns.push_back(xml_node(n), alloc); + return true; + } + break; + + case nodetest_type_pi: + if (type == node_pi) { + ns.push_back(xml_node(n), alloc); + return true; + } + break; + + case nodetest_pi: + if (type == node_pi && n->name && strequal(n->name, _data.nodetest)) { + ns.push_back(xml_node(n), alloc); + return true; + } + break; + + case nodetest_all: + if (type == node_element) { + ns.push_back(xml_node(n), alloc); + return true; + } + break; + + case nodetest_all_in_namespace: + if (type == node_element && n->name && starts_with(n->name, _data.nodetest)) { + ns.push_back(xml_node(n), alloc); + return true; + } + break; + + default: + assert(!"Unknown axis"); + } + + return false; + } + + 
template void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T) { + const axis_t axis = T::axis; + + switch (axis) { + case axis_attribute: { + for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute) + if (step_push(ns, a, n, alloc) & once) + return; + + break; + } + + case axis_child: { + for (xml_node_struct* c = n->first_child; c; c = c->next_sibling) + if (step_push(ns, c, alloc) & once) + return; + + break; + } + + case axis_descendant: + case axis_descendant_or_self: { + if (axis == axis_descendant_or_self) + if (step_push(ns, n, alloc) & once) + return; + + xml_node_struct* cur = n->first_child; + + while (cur) { + if (step_push(ns, cur, alloc) & once) + return; + + if (cur->first_child) + cur = cur->first_child; + else { + while (!cur->next_sibling) { + cur = cur->parent; + + if (cur == n) return; + } + + cur = cur->next_sibling; + } + } + + break; + } + + case axis_following_sibling: { + for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling) + if (step_push(ns, c, alloc) & once) + return; + + break; + } + + case axis_preceding_sibling: { + for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c) + if (step_push(ns, c, alloc) & once) + return; + + break; + } + + case axis_following: { + xml_node_struct* cur = n; + + // exit from this node so that we don't include descendants + while (!cur->next_sibling) { + cur = cur->parent; + + if (!cur) return; + } + + cur = cur->next_sibling; + + while (cur) { + if (step_push(ns, cur, alloc) & once) + return; + + if (cur->first_child) + cur = cur->first_child; + else { + while (!cur->next_sibling) { + cur = cur->parent; + + if (!cur) return; + } + + cur = cur->next_sibling; + } + } + + break; + } + + case axis_preceding: { + xml_node_struct* cur = n; + + // exit from this node so that we don't include descendants + while (!cur->prev_sibling_c->next_sibling) { + cur = cur->parent; + + if (!cur) return; + } + + cur = 
cur->prev_sibling_c; + + while (cur) { + if (cur->first_child) + cur = cur->first_child->prev_sibling_c; + else { + // leaf node, can't be ancestor + if (step_push(ns, cur, alloc) & once) + return; + + while (!cur->prev_sibling_c->next_sibling) { + cur = cur->parent; + + if (!cur) return; + + if (!node_is_ancestor(cur, n)) + if (step_push(ns, cur, alloc) & once) + return; + } + + cur = cur->prev_sibling_c; + } + } + + break; + } + + case axis_ancestor: + case axis_ancestor_or_self: { + if (axis == axis_ancestor_or_self) + if (step_push(ns, n, alloc) & once) + return; + + xml_node_struct* cur = n->parent; + + while (cur) { + if (step_push(ns, cur, alloc) & once) + return; + + cur = cur->parent; + } + + break; + } + + case axis_self: { + step_push(ns, n, alloc); + + break; + } + + case axis_parent: { + if (n->parent) + step_push(ns, n->parent, alloc); + + break; + } + + default: + assert(!"Unimplemented axis"); + } + } + + template void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) { + const axis_t axis = T::axis; + + switch (axis) { + case axis_ancestor: + case axis_ancestor_or_self: { + if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test + if (step_push(ns, a, p, alloc) & once) + return; + + xml_node_struct* cur = p; + + while (cur) { + if (step_push(ns, cur, alloc) & once) + return; + + cur = cur->parent; + } + + break; + } + + case axis_descendant_or_self: + case axis_self: { + if (_test == nodetest_type_node) // reject attributes based on principal node type test + step_push(ns, a, p, alloc); + + break; + } + + case axis_following: { + xml_node_struct* cur = p; + + while (cur) { + if (cur->first_child) + cur = cur->first_child; + else { + while (!cur->next_sibling) { + cur = cur->parent; + + if (!cur) return; + } + + cur = cur->next_sibling; + } + + if (step_push(ns, cur, alloc) & once) + return; + } + + break; + } + + 
case axis_parent: { + step_push(ns, p, alloc); + + break; + } + + case axis_preceding: { + // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding + step_fill(ns, p, alloc, once, v); + break; + } + + default: + assert(!"Unimplemented axis"); + } + } + + template void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v) { + const axis_t axis = T::axis; + const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); + + if (xn.node()) + step_fill(ns, xn.node().internal_object(), alloc, once, v); + else if (axis_has_attributes && xn.attribute() && xn.parent()) + step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v); + } + + template xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) { + const axis_t axis = T::axis; + const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); + const xpath_node_set::type_t axis_type = axis_reverse ? 
xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; + + bool once = + (axis == axis_attribute && _test == nodetest_name) || + (!_right && eval_once(axis_type, eval)) || + (_right && !_right->_next && _right->_test == predicate_constant_one); + + xpath_node_set_raw ns; + ns.set_type(axis_type); + + if (_left) { + xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); + + // self axis preserves the original order + if (axis == axis_self) ns.set_type(s.type()); + + for (const xpath_node* it = s.begin(); it != s.end(); ++it) { + size_t size = ns.size(); + + // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes + if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); + + step_fill(ns, *it, stack.result, once, v); + if (_right) apply_predicates(ns, size, stack, eval); + } + } else { + step_fill(ns, c.n, stack.result, once, v); + if (_right) apply_predicates(ns, 0, stack, eval); + } + + // child, attribute and self axes always generate unique set of nodes + // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice + if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted) + ns.remove_duplicates(); + + return ns; + } + +public: + xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value): + _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) { + assert(type == ast_string_constant); + _data.string = value; + } + + xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value): + _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) { + assert(type == ast_number_constant); + _data.number = value; + } + + xpath_ast_node(ast_type_t type, xpath_value_type rettype_, 
xpath_variable* value): + _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) { + assert(type == ast_variable); + _data.variable = value; + } + + xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0): + _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0) { + } + + xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents): + _type(static_cast(type)), _rettype(xpath_type_node_set), _axis(static_cast(axis)), _test(static_cast(test)), _left(left), _right(0), _next(0) { + assert(type == ast_step); + _data.nodetest = contents; + } + + xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test): + _type(static_cast(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast(test)), _left(left), _right(right), _next(0) { + assert(type == ast_filter || type == ast_predicate); + } + + void set_next(xpath_ast_node* value) { + _next = value; + } + + void set_right(xpath_ast_node* value) { + _right = value; + } + + bool eval_boolean(const xpath_context& c, const xpath_stack& stack) { + switch (_type) { + case ast_op_or: + return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack); + + case ast_op_and: + return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack); + + case ast_op_equal: + return compare_eq(_left, _right, c, stack, equal_to()); + + case ast_op_not_equal: + return compare_eq(_left, _right, c, stack, not_equal_to()); + + case ast_op_less: + return compare_rel(_left, _right, c, stack, less()); + + case ast_op_greater: + return compare_rel(_right, _left, c, stack, less()); + + case ast_op_less_or_equal: + return compare_rel(_left, _right, c, stack, less_equal()); + + case ast_op_greater_or_equal: + return compare_rel(_right, _left, c, stack, less_equal()); + + case 
ast_func_starts_with: { + xpath_allocator_capture cr(stack.result); + + xpath_string lr = _left->eval_string(c, stack); + xpath_string rr = _right->eval_string(c, stack); + + return starts_with(lr.c_str(), rr.c_str()); + } + + case ast_func_contains: { + xpath_allocator_capture cr(stack.result); + + xpath_string lr = _left->eval_string(c, stack); + xpath_string rr = _right->eval_string(c, stack); + + return find_substring(lr.c_str(), rr.c_str()) != 0; + } + + case ast_func_boolean: + return _left->eval_boolean(c, stack); + + case ast_func_not: + return !_left->eval_boolean(c, stack); + + case ast_func_true: + return true; + + case ast_func_false: + return false; + + case ast_func_lang: { + if (c.n.attribute()) return false; + + xpath_allocator_capture cr(stack.result); + + xpath_string lang = _left->eval_string(c, stack); + + for (xml_node n = c.n.node(); n; n = n.parent()) { + xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); + + if (a) { + const char_t* value = a.value(); + + // strnicmp / strncasecmp is not portable + for (const char_t* lit = lang.c_str(); *lit; ++lit) { + if (tolower_ascii(*lit) != tolower_ascii(*value)) return false; + ++value; + } + + return *value == 0 || *value == '-'; + } + } + + return false; + } + + case ast_opt_compare_attribute: { + const char_t* value = (_right->_type == ast_string_constant) ? 
_right->_data.string : _right->_data.variable->get_string(); + + xml_attribute attr = c.n.node().attribute(_left->_data.nodetest); + + return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name()); + } + + case ast_variable: { + assert(_rettype == _data.variable->type()); + + if (_rettype == xpath_type_boolean) + return _data.variable->get_boolean(); + + // fallthrough to type conversion + } + + default: { + switch (_rettype) { + case xpath_type_number: + return convert_number_to_boolean(eval_number(c, stack)); + + case xpath_type_string: { + xpath_allocator_capture cr(stack.result); + + return !eval_string(c, stack).empty(); + } + + case xpath_type_node_set: { + xpath_allocator_capture cr(stack.result); + + return !eval_node_set(c, stack, nodeset_eval_any).empty(); + } + + default: + assert(!"Wrong expression for return type boolean"); + return false; + } + } + } + } + + double eval_number(const xpath_context& c, const xpath_stack& stack) { + switch (_type) { + case ast_op_add: + return _left->eval_number(c, stack) + _right->eval_number(c, stack); + + case ast_op_subtract: + return _left->eval_number(c, stack) - _right->eval_number(c, stack); + + case ast_op_multiply: + return _left->eval_number(c, stack) * _right->eval_number(c, stack); + + case ast_op_divide: + return _left->eval_number(c, stack) / _right->eval_number(c, stack); + + case ast_op_mod: + return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack)); + + case ast_op_negate: + return -_left->eval_number(c, stack); + + case ast_number_constant: + return _data.number; + + case ast_func_last: + return static_cast(c.size); + + case ast_func_position: + return static_cast(c.position); + + case ast_func_count: { + xpath_allocator_capture cr(stack.result); + + return static_cast(_left->eval_node_set(c, stack, nodeset_eval_all).size()); + } + + case ast_func_string_length_0: { + xpath_allocator_capture cr(stack.result); + + return static_cast(string_value(c.n, 
stack.result).length()); + } + + case ast_func_string_length_1: { + xpath_allocator_capture cr(stack.result); + + return static_cast(_left->eval_string(c, stack).length()); + } + + case ast_func_number_0: { + xpath_allocator_capture cr(stack.result); + + return convert_string_to_number(string_value(c.n, stack.result).c_str()); + } + + case ast_func_number_1: + return _left->eval_number(c, stack); + + case ast_func_sum: { + xpath_allocator_capture cr(stack.result); + + double r = 0; + + xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all); + + for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) { + xpath_allocator_capture cri(stack.result); + + r += convert_string_to_number(string_value(*it, stack.result).c_str()); + } + + return r; + } + + case ast_func_floor: { + double r = _left->eval_number(c, stack); + + return r == r ? floor(r) : r; + } + + case ast_func_ceiling: { + double r = _left->eval_number(c, stack); + + return r == r ? ceil(r) : r; + } + + case ast_func_round: + return round_nearest_nzero(_left->eval_number(c, stack)); + + case ast_variable: { + assert(_rettype == _data.variable->type()); + + if (_rettype == xpath_type_number) + return _data.variable->get_number(); + + // fallthrough to type conversion + } + + default: { + switch (_rettype) { + case xpath_type_boolean: + return eval_boolean(c, stack) ? 
1 : 0; + + case xpath_type_string: { + xpath_allocator_capture cr(stack.result); + + return convert_string_to_number(eval_string(c, stack).c_str()); + } + + case xpath_type_node_set: { + xpath_allocator_capture cr(stack.result); + + return convert_string_to_number(eval_string(c, stack).c_str()); + } + + default: + assert(!"Wrong expression for return type number"); + return 0; + } + + } + } + } + + xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) { + assert(_type == ast_func_concat); + + xpath_allocator_capture ct(stack.temp); + + // count the string number + size_t count = 1; + for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++; + + // gather all strings + xpath_string static_buffer[4]; + xpath_string* buffer = static_buffer; + + // allocate on-heap for large concats + if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) { + buffer = static_cast(stack.temp->allocate(count * sizeof(xpath_string))); + assert(buffer); + } + + // evaluate all strings to temporary stack + xpath_stack swapped_stack = {stack.temp, stack.result}; + + buffer[0] = _left->eval_string(c, swapped_stack); + + size_t pos = 1; + for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack); + assert(pos == count); + + // get total length + size_t length = 0; + for (size_t i = 0; i < count; ++i) length += buffer[i].length(); + + // create final string + char_t* result = static_cast(stack.result->allocate((length + 1) * sizeof(char_t))); + assert(result); + + char_t* ri = result; + + for (size_t j = 0; j < count; ++j) + for (const char_t* bi = buffer[j].c_str(); *bi; ++bi) + *ri++ = *bi; + + *ri = 0; + + return xpath_string::from_heap_preallocated(result, ri); + } + + xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) { + switch (_type) { + case ast_string_constant: + return xpath_string::from_const(_data.string); + + case ast_func_local_name_0: { + xpath_node na = c.n; + + return 
xpath_string::from_const(local_name(na)); + } + + case ast_func_local_name_1: { + xpath_allocator_capture cr(stack.result); + + xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); + xpath_node na = ns.first(); + + return xpath_string::from_const(local_name(na)); + } + + case ast_func_name_0: { + xpath_node na = c.n; + + return xpath_string::from_const(qualified_name(na)); + } + + case ast_func_name_1: { + xpath_allocator_capture cr(stack.result); + + xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); + xpath_node na = ns.first(); + + return xpath_string::from_const(qualified_name(na)); + } + + case ast_func_namespace_uri_0: { + xpath_node na = c.n; + + return xpath_string::from_const(namespace_uri(na)); + } + + case ast_func_namespace_uri_1: { + xpath_allocator_capture cr(stack.result); + + xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); + xpath_node na = ns.first(); + + return xpath_string::from_const(namespace_uri(na)); + } + + case ast_func_string_0: + return string_value(c.n, stack.result); + + case ast_func_string_1: + return _left->eval_string(c, stack); + + case ast_func_concat: + return eval_string_concat(c, stack); + + case ast_func_substring_before: { + xpath_allocator_capture cr(stack.temp); + + xpath_stack swapped_stack = {stack.temp, stack.result}; + + xpath_string s = _left->eval_string(c, swapped_stack); + xpath_string p = _right->eval_string(c, swapped_stack); + + const char_t* pos = find_substring(s.c_str(), p.c_str()); + + return pos ? 
xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string(); + } + + case ast_func_substring_after: { + xpath_allocator_capture cr(stack.temp); + + xpath_stack swapped_stack = {stack.temp, stack.result}; + + xpath_string s = _left->eval_string(c, swapped_stack); + xpath_string p = _right->eval_string(c, swapped_stack); + + const char_t* pos = find_substring(s.c_str(), p.c_str()); + if (!pos) return xpath_string(); + + const char_t* rbegin = pos + p.length(); + const char_t* rend = s.c_str() + s.length(); + + return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); + } + + case ast_func_substring_2: { + xpath_allocator_capture cr(stack.temp); + + xpath_stack swapped_stack = {stack.temp, stack.result}; + + xpath_string s = _left->eval_string(c, swapped_stack); + size_t s_length = s.length(); + + double first = round_nearest(_right->eval_number(c, stack)); + + if (is_nan(first)) return xpath_string(); // NaN + else if (first >= s_length + 1) return xpath_string(); + + size_t pos = first < 1 ? 1 : static_cast(first); + assert(1 <= pos && pos <= s_length + 1); + + const char_t* rbegin = s.c_str() + (pos - 1); + const char_t* rend = s.c_str() + s.length(); + + return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); + } + + case ast_func_substring_3: { + xpath_allocator_capture cr(stack.temp); + + xpath_stack swapped_stack = {stack.temp, stack.result}; + + xpath_string s = _left->eval_string(c, swapped_stack); + size_t s_length = s.length(); + + double first = round_nearest(_right->eval_number(c, stack)); + double last = first + round_nearest(_right->_next->eval_number(c, stack)); + + if (is_nan(first) || is_nan(last)) return xpath_string(); + else if (first >= s_length + 1) return xpath_string(); + else if (first >= last) return xpath_string(); + else if (last < 1) return xpath_string(); + + size_t pos = first < 1 ? 
1 : static_cast(first); + size_t end = last >= s_length + 1 ? s_length + 1 : static_cast(last); + + assert(1 <= pos && pos <= end && end <= s_length + 1); + const char_t* rbegin = s.c_str() + (pos - 1); + const char_t* rend = s.c_str() + (end - 1); + + return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result); + } + + case ast_func_normalize_space_0: { + xpath_string s = string_value(c.n, stack.result); + + char_t* begin = s.data(stack.result); + char_t* end = normalize_space(begin); + + return xpath_string::from_heap_preallocated(begin, end); + } + + case ast_func_normalize_space_1: { + xpath_string s = _left->eval_string(c, stack); + + char_t* begin = s.data(stack.result); + char_t* end = normalize_space(begin); + + return xpath_string::from_heap_preallocated(begin, end); + } + + case ast_func_translate: { + xpath_allocator_capture cr(stack.temp); + + xpath_stack swapped_stack = {stack.temp, stack.result}; + + xpath_string s = _left->eval_string(c, stack); + xpath_string from = _right->eval_string(c, swapped_stack); + xpath_string to = _right->_next->eval_string(c, swapped_stack); + + char_t* begin = s.data(stack.result); + char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); + + return xpath_string::from_heap_preallocated(begin, end); + } + + case ast_opt_translate_table: { + xpath_string s = _left->eval_string(c, stack); + + char_t* begin = s.data(stack.result); + char_t* end = translate_table(begin, _data.table); + + return xpath_string::from_heap_preallocated(begin, end); + } + + case ast_variable: { + assert(_rettype == _data.variable->type()); + + if (_rettype == xpath_type_string) + return xpath_string::from_const(_data.variable->get_string()); + + // fallthrough to type conversion + } + + default: { + switch (_rettype) { + case xpath_type_boolean: + return xpath_string::from_const(eval_boolean(c, stack) ? 
PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); + + case xpath_type_number: + return convert_number_to_string(eval_number(c, stack), stack.result); + + case xpath_type_node_set: { + xpath_allocator_capture cr(stack.temp); + + xpath_stack swapped_stack = {stack.temp, stack.result}; + + xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); + return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); + } + + default: + assert(!"Wrong expression for return type string"); + return xpath_string(); + } + } + } + } + + xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) { + switch (_type) { + case ast_op_union: { + xpath_allocator_capture cr(stack.temp); + + xpath_stack swapped_stack = {stack.temp, stack.result}; + + xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval); + xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval); + + // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother + rs.set_type(xpath_node_set::type_unsorted); + + rs.append(ls.begin(), ls.end(), stack.result); + rs.remove_duplicates(); + + return rs; + } + + case ast_filter: { + xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? 
nodeset_eval_first : nodeset_eval_all); + + // either expression is a number or it contains position() call; sort by document order + if (_test != predicate_posinv) set.sort_do(); + + bool once = eval_once(set.type(), eval); + + apply_predicate(set, 0, stack, once); + + return set; + } + + case ast_func_id: + return xpath_node_set_raw(); + + case ast_step: { + switch (_axis) { + case axis_ancestor: + return step_do(c, stack, eval, axis_to_type()); + + case axis_ancestor_or_self: + return step_do(c, stack, eval, axis_to_type()); + + case axis_attribute: + return step_do(c, stack, eval, axis_to_type()); + + case axis_child: + return step_do(c, stack, eval, axis_to_type()); + + case axis_descendant: + return step_do(c, stack, eval, axis_to_type()); + + case axis_descendant_or_self: + return step_do(c, stack, eval, axis_to_type()); + + case axis_following: + return step_do(c, stack, eval, axis_to_type()); + + case axis_following_sibling: + return step_do(c, stack, eval, axis_to_type()); + + case axis_namespace: + // namespaced axis is not supported + return xpath_node_set_raw(); + + case axis_parent: + return step_do(c, stack, eval, axis_to_type()); + + case axis_preceding: + return step_do(c, stack, eval, axis_to_type()); + + case axis_preceding_sibling: + return step_do(c, stack, eval, axis_to_type()); + + case axis_self: + return step_do(c, stack, eval, axis_to_type()); + + default: + assert(!"Unknown axis"); + return xpath_node_set_raw(); + } + } + + case ast_step_root: { + assert(!_right); // root step can't have any predicates + + xpath_node_set_raw ns; + + ns.set_type(xpath_node_set::type_sorted); + + if (c.n.node()) ns.push_back(c.n.node().root(), stack.result); + else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result); + + return ns; + } + + case ast_variable: { + assert(_rettype == _data.variable->type()); + + if (_rettype == xpath_type_node_set) { + const xpath_node_set& s = _data.variable->get_node_set(); + + xpath_node_set_raw ns; + + 
ns.set_type(s.type()); + ns.append(s.begin(), s.end(), stack.result); + + return ns; + } + + // fallthrough to type conversion + } + + default: + assert(!"Wrong expression for return type node set"); + return xpath_node_set_raw(); + } + } + + void optimize(xpath_allocator* alloc) { + if (_left) _left->optimize(alloc); + if (_right) _right->optimize(alloc); + if (_next) _next->optimize(alloc); + + optimize_self(alloc); + } + + void optimize_self(xpath_allocator* alloc) { + // Rewrite [position()=expr] with [expr] + // Note that this step has to go before classification to recognize [position()=1] + if ((_type == ast_filter || _type == ast_predicate) && + _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number) { + _right = _right->_right; + } + + // Classify filter/predicate ops to perform various optimizations during evaluation + if (_type == ast_filter || _type == ast_predicate) { + assert(_test == predicate_default); + + if (_right->_type == ast_number_constant && _right->_data.number == 1.0) + _test = predicate_constant_one; + else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last)) + _test = predicate_constant; + else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr()) + _test = predicate_posinv; + } + + // Rewrite descendant-or-self::node()/child::foo with descendant::foo + // The former is a full form of //foo, the latter is much faster since it executes the node test immediately + // Do a similar kind of rewrite for self/descendant/descendant-or-self axes + // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1]) + if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left && + _left->_type == ast_step && _left->_axis == axis_descendant_or_self && 
_left->_test == nodetest_type_node && !_left->_right && + is_posinv_step()) { + if (_axis == axis_child || _axis == axis_descendant) + _axis = axis_descendant; + else + _axis = axis_descendant_or_self; + + _left = _left->_left; + } + + // Use optimized lookup table implementation for translate() with constant arguments + if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) { + unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string); + + if (table) { + _type = ast_opt_translate_table; + _data.table = table; + } + } + + // Use optimized path for @attr = 'value' or @attr = $value + if (_type == ast_op_equal && + _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right && + (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string))) { + _type = ast_opt_compare_attribute; + } + } + + bool is_posinv_expr() const { + switch (_type) { + case ast_func_position: + case ast_func_last: + return false; + + case ast_string_constant: + case ast_number_constant: + case ast_variable: + return true; + + case ast_step: + case ast_step_root: + return true; + + case ast_predicate: + case ast_filter: + return true; + + default: + if (_left && !_left->is_posinv_expr()) return false; + + for (xpath_ast_node* n = _right; n; n = n->_next) + if (!n->is_posinv_expr()) return false; + + return true; + } + } + + bool is_posinv_step() const { + assert(_type == ast_step); + + for (xpath_ast_node* n = _right; n; n = n->_next) { + assert(n->_type == ast_predicate); + + if (n->_test != predicate_posinv) + return false; + } + + return true; + } + + xpath_value_type rettype() const { + return static_cast(_rettype); + } +}; + +struct xpath_parser { + xpath_allocator* _alloc; + xpath_lexer _lexer; + + const char_t* _query; + xpath_variable_set* _variables; + + 
xpath_parse_result* _result; + + char_t _scratch[32]; + +#ifdef PUGIXML_NO_EXCEPTIONS + jmp_buf _error_handler; +#endif + + void throw_error(const char* message) { + _result->error = message; + _result->offset = _lexer.current_pos() - _query; + +#ifdef PUGIXML_NO_EXCEPTIONS + longjmp(_error_handler, 1); +#else + throw xpath_exception(*_result); +#endif + } + + void throw_error_oom() { +#ifdef PUGIXML_NO_EXCEPTIONS + throw_error("Out of memory"); +#else + throw std::bad_alloc(); +#endif + } + + void* alloc_node() { + void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node)); + + if (!result) throw_error_oom(); + + return result; + } + + const char_t* alloc_string(const xpath_lexer_string& value) { + if (value.begin) { + size_t length = static_cast(value.end - value.begin); + + char_t* c = static_cast(_alloc->allocate_nothrow((length + 1) * sizeof(char_t))); + if (!c) throw_error_oom(); + assert(c); // workaround for clang static analysis + + memcpy(c, value.begin, length * sizeof(char_t)); + c[length] = 0; + + return c; + } else return 0; + } + + xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) { + assert(argc <= 1); + + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); + + return new (alloc_node()) xpath_ast_node(argc == 0 ? 
type0 : type1, xpath_type_string, args[0]); + } + + xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) { + switch (name.begin[0]) { + case 'b': + if (name == PUGIXML_TEXT("boolean") && argc == 1) + return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]); + + break; + + case 'c': + if (name == PUGIXML_TEXT("count") && argc == 1) { + if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); + return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); + } else if (name == PUGIXML_TEXT("contains") && argc == 2) + return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); + else if (name == PUGIXML_TEXT("concat") && argc >= 2) + return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("ceiling") && argc == 1) + return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]); + + break; + + case 'f': + if (name == PUGIXML_TEXT("false") && argc == 0) + return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean); + else if (name == PUGIXML_TEXT("floor") && argc == 1) + return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]); + + break; + + case 'i': + if (name == PUGIXML_TEXT("id") && argc == 1) + return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]); + + break; + + case 'l': + if (name == PUGIXML_TEXT("last") && argc == 0) + return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number); + else if (name == PUGIXML_TEXT("lang") && argc == 1) + return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); + else if (name == PUGIXML_TEXT("local-name") && argc <= 1) + return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args); + + break; + + case 'n': + if 
(name == PUGIXML_TEXT("name") && argc <= 1) + return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args); + else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) + return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args); + else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) + return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("not") && argc == 1) + return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]); + else if (name == PUGIXML_TEXT("number") && argc <= 1) + return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); + + break; + + case 'p': + if (name == PUGIXML_TEXT("position") && argc == 0) + return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number); + + break; + + case 'r': + if (name == PUGIXML_TEXT("round") && argc == 1) + return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]); + + break; + + case 's': + if (name == PUGIXML_TEXT("string") && argc <= 1) + return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); + else if (name == PUGIXML_TEXT("string-length") && argc <= 1) + return new (alloc_node()) xpath_ast_node(argc == 0 ? 
ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); + else if (name == PUGIXML_TEXT("starts-with") && argc == 2) + return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); + else if (name == PUGIXML_TEXT("substring-before") && argc == 2) + return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("substring-after") && argc == 2) + return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) + return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("sum") && argc == 1) { + if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); + return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); + } + + break; + + case 't': + if (name == PUGIXML_TEXT("translate") && argc == 3) + return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("true") && argc == 0) + return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean); + + break; + + default: + break; + } + + throw_error("Unrecognized function or wrong parameter count"); + + return 0; + } + + axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) { + specified = true; + + switch (name.begin[0]) { + case 'a': + if (name == PUGIXML_TEXT("ancestor")) + return axis_ancestor; + else if (name == PUGIXML_TEXT("ancestor-or-self")) + return axis_ancestor_or_self; + else if (name == PUGIXML_TEXT("attribute")) + return axis_attribute; + + break; + + case 'c': + if (name == PUGIXML_TEXT("child")) + return axis_child; + + break; + + case 'd': + if (name == PUGIXML_TEXT("descendant")) + 
return axis_descendant; + else if (name == PUGIXML_TEXT("descendant-or-self")) + return axis_descendant_or_self; + + break; + + case 'f': + if (name == PUGIXML_TEXT("following")) + return axis_following; + else if (name == PUGIXML_TEXT("following-sibling")) + return axis_following_sibling; + + break; + + case 'n': + if (name == PUGIXML_TEXT("namespace")) + return axis_namespace; + + break; + + case 'p': + if (name == PUGIXML_TEXT("parent")) + return axis_parent; + else if (name == PUGIXML_TEXT("preceding")) + return axis_preceding; + else if (name == PUGIXML_TEXT("preceding-sibling")) + return axis_preceding_sibling; + + break; + + case 's': + if (name == PUGIXML_TEXT("self")) + return axis_self; + + break; + + default: + break; + } + + specified = false; + return axis_child; + } + + nodetest_t parse_node_test_type(const xpath_lexer_string& name) { + switch (name.begin[0]) { + case 'c': + if (name == PUGIXML_TEXT("comment")) + return nodetest_type_comment; + + break; + + case 'n': + if (name == PUGIXML_TEXT("node")) + return nodetest_type_node; + + break; + + case 'p': + if (name == PUGIXML_TEXT("processing-instruction")) + return nodetest_type_pi; + + break; + + case 't': + if (name == PUGIXML_TEXT("text")) + return nodetest_type_text; + + break; + + default: + break; + } + + return nodetest_none; + } + + // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall + xpath_ast_node* parse_primary_expression() { + switch (_lexer.current()) { + case lex_var_ref: { + xpath_lexer_string name = _lexer.contents(); + + if (!_variables) + throw_error("Unknown variable: variable set is not provided"); + + xpath_variable* var = 0; + if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) + throw_error_oom(); + + if (!var) + throw_error("Unknown variable: variable set does not contain the given name"); + + _lexer.next(); + + return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var); + } + + case lex_open_brace: { + 
_lexer.next(); + + xpath_ast_node* n = parse_expression(); + + if (_lexer.current() != lex_close_brace) + throw_error("Unmatched braces"); + + _lexer.next(); + + return n; + } + + case lex_quoted_string: { + const char_t* value = alloc_string(_lexer.contents()); + + xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value); + _lexer.next(); + + return n; + } + + case lex_number: { + double value = 0; + + if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) + throw_error_oom(); + + xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); + _lexer.next(); + + return n; + } + + case lex_string: { + xpath_ast_node* args[2] = {0}; + size_t argc = 0; + + xpath_lexer_string function = _lexer.contents(); + _lexer.next(); + + xpath_ast_node* last_arg = 0; + + if (_lexer.current() != lex_open_brace) + throw_error("Unrecognized function call"); + _lexer.next(); + + if (_lexer.current() != lex_close_brace) + args[argc++] = parse_expression(); + + while (_lexer.current() != lex_close_brace) { + if (_lexer.current() != lex_comma) + throw_error("No comma between function arguments"); + _lexer.next(); + + xpath_ast_node* n = parse_expression(); + + if (argc < 2) args[argc] = n; + else last_arg->set_next(n); + + argc++; + last_arg = n; + } + + _lexer.next(); + + return parse_function(function, argc, args); + } + + default: + throw_error("Unrecognizable primary expression"); + + return 0; + } + } + + // FilterExpr ::= PrimaryExpr | FilterExpr Predicate + // Predicate ::= '[' PredicateExpr ']' + // PredicateExpr ::= Expr + xpath_ast_node* parse_filter_expression() { + xpath_ast_node* n = parse_primary_expression(); + + while (_lexer.current() == lex_open_square_brace) { + _lexer.next(); + + xpath_ast_node* expr = parse_expression(); + + if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set"); + + n = new 
(alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default); + + if (_lexer.current() != lex_close_square_brace) + throw_error("Unmatched square brace"); + + _lexer.next(); + } + + return n; + } + + // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep + // AxisSpecifier ::= AxisName '::' | '@'? + // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' + // NameTest ::= '*' | NCName ':' '*' | QName + // AbbreviatedStep ::= '.' | '..' + xpath_ast_node* parse_step(xpath_ast_node* set) { + if (set && set->rettype() != xpath_type_node_set) + throw_error("Step has to be applied to node set"); + + bool axis_specified = false; + axis_t axis = axis_child; // implied child axis + + if (_lexer.current() == lex_axis_attribute) { + axis = axis_attribute; + axis_specified = true; + + _lexer.next(); + } else if (_lexer.current() == lex_dot) { + _lexer.next(); + + return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0); + } else if (_lexer.current() == lex_double_dot) { + _lexer.next(); + + return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0); + } + + nodetest_t nt_type = nodetest_none; + xpath_lexer_string nt_name; + + if (_lexer.current() == lex_string) { + // node name test + nt_name = _lexer.contents(); + _lexer.next(); + + // was it an axis name? 
+ if (_lexer.current() == lex_double_colon) { + // parse axis name + if (axis_specified) throw_error("Two axis specifiers in one step"); + + axis = parse_axis_name(nt_name, axis_specified); + + if (!axis_specified) throw_error("Unknown axis"); + + // read actual node test + _lexer.next(); + + if (_lexer.current() == lex_multiply) { + nt_type = nodetest_all; + nt_name = xpath_lexer_string(); + _lexer.next(); + } else if (_lexer.current() == lex_string) { + nt_name = _lexer.contents(); + _lexer.next(); + } else throw_error("Unrecognized node test"); + } + + if (nt_type == nodetest_none) { + // node type test or processing-instruction + if (_lexer.current() == lex_open_brace) { + _lexer.next(); + + if (_lexer.current() == lex_close_brace) { + _lexer.next(); + + nt_type = parse_node_test_type(nt_name); + + if (nt_type == nodetest_none) throw_error("Unrecognized node type"); + + nt_name = xpath_lexer_string(); + } else if (nt_name == PUGIXML_TEXT("processing-instruction")) { + if (_lexer.current() != lex_quoted_string) + throw_error("Only literals are allowed as arguments to processing-instruction()"); + + nt_type = nodetest_pi; + nt_name = _lexer.contents(); + _lexer.next(); + + if (_lexer.current() != lex_close_brace) + throw_error("Unmatched brace near processing-instruction()"); + _lexer.next(); + } else + throw_error("Unmatched brace near node type test"); + + } + // QName or NCName:* + else { + if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') { // NCName:* + nt_name.end--; // erase * + + nt_type = nodetest_all_in_namespace; + } else nt_type = nodetest_name; + } + } + } else if (_lexer.current() == lex_multiply) { + nt_type = nodetest_all; + _lexer.next(); + } else throw_error("Unrecognized node test"); + + xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name)); + + xpath_ast_node* last = 0; + + while (_lexer.current() == lex_open_square_brace) { + _lexer.next(); + + 
xpath_ast_node* expr = parse_expression(); + + xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default); + + if (_lexer.current() != lex_close_square_brace) + throw_error("Unmatched square brace"); + _lexer.next(); + + if (last) last->set_next(pred); + else n->set_right(pred); + + last = pred; + } + + return n; + } + + // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step + xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) { + xpath_ast_node* n = parse_step(set); + + while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) { + lexeme_t l = _lexer.current(); + _lexer.next(); + + if (l == lex_double_slash) + n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + + n = parse_step(n); + } + + return n; + } + + // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath + // AbsoluteLocationPath ::= '/' RelativeLocationPath? 
| '//' RelativeLocationPath + xpath_ast_node* parse_location_path() { + if (_lexer.current() == lex_slash) { + _lexer.next(); + + xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); + + // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path + lexeme_t l = _lexer.current(); + + if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) + return parse_relative_location_path(n); + else + return n; + } else if (_lexer.current() == lex_double_slash) { + _lexer.next(); + + xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); + n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + + return parse_relative_location_path(n); + } + + // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1 + return parse_relative_location_path(0); + } + + // PathExpr ::= LocationPath + // | FilterExpr + // | FilterExpr '/' RelativeLocationPath + // | FilterExpr '//' RelativeLocationPath + // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr + // UnaryExpr ::= UnionExpr | '-' UnaryExpr + xpath_ast_node* parse_path_or_unary_expression() { + // Clarification. + // PathExpr begins with either LocationPath or FilterExpr. + // FilterExpr begins with PrimaryExpr + // PrimaryExpr begins with '$' in case of it being a variable reference, + // '(' in case of it being an expression, string literal, number constant or + // function call. 
+ + if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || + _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || + _lexer.current() == lex_string) { + if (_lexer.current() == lex_string) { + // This is either a function call, or not - if not, we shall proceed with location path + const char_t* state = _lexer.state(); + + while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; + + if (*state != '(') return parse_location_path(); + + // This looks like a function call; however this still can be a node-test. Check it. + if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path(); + } + + xpath_ast_node* n = parse_filter_expression(); + + if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) { + lexeme_t l = _lexer.current(); + _lexer.next(); + + if (l == lex_double_slash) { + if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set"); + + n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + } + + // select from location path + return parse_relative_location_path(n); + } + + return n; + } else if (_lexer.current() == lex_minus) { + _lexer.next(); + + // precedence 7+ - only parses union expressions + xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7); + + return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); + } else + return parse_location_path(); + } + + struct binary_op_t { + ast_type_t asttype; + xpath_value_type rettype; + int precedence; + + binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) { + } + + binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_) { + } + + static binary_op_t parse(xpath_lexer& lexer) { + switch (lexer.current()) { + case lex_string: + if (lexer.contents() == PUGIXML_TEXT("or")) + return 
binary_op_t(ast_op_or, xpath_type_boolean, 1); + else if (lexer.contents() == PUGIXML_TEXT("and")) + return binary_op_t(ast_op_and, xpath_type_boolean, 2); + else if (lexer.contents() == PUGIXML_TEXT("div")) + return binary_op_t(ast_op_divide, xpath_type_number, 6); + else if (lexer.contents() == PUGIXML_TEXT("mod")) + return binary_op_t(ast_op_mod, xpath_type_number, 6); + else + return binary_op_t(); + + case lex_equal: + return binary_op_t(ast_op_equal, xpath_type_boolean, 3); + + case lex_not_equal: + return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3); + + case lex_less: + return binary_op_t(ast_op_less, xpath_type_boolean, 4); + + case lex_greater: + return binary_op_t(ast_op_greater, xpath_type_boolean, 4); + + case lex_less_or_equal: + return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4); + + case lex_greater_or_equal: + return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4); + + case lex_plus: + return binary_op_t(ast_op_add, xpath_type_number, 5); + + case lex_minus: + return binary_op_t(ast_op_subtract, xpath_type_number, 5); + + case lex_multiply: + return binary_op_t(ast_op_multiply, xpath_type_number, 6); + + case lex_union: + return binary_op_t(ast_op_union, xpath_type_node_set, 7); + + default: + return binary_op_t(); + } + } + }; + + xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit) { + binary_op_t op = binary_op_t::parse(_lexer); + + while (op.asttype != ast_unknown && op.precedence >= limit) { + _lexer.next(); + + xpath_ast_node* rhs = parse_path_or_unary_expression(); + + binary_op_t nextop = binary_op_t::parse(_lexer); + + while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) { + rhs = parse_expression_rec(rhs, nextop.precedence); + + nextop = binary_op_t::parse(_lexer); + } + + if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) + throw_error("Union operator has to be applied to node sets"); + + lhs = new 
(alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs); + + op = binary_op_t::parse(_lexer); + } + + return lhs; + } + + // Expr ::= OrExpr + // OrExpr ::= AndExpr | OrExpr 'or' AndExpr + // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr + // EqualityExpr ::= RelationalExpr + // | EqualityExpr '=' RelationalExpr + // | EqualityExpr '!=' RelationalExpr + // RelationalExpr ::= AdditiveExpr + // | RelationalExpr '<' AdditiveExpr + // | RelationalExpr '>' AdditiveExpr + // | RelationalExpr '<=' AdditiveExpr + // | RelationalExpr '>=' AdditiveExpr + // AdditiveExpr ::= MultiplicativeExpr + // | AdditiveExpr '+' MultiplicativeExpr + // | AdditiveExpr '-' MultiplicativeExpr + // MultiplicativeExpr ::= UnaryExpr + // | MultiplicativeExpr '*' UnaryExpr + // | MultiplicativeExpr 'div' UnaryExpr + // | MultiplicativeExpr 'mod' UnaryExpr + xpath_ast_node* parse_expression() { + return parse_expression_rec(parse_path_or_unary_expression(), 0); + } + + xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) { + } + + xpath_ast_node* parse() { + xpath_ast_node* result = parse_expression(); + + if (_lexer.current() != lex_eof) { + // there are still unparsed tokens left, error + throw_error("Incorrect query"); + } + + return result; + } + + static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) { + xpath_parser parser(query, variables, alloc, result); + +#ifdef PUGIXML_NO_EXCEPTIONS + int error = setjmp(parser._error_handler); + + return (error == 0) ? 
parser.parse() : 0; +#else + return parser.parse(); +#endif + } +}; + +struct xpath_query_impl { + static xpath_query_impl* create() { + void* memory = xml_memory::allocate(sizeof(xpath_query_impl)); + if (!memory) return 0; + + return new (memory) xpath_query_impl(); + } + + static void destroy(xpath_query_impl* impl) { + // free all allocated pages + impl->alloc.release(); + + // free allocator memory (with the first page) + xml_memory::deallocate(impl); + } + + xpath_query_impl(): root(0), alloc(&block) { + block.next = 0; + block.capacity = sizeof(block.data); + } + + xpath_ast_node* root; + xpath_allocator alloc; + xpath_memory_block block; +}; + +PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd) +{ + if (!impl) return xpath_string(); + +#ifdef PUGIXML_NO_EXCEPTIONS + if (setjmp(sd.error_handler)) return xpath_string(); +#endif + + xpath_context c(n, 1, 1); + + return impl->root->eval_string(c, sd.stack); +} + +PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) +{ + if (!impl) return 0; + + if (impl->root->rettype() != xpath_type_node_set) { +#ifdef PUGIXML_NO_EXCEPTIONS + return 0; +#else + xpath_parse_result res; + res.error = "Expression does not evaluate to node set"; + + throw xpath_exception(res); +#endif + } + + return impl->root; +} PUGI__NS_END namespace pugi { #ifndef PUGIXML_NO_EXCEPTIONS - PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_) - { - assert(_result.error); - } - - PUGI__FN const char* xpath_exception::what() const throw() - { - return _result.error; - } +PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_) +{ + assert(_result.error); +} - PUGI__FN const xpath_parse_result& xpath_exception::result() const - { - return _result; - } +PUGI__FN const char* xpath_exception::what() const throw() +{ + return _result.error; +} + +PUGI__FN const xpath_parse_result& 
xpath_exception::result() const +{ + return _result; +} #endif - - PUGI__FN xpath_node::xpath_node() - { - } - - PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_) - { - } - - PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) - { - } - PUGI__FN xml_node xpath_node::node() const - { - return _attribute ? xml_node() : _node; - } - - PUGI__FN xml_attribute xpath_node::attribute() const - { - return _attribute; - } - - PUGI__FN xml_node xpath_node::parent() const - { - return _attribute ? _node : _node.parent(); - } +PUGI__FN xpath_node::xpath_node() +{ +} - PUGI__FN static void unspecified_bool_xpath_node(xpath_node***) - { - } +PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_) +{ +} - PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const - { - return (_node || _attribute) ? unspecified_bool_xpath_node : 0; - } - - PUGI__FN bool xpath_node::operator!() const - { - return !(_node || _attribute); - } +PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) +{ +} - PUGI__FN bool xpath_node::operator==(const xpath_node& n) const - { - return _node == n._node && _attribute == n._attribute; - } - - PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const - { - return _node != n._node || _attribute != n._attribute; - } +PUGI__FN xml_node xpath_node::node() const +{ + return _attribute ? xml_node() : _node; +} + +PUGI__FN xml_attribute xpath_node::attribute() const +{ + return _attribute; +} + +PUGI__FN xml_node xpath_node::parent() const +{ + return _attribute ? _node : _node.parent(); +} + +PUGI__FN static void unspecified_bool_xpath_node(xpath_node***) +{ +} + +PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const +{ + return (_node || _attribute) ? 
unspecified_bool_xpath_node : 0; +} + +PUGI__FN bool xpath_node::operator!() const +{ + return !(_node || _attribute); +} + +PUGI__FN bool xpath_node::operator==(const xpath_node& n) const +{ + return _node == n._node && _attribute == n._attribute; +} + +PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const +{ + return _node != n._node || _attribute != n._attribute; +} #ifdef __BORLANDC__ - PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs) - { - return (bool)lhs && rhs; - } +PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs) +{ + return (bool)lhs && rhs; +} - PUGI__FN bool operator||(const xpath_node& lhs, bool rhs) - { - return (bool)lhs || rhs; - } +PUGI__FN bool operator||(const xpath_node& lhs, bool rhs) +{ + return (bool)lhs || rhs; +} #endif - PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_) - { - assert(begin_ <= end_); +PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_) +{ + assert(begin_ <= end_); - size_t size_ = static_cast(end_ - begin_); + size_t size_ = static_cast(end_ - begin_); - if (size_ <= 1) - { - // deallocate old buffer - if (_begin != &_storage) impl::xml_memory::deallocate(_begin); + if (size_ <= 1) { + // deallocate old buffer + if (_begin != &_storage) impl::xml_memory::deallocate(_begin); - // use internal buffer - if (begin_ != end_) _storage = *begin_; + // use internal buffer + if (begin_ != end_) _storage = *begin_; - _begin = &_storage; - _end = &_storage + size_; - _type = type_; - } - else - { - // make heap copy - xpath_node* storage = static_cast(impl::xml_memory::allocate(size_ * sizeof(xpath_node))); + _begin = &_storage; + _end = &_storage + size_; + _type = type_; + } else { + // make heap copy + xpath_node* storage = static_cast(impl::xml_memory::allocate(size_ * sizeof(xpath_node))); - if (!storage) - { - #ifdef PUGIXML_NO_EXCEPTIONS - return; - #else - throw std::bad_alloc(); - #endif - } + if (!storage) { 
+#ifdef PUGIXML_NO_EXCEPTIONS + return; +#else + throw std::bad_alloc(); +#endif + } - memcpy(storage, begin_, size_ * sizeof(xpath_node)); - - // deallocate old buffer - if (_begin != &_storage) impl::xml_memory::deallocate(_begin); + memcpy(storage, begin_, size_ * sizeof(xpath_node)); - // finalize - _begin = storage; - _end = storage + size_; - _type = type_; - } - } + // deallocate old buffer + if (_begin != &_storage) impl::xml_memory::deallocate(_begin); + + // finalize + _begin = storage; + _end = storage + size_; + _type = type_; + } +} #if __cplusplus >= 201103 - PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) - { - _type = rhs._type; - _storage = rhs._storage; - _begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin; - _end = _begin + (rhs._end - rhs._begin); +PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) +{ + _type = rhs._type; + _storage = rhs._storage; + _begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin; + _end = _begin + (rhs._end - rhs._begin); - rhs._type = type_unsorted; - rhs._begin = &rhs._storage; - rhs._end = rhs._begin; - } + rhs._type = type_unsorted; + rhs._begin = &rhs._storage; + rhs._end = rhs._begin; +} #endif - PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage) - { - } +PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage) +{ +} - PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage) - { - _assign(begin_, end_, type_); - } +PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage) +{ + _assign(begin_, end_, type_); +} - PUGI__FN xpath_node_set::~xpath_node_set() - { - if (_begin != &_storage) - impl::xml_memory::deallocate(_begin); - } - - PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& 
ns): _type(type_unsorted), _begin(&_storage), _end(&_storage) - { - _assign(ns._begin, ns._end, ns._type); - } - - PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) - { - if (this == &ns) return *this; +PUGI__FN xpath_node_set::~xpath_node_set() +{ + if (_begin != &_storage) + impl::xml_memory::deallocate(_begin); +} - _assign(ns._begin, ns._end, ns._type); +PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage) +{ + _assign(ns._begin, ns._end, ns._type); +} - return *this; - } +PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) +{ + if (this == &ns) return *this; + + _assign(ns._begin, ns._end, ns._type); + + return *this; +} #if __cplusplus >= 201103 - PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage) - { - _move(rhs); - } +PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage) +{ + _move(rhs); +} - PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) - { - if (this == &rhs) return *this; +PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) +{ + if (this == &rhs) return *this; - if (_begin != &_storage) - impl::xml_memory::deallocate(_begin); + if (_begin != &_storage) + impl::xml_memory::deallocate(_begin); - _move(rhs); + _move(rhs); - return *this; - } + return *this; +} #endif - PUGI__FN xpath_node_set::type_t xpath_node_set::type() const - { - return _type; - } - - PUGI__FN size_t xpath_node_set::size() const - { - return _end - _begin; - } - - PUGI__FN bool xpath_node_set::empty() const - { - return _begin == _end; - } - - PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const - { - assert(index < size()); - return _begin[index]; - } +PUGI__FN xpath_node_set::type_t xpath_node_set::type() const +{ + return _type; +} - PUGI__FN 
xpath_node_set::const_iterator xpath_node_set::begin() const - { - return _begin; - } - - PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const - { - return _end; - } - - PUGI__FN void xpath_node_set::sort(bool reverse) - { - _type = impl::xpath_sort(_begin, _end, _type, reverse); - } +PUGI__FN size_t xpath_node_set::size() const +{ + return _end - _begin; +} - PUGI__FN xpath_node xpath_node_set::first() const - { - return impl::xpath_first(_begin, _end, _type); - } +PUGI__FN bool xpath_node_set::empty() const +{ + return _begin == _end; +} - PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0) - { - } +PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const +{ + assert(index < size()); + return _begin[index]; +} - PUGI__FN xpath_parse_result::operator bool() const - { - return error == 0; - } +PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const +{ + return _begin; +} - PUGI__FN const char* xpath_parse_result::description() const - { - return error ? 
error : "No error"; - } +PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const +{ + return _end; +} - PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0) - { - } +PUGI__FN void xpath_node_set::sort(bool reverse) +{ + _type = impl::xpath_sort(_begin, _end, _type, reverse); +} - PUGI__FN const char_t* xpath_variable::name() const - { - switch (_type) - { - case xpath_type_node_set: - return static_cast(this)->name; +PUGI__FN xpath_node xpath_node_set::first() const +{ + return impl::xpath_first(_begin, _end, _type); +} - case xpath_type_number: - return static_cast(this)->name; +PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0) +{ +} - case xpath_type_string: - return static_cast(this)->name; +PUGI__FN xpath_parse_result::operator bool() const +{ + return error == 0; +} - case xpath_type_boolean: - return static_cast(this)->name; +PUGI__FN const char* xpath_parse_result::description() const +{ + return error ? error : "No error"; +} - default: - assert(!"Invalid variable type"); - return 0; - } - } +PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0) +{ +} - PUGI__FN xpath_value_type xpath_variable::type() const - { - return _type; - } +PUGI__FN const char_t* xpath_variable::name() const +{ + switch (_type) { + case xpath_type_node_set: + return static_cast(this)->name; - PUGI__FN bool xpath_variable::get_boolean() const - { - return (_type == xpath_type_boolean) ? static_cast(this)->value : false; - } + case xpath_type_number: + return static_cast(this)->name; - PUGI__FN double xpath_variable::get_number() const - { - return (_type == xpath_type_number) ? static_cast(this)->value : impl::gen_nan(); - } + case xpath_type_string: + return static_cast(this)->name; - PUGI__FN const char_t* xpath_variable::get_string() const - { - const char_t* value = (_type == xpath_type_string) ? static_cast(this)->value : 0; - return value ? 
value : PUGIXML_TEXT(""); - } + case xpath_type_boolean: + return static_cast(this)->name; - PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const - { - return (_type == xpath_type_node_set) ? static_cast(this)->value : impl::dummy_node_set; - } + default: + assert(!"Invalid variable type"); + return 0; + } +} - PUGI__FN bool xpath_variable::set(bool value) - { - if (_type != xpath_type_boolean) return false; +PUGI__FN xpath_value_type xpath_variable::type() const +{ + return _type; +} - static_cast(this)->value = value; - return true; - } +PUGI__FN bool xpath_variable::get_boolean() const +{ + return (_type == xpath_type_boolean) ? static_cast(this)->value : false; +} - PUGI__FN bool xpath_variable::set(double value) - { - if (_type != xpath_type_number) return false; +PUGI__FN double xpath_variable::get_number() const +{ + return (_type == xpath_type_number) ? static_cast(this)->value : impl::gen_nan(); +} - static_cast(this)->value = value; - return true; - } +PUGI__FN const char_t* xpath_variable::get_string() const +{ + const char_t* value = (_type == xpath_type_string) ? static_cast(this)->value : 0; + return value ? value : PUGIXML_TEXT(""); +} - PUGI__FN bool xpath_variable::set(const char_t* value) - { - if (_type != xpath_type_string) return false; +PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const +{ + return (_type == xpath_type_node_set) ? 
static_cast(this)->value : impl::dummy_node_set; +} - impl::xpath_variable_string* var = static_cast(this); +PUGI__FN bool xpath_variable::set(bool value) +{ + if (_type != xpath_type_boolean) return false; - // duplicate string - size_t size = (impl::strlength(value) + 1) * sizeof(char_t); + static_cast(this)->value = value; + return true; +} - char_t* copy = static_cast(impl::xml_memory::allocate(size)); - if (!copy) return false; +PUGI__FN bool xpath_variable::set(double value) +{ + if (_type != xpath_type_number) return false; - memcpy(copy, value, size); + static_cast(this)->value = value; + return true; +} - // replace old string - if (var->value) impl::xml_memory::deallocate(var->value); - var->value = copy; +PUGI__FN bool xpath_variable::set(const char_t* value) +{ + if (_type != xpath_type_string) return false; - return true; - } + impl::xpath_variable_string* var = static_cast(this); - PUGI__FN bool xpath_variable::set(const xpath_node_set& value) - { - if (_type != xpath_type_node_set) return false; + // duplicate string + size_t size = (impl::strlength(value) + 1) * sizeof(char_t); - static_cast(this)->value = value; - return true; - } + char_t* copy = static_cast(impl::xml_memory::allocate(size)); + if (!copy) return false; - PUGI__FN xpath_variable_set::xpath_variable_set() - { - for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) - _data[i] = 0; - } + memcpy(copy, value, size); - PUGI__FN xpath_variable_set::~xpath_variable_set() - { - for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) - _destroy(_data[i]); - } + // replace old string + if (var->value) impl::xml_memory::deallocate(var->value); + var->value = copy; - PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs) - { - for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) - _data[i] = 0; + return true; +} - _assign(rhs); - } +PUGI__FN bool xpath_variable::set(const xpath_node_set& value) +{ + if (_type != xpath_type_node_set) return false; - 
PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs) - { - if (this == &rhs) return *this; + static_cast(this)->value = value; + return true; +} - _assign(rhs); +PUGI__FN xpath_variable_set::xpath_variable_set() +{ + for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) + _data[i] = 0; +} - return *this; - } +PUGI__FN xpath_variable_set::~xpath_variable_set() +{ + for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) + _destroy(_data[i]); +} + +PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs) +{ + for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) + _data[i] = 0; + + _assign(rhs); +} + +PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs) +{ + if (this == &rhs) return *this; + + _assign(rhs); + + return *this; +} #if __cplusplus >= 201103 - PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) - { - for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) - { - _data[i] = rhs._data[i]; - rhs._data[i] = 0; - } - } +PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) +{ + for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) { + _data[i] = rhs._data[i]; + rhs._data[i] = 0; + } +} - PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) - { - for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) - { - _destroy(_data[i]); +PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) +{ + for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) { + _destroy(_data[i]); - _data[i] = rhs._data[i]; - rhs._data[i] = 0; - } + _data[i] = rhs._data[i]; + rhs._data[i] = 0; + } - return *this; - } + return *this; +} #endif - PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs) - { - xpath_variable_set temp; +PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs) +{ + xpath_variable_set temp; - 
for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) - if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i])) - return; + for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) + if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i])) + return; - _swap(temp); - } + _swap(temp); +} - PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs) - { - for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) - { - xpath_variable* chain = _data[i]; +PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs) +{ + for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) { + xpath_variable* chain = _data[i]; - _data[i] = rhs._data[i]; - rhs._data[i] = chain; - } - } + _data[i] = rhs._data[i]; + rhs._data[i] = chain; + } +} - PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const - { - const size_t hash_size = sizeof(_data) / sizeof(_data[0]); - size_t hash = impl::hash_string(name) % hash_size; +PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const +{ + const size_t hash_size = sizeof(_data) / sizeof(_data[0]); + size_t hash = impl::hash_string(name) % hash_size; - // look for existing variable - for (xpath_variable* var = _data[hash]; var; var = var->_next) - if (impl::strequal(var->name(), name)) - return var; + // look for existing variable + for (xpath_variable* var = _data[hash]; var; var = var->_next) + if (impl::strequal(var->name(), name)) + return var; - return 0; - } + return 0; +} - PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result) - { - xpath_variable* last = 0; +PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result) +{ + xpath_variable* last = 0; - while (var) - { - // allocate storage for new variable - xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name()); - if (!nvar) return false; + while (var) { + // allocate storage for new variable + xpath_variable* nvar = 
impl::new_xpath_variable(var->_type, var->name()); + if (!nvar) return false; - // link the variable to the result immediately to handle failures gracefully - if (last) - last->_next = nvar; - else - *out_result = nvar; + // link the variable to the result immediately to handle failures gracefully + if (last) + last->_next = nvar; + else + *out_result = nvar; - last = nvar; + last = nvar; - // copy the value; this can fail due to out-of-memory conditions - if (!impl::copy_xpath_variable(nvar, var)) return false; + // copy the value; this can fail due to out-of-memory conditions + if (!impl::copy_xpath_variable(nvar, var)) return false; - var = var->_next; - } + var = var->_next; + } - return true; - } + return true; +} - PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var) - { - while (var) - { - xpath_variable* next = var->_next; +PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var) +{ + while (var) { + xpath_variable* next = var->_next; - impl::delete_xpath_variable(var->_type, var); + impl::delete_xpath_variable(var->_type, var); - var = next; - } - } + var = next; + } +} - PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type) - { - const size_t hash_size = sizeof(_data) / sizeof(_data[0]); - size_t hash = impl::hash_string(name) % hash_size; +PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type) +{ + const size_t hash_size = sizeof(_data) / sizeof(_data[0]); + size_t hash = impl::hash_string(name) % hash_size; - // look for existing variable - for (xpath_variable* var = _data[hash]; var; var = var->_next) - if (impl::strequal(var->name(), name)) - return var->type() == type ? var : 0; + // look for existing variable + for (xpath_variable* var = _data[hash]; var; var = var->_next) + if (impl::strequal(var->name(), name)) + return var->type() == type ? 
var : 0; - // add new variable - xpath_variable* result = impl::new_xpath_variable(type, name); + // add new variable + xpath_variable* result = impl::new_xpath_variable(type, name); - if (result) - { - result->_next = _data[hash]; + if (result) { + result->_next = _data[hash]; - _data[hash] = result; - } + _data[hash] = result; + } - return result; - } + return result; +} - PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value) - { - xpath_variable* var = add(name, xpath_type_boolean); - return var ? var->set(value) : false; - } +PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value) +{ + xpath_variable* var = add(name, xpath_type_boolean); + return var ? var->set(value) : false; +} - PUGI__FN bool xpath_variable_set::set(const char_t* name, double value) - { - xpath_variable* var = add(name, xpath_type_number); - return var ? var->set(value) : false; - } +PUGI__FN bool xpath_variable_set::set(const char_t* name, double value) +{ + xpath_variable* var = add(name, xpath_type_number); + return var ? var->set(value) : false; +} - PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value) - { - xpath_variable* var = add(name, xpath_type_string); - return var ? var->set(value) : false; - } +PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value) +{ + xpath_variable* var = add(name, xpath_type_string); + return var ? var->set(value) : false; +} - PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value) - { - xpath_variable* var = add(name, xpath_type_node_set); - return var ? var->set(value) : false; - } +PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value) +{ + xpath_variable* var = add(name, xpath_type_node_set); + return var ? 
var->set(value) : false; +} - PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name) - { - return _find(name); - } +PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name) +{ + return _find(name); +} - PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const - { - return _find(name); - } +PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const +{ + return _find(name); +} - PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0) - { - impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create(); +PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0) +{ + impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create(); - if (!qimpl) - { - #ifdef PUGIXML_NO_EXCEPTIONS - _result.error = "Out of memory"; - #else - throw std::bad_alloc(); - #endif - } - else - { - using impl::auto_deleter; // MSVC7 workaround - auto_deleter impl(qimpl, impl::xpath_query_impl::destroy); + if (!qimpl) { +#ifdef PUGIXML_NO_EXCEPTIONS + _result.error = "Out of memory"; +#else + throw std::bad_alloc(); +#endif + } else { + using impl::auto_deleter; // MSVC7 workaround + auto_deleter impl(qimpl, impl::xpath_query_impl::destroy); - qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result); + qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result); - if (qimpl->root) - { - qimpl->root->optimize(&qimpl->alloc); + if (qimpl->root) { + qimpl->root->optimize(&qimpl->alloc); - _impl = impl.release(); - _result.error = 0; - } - } - } + _impl = impl.release(); + _result.error = 0; + } + } +} - PUGI__FN xpath_query::xpath_query(): _impl(0) - { - } +PUGI__FN xpath_query::xpath_query(): _impl(0) +{ +} - PUGI__FN xpath_query::~xpath_query() - { - if (_impl) - impl::xpath_query_impl::destroy(static_cast(_impl)); - } +PUGI__FN xpath_query::~xpath_query() +{ + if (_impl) + 
impl::xpath_query_impl::destroy(static_cast(_impl)); +} #if __cplusplus >= 201103 - PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) - { - _impl = rhs._impl; - _result = rhs._result; - rhs._impl = 0; - rhs._result = xpath_parse_result(); - } +PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) +{ + _impl = rhs._impl; + _result = rhs._result; + rhs._impl = 0; + rhs._result = xpath_parse_result(); +} - PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) - { - if (this == &rhs) return *this; +PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) +{ + if (this == &rhs) return *this; - if (_impl) - impl::xpath_query_impl::destroy(static_cast(_impl)); + if (_impl) + impl::xpath_query_impl::destroy(static_cast(_impl)); - _impl = rhs._impl; - _result = rhs._result; - rhs._impl = 0; - rhs._result = xpath_parse_result(); + _impl = rhs._impl; + _result = rhs._result; + rhs._impl = 0; + rhs._result = xpath_parse_result(); - return *this; - } + return *this; +} #endif - PUGI__FN xpath_value_type xpath_query::return_type() const - { - if (!_impl) return xpath_type_none; +PUGI__FN xpath_value_type xpath_query::return_type() const +{ + if (!_impl) return xpath_type_none; - return static_cast(_impl)->root->rettype(); - } + return static_cast(_impl)->root->rettype(); +} - PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const - { - if (!_impl) return false; - - impl::xpath_context c(n, 1, 1); - impl::xpath_stack_data sd; +PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const +{ + if (!_impl) return false; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return false; - #endif - - return static_cast(_impl)->root->eval_boolean(c, sd.stack); - } - - PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const - { - if (!_impl) return impl::gen_nan(); - - impl::xpath_context c(n, 1, 1); - impl::xpath_stack_data sd; + impl::xpath_context c(n, 1, 1); + impl::xpath_stack_data sd; - #ifdef 
PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return impl::gen_nan(); - #endif +#ifdef PUGIXML_NO_EXCEPTIONS + if (setjmp(sd.error_handler)) return false; +#endif - return static_cast(_impl)->root->eval_number(c, sd.stack); - } + return static_cast(_impl)->root->eval_boolean(c, sd.stack); +} + +PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const +{ + if (!_impl) return impl::gen_nan(); + + impl::xpath_context c(n, 1, 1); + impl::xpath_stack_data sd; + +#ifdef PUGIXML_NO_EXCEPTIONS + if (setjmp(sd.error_handler)) return impl::gen_nan(); +#endif + + return static_cast(_impl)->root->eval_number(c, sd.stack); +} #ifndef PUGIXML_NO_STL - PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const - { - impl::xpath_stack_data sd; +PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const +{ + impl::xpath_stack_data sd; - impl::xpath_string r = impl::evaluate_string_impl(static_cast(_impl), n, sd); + impl::xpath_string r = impl::evaluate_string_impl(static_cast(_impl), n, sd); - return string_t(r.c_str(), r.length()); - } + return string_t(r.c_str(), r.length()); +} #endif - PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const - { - impl::xpath_stack_data sd; +PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const +{ + impl::xpath_stack_data sd; - impl::xpath_string r = impl::evaluate_string_impl(static_cast(_impl), n, sd); + impl::xpath_string r = impl::evaluate_string_impl(static_cast(_impl), n, sd); - size_t full_size = r.length() + 1; - - if (capacity > 0) - { - size_t size = (full_size < capacity) ? full_size : capacity; - assert(size > 0); + size_t full_size = r.length() + 1; - memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t)); - buffer[size - 1] = 0; - } - - return full_size; - } + if (capacity > 0) { + size_t size = (full_size < capacity) ? 
full_size : capacity; + assert(size > 0); - PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const - { - impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast(_impl)); - if (!root) return xpath_node_set(); + memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t)); + buffer[size - 1] = 0; + } - impl::xpath_context c(n, 1, 1); - impl::xpath_stack_data sd; + return full_size; +} - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_node_set(); - #endif +PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const +{ + impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast(_impl)); + if (!root) return xpath_node_set(); - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); + impl::xpath_context c(n, 1, 1); + impl::xpath_stack_data sd; - return xpath_node_set(r.begin(), r.end(), r.type()); - } +#ifdef PUGIXML_NO_EXCEPTIONS + if (setjmp(sd.error_handler)) return xpath_node_set(); +#endif - PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const - { - impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast(_impl)); - if (!root) return xpath_node(); + impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); - impl::xpath_context c(n, 1, 1); - impl::xpath_stack_data sd; + return xpath_node_set(r.begin(), r.end(), r.type()); +} - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_node(); - #endif +PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const +{ + impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast(_impl)); + if (!root) return xpath_node(); - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); + impl::xpath_context c(n, 1, 1); + impl::xpath_stack_data sd; - return r.first(); - } +#ifdef PUGIXML_NO_EXCEPTIONS + if (setjmp(sd.error_handler)) return xpath_node(); 
+#endif - PUGI__FN const xpath_parse_result& xpath_query::result() const - { - return _result; - } + impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); - PUGI__FN static void unspecified_bool_xpath_query(xpath_query***) - { - } + return r.first(); +} - PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const - { - return _impl ? unspecified_bool_xpath_query : 0; - } +PUGI__FN const xpath_parse_result& xpath_query::result() const +{ + return _result; +} - PUGI__FN bool xpath_query::operator!() const - { - return !_impl; - } +PUGI__FN static void unspecified_bool_xpath_query(xpath_query***) +{ +} - PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const - { - xpath_query q(query, variables); - return select_node(q); - } +PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const +{ + return _impl ? unspecified_bool_xpath_query : 0; +} - PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const - { - return query.evaluate_node(*this); - } +PUGI__FN bool xpath_query::operator!() const +{ + return !_impl; +} - PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const - { - xpath_query q(query, variables); - return select_nodes(q); - } +PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const +{ + xpath_query q(query, variables); + return select_node(q); +} - PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const - { - return query.evaluate_node_set(*this); - } +PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const +{ + return query.evaluate_node(*this); +} - PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const - { - xpath_query q(query, variables); - return select_single_node(q); - } +PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, 
xpath_variable_set* variables) const +{ + xpath_query q(query, variables); + return select_nodes(q); +} - PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const - { - return query.evaluate_node(*this); - } +PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const +{ + return query.evaluate_node_set(*this); +} + +PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const +{ + xpath_query q(query, variables); + return select_single_node(q); +} + +PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const +{ + return query.evaluate_node(*this); +} } #endif @@ -12432,7 +11444,7 @@ namespace pugi * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND diff --git a/moses2/server/Server.cpp b/moses2/server/Server.cpp index 4befff98e..2293c62cd 100644 --- a/moses2/server/Server.cpp +++ b/moses2/server/Server.cpp @@ -16,8 +16,8 @@ namespace Moses2 { Server::Server(ServerOptions &server_options, System &system) -:m_server_options(server_options) -,m_translator(new Translator(*this, system)) + :m_server_options(server_options) + ,m_translator(new Translator(*this, system)) { m_registry.addMethod("translate", m_translator); } @@ -30,17 +30,17 @@ Server::~Server() void Server::run(System &system) { xmlrpc_c::serverAbyss myAbyssServer - (xmlrpc_c::serverAbyss::constrOpt() - .registryP(&m_registry) - .portNumber(m_server_options.port) // TCP port on which to listen - .logFileName(m_server_options.logfile) - .allowOrigin("*") - .maxConn(m_server_options.maxConn) - .maxConnBacklog(m_server_options.maxConnBacklog) - .keepaliveTimeout(m_server_options.keepaliveTimeout) - 
.keepaliveMaxConn(m_server_options.keepaliveMaxConn) - .timeout(m_server_options.timeout) - ); + (xmlrpc_c::serverAbyss::constrOpt() + .registryP(&m_registry) + .portNumber(m_server_options.port) // TCP port on which to listen + .logFileName(m_server_options.logfile) + .allowOrigin("*") + .maxConn(m_server_options.maxConn) + .maxConnBacklog(m_server_options.maxConnBacklog) + .keepaliveTimeout(m_server_options.keepaliveTimeout) + .keepaliveMaxConn(m_server_options.keepaliveMaxConn) + .timeout(m_server_options.timeout) + ); std::ostringstream pidfilename; pidfilename << "/tmp/moses-server." << m_server_options.port << ".pid"; m_pidfile = pidfilename.str(); @@ -48,12 +48,10 @@ void Server::run(System &system) pidfile << getpid() << std::endl; pidfile.close(); cerr << "Listening on port " << m_server_options.port << std::endl; - if (m_server_options.is_serial) - { - cerr << "Running server in serial mode." << std::endl; - while(true) myAbyssServer.runOnce(); - } - else myAbyssServer.run(); + if (m_server_options.is_serial) { + cerr << "Running server in serial mode." << std::endl; + while(true) myAbyssServer.runOnce(); + } else myAbyssServer.run(); std::cerr << "xmlrpc_c::serverAbyss.run() returned but it should not." 
<< std::endl; diff --git a/moses2/server/TranslationRequest.cpp b/moses2/server/TranslationRequest.cpp index dd37d621c..2d50835a6 100644 --- a/moses2/server/TranslationRequest.cpp +++ b/moses2/server/TranslationRequest.cpp @@ -14,10 +14,10 @@ TranslationRequest(xmlrpc_c::paramList const& paramList, System &system, const std::string &line, long translationId) -:TranslationTask(system, line, translationId) -,m_cond(cond) -,m_mutex(mut) -,m_done(false) + :TranslationTask(system, line, translationId) + ,m_cond(cond) + ,m_mutex(mut) + ,m_done(false) { } @@ -25,12 +25,12 @@ TranslationRequest(xmlrpc_c::paramList const& paramList, boost::shared_ptr TranslationRequest:: create(Translator* translator, - xmlrpc_c::paramList const& paramList, - boost::condition_variable& cond, - boost::mutex& mut, - System &system, - const std::string &line, - long translationId) + xmlrpc_c::paramList const& paramList, + boost::condition_variable& cond, + boost::mutex& mut, + System &system, + const std::string &line, + long translationId) { boost::shared_ptr ret; TranslationRequest *request = new TranslationRequest(paramList, cond, mut, system, line, translationId); @@ -38,7 +38,7 @@ create(Translator* translator, ret->m_translator = translator; return ret; } - + void TranslationRequest:: Run() @@ -60,7 +60,7 @@ Run() void TranslationRequest::pack_hypothesis(const Manager& manager, Hypothesis const* h, std::string const& key, - std::map & dest) const + std::map & dest) const { } diff --git a/moses2/server/TranslationRequest.h b/moses2/server/TranslationRequest.h index 0f63bc57a..822cde153 100644 --- a/moses2/server/TranslationRequest.h +++ b/moses2/server/TranslationRequest.h @@ -22,7 +22,7 @@ class System; class Manager; class -TranslationRequest : public virtual TranslationTask + TranslationRequest : public virtual TranslationTask { protected: std::map m_retData; @@ -41,7 +41,7 @@ protected: void pack_hypothesis(const Manager& manager, Hypothesis const* h, - std::string const& key, + 
std::string const& key, std::map & dest) const; public: @@ -49,12 +49,12 @@ public: static boost::shared_ptr create(Translator* translator, - xmlrpc_c::paramList const& paramList, + xmlrpc_c::paramList const& paramList, boost::condition_variable& cond, boost::mutex& mut, System &system, - const std::string &line, - long translationId); + const std::string &line, + long translationId); virtual bool diff --git a/moses2/server/Translator.cpp b/moses2/server/Translator.cpp index fd855c136..6f6212323 100644 --- a/moses2/server/Translator.cpp +++ b/moses2/server/Translator.cpp @@ -16,10 +16,10 @@ namespace Moses2 { Translator::Translator(Server& server, System &system) -: m_server(server), - m_threadPool(server.options().numThreads), - m_system(system), - m_translationId(0) + : m_server(server), + m_threadPool(server.options().numThreads), + m_system(system), + m_translationId(0) { // signature and help strings are documentation -- the client // can query this information with a system.methodSignature and @@ -34,7 +34,7 @@ Translator::~Translator() } void Translator::execute(xmlrpc_c::paramList const& paramList, - xmlrpc_c::value *const retvalP) + xmlrpc_c::value *const retvalP) { typedef std::map param_t; param_t const& params = paramList.getStruct(0); diff --git a/moses2/server/Translator.h b/moses2/server/Translator.h index ba2c68ceb..bb84c70b1 100644 --- a/moses2/server/Translator.h +++ b/moses2/server/Translator.h @@ -25,7 +25,7 @@ public: virtual ~Translator(); void execute(xmlrpc_c::paramList const& paramList, - xmlrpc_c::value * const retvalP); + xmlrpc_c::value * const retvalP); protected: Server& m_server; From 0b3cb893069b4f34ab0f4fdac7e0a4e1e9eab05d Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 1 Feb 2017 00:57:42 +0000 Subject: [PATCH 107/176] jam file for regression tests --- moses2/FF/PointerState.cpp | 6 ++++++ regression-testing/Jamfile | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/moses2/FF/PointerState.cpp 
b/moses2/FF/PointerState.cpp index e69de29bb..facb0a2f9 100644 --- a/moses2/FF/PointerState.cpp +++ b/moses2/FF/PointerState.cpp @@ -0,0 +1,6 @@ +#include "PointerState.h" + +namespace Moses2 +{ + +} diff --git a/regression-testing/Jamfile b/regression-testing/Jamfile index 17e399e43..e72470c12 100644 --- a/regression-testing/Jamfile +++ b/regression-testing/Jamfile @@ -41,7 +41,7 @@ if $(with-regtest) { } else { reg_test phrase : [ glob $(test-dir)/phrase.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ; reg_test chart : [ glob $(test-dir)/chart.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ; - reg_test moses2 : [ glob $(test-dir)/moses2.* : $(test-dir)/*withDALM ] : ../contrib/moses2//moses2 : @reg_test_decode ; + reg_test moses2 : [ glob $(test-dir)/moses2.* : $(test-dir)/*withDALM ] : ../moses2//moses2 : @reg_test_decode ; } if [ option.get "with-dalm" : : "yes" ] { From 657deaef5c9c66d862d5aeb9f2d5e2852c2680d9 Mon Sep 17 00:00:00 2001 From: MosesAdmin Date: Wed, 1 Feb 2017 00:59:30 +0000 Subject: [PATCH 108/176] daily automatic beautifier --- moses/Util.h | 2 +- .../ProbingPT/line_splitter.hh | 6 +- .../ProbingPT/probing_hash_utils.hh | 12 +- moses2/TranslationModel/ProbingPT/querying.hh | 13 +- moses2/TranslationModel/ProbingPT/storing.hh | 23 +- moses2/TranslationModel/ProbingPT/vocabid.hh | 4 +- moses2/pugiconfig.hpp | 2 +- moses2/pugixml.cpp | 2 +- moses2/pugixml.hpp | 2411 ++++++++--------- 9 files changed, 1229 insertions(+), 1246 deletions(-) diff --git a/moses/Util.h b/moses/Util.h index 59f43c709..8a21a9b23 100644 --- a/moses/Util.h +++ b/moses/Util.h @@ -428,7 +428,7 @@ inline float CalcTranslationScore(const std::vector &probVector, out << *this; \ return out.str(); \ } \ - + //! 
delete and remove every element of a collection object such as set, list etc template void RemoveAllInColl(COLL &coll) diff --git a/moses2/TranslationModel/ProbingPT/line_splitter.hh b/moses2/TranslationModel/ProbingPT/line_splitter.hh index 3b086b44a..0b91fed09 100644 --- a/moses2/TranslationModel/ProbingPT/line_splitter.hh +++ b/moses2/TranslationModel/ProbingPT/line_splitter.hh @@ -13,8 +13,7 @@ namespace Moses2 { //Struct for holding processed line -struct line_text -{ +struct line_text { StringPiece source_phrase; StringPiece target_phrase; StringPiece prob; @@ -26,8 +25,7 @@ struct line_text }; //Struct for holding processed line -struct target_text -{ +struct target_text { std::vector target_phrase; std::vector prob; std::vector word_align_term; diff --git a/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh b/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh index 368147807..7e275510a 100644 --- a/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh +++ b/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh @@ -13,18 +13,15 @@ namespace Moses2 #define API_VERSION 15 //Hash table entry -struct Entry -{ +struct Entry { typedef uint64_t Key; Key key; - Key GetKey() const - { + Key GetKey() const { return key; } - void SetKey(Key to) - { + void SetKey(Key to) { key = to; } @@ -42,8 +39,7 @@ char * readTable(const char * filename, util::LoadMethod load_method, util::scop uint64_t getKey(const uint64_t source_phrase[], size_t size); -struct TargetPhraseInfo -{ +struct TargetPhraseInfo { uint32_t alignTerm; uint32_t alignNonTerm; uint16_t numWords; diff --git a/moses2/TranslationModel/ProbingPT/querying.hh b/moses2/TranslationModel/ProbingPT/querying.hh index dcdd2a75a..4cb3228f2 100644 --- a/moses2/TranslationModel/ProbingPT/querying.hh +++ b/moses2/TranslationModel/ProbingPT/querying.hh @@ -50,17 +50,18 @@ public: std::pair query(uint64_t key); - const std::map &getSourceVocab() const - { return source_vocabids; } + const std::map 
&getSourceVocab() const { + return source_vocabids; + } - const std::vector &getAlignments() const - { return alignColl; } + const std::vector &getAlignments() const { + return alignColl; + } uint64_t getKey(uint64_t source_phrase[], size_t size) const; template - inline bool Get(const boost::unordered_map &keyValue, const std::string &sought, T &found) const - { + inline bool Get(const boost::unordered_map &keyValue, const std::string &sought, T &found) const { boost::unordered_map::const_iterator iter = keyValue.find(sought); if (iter == keyValue.end()) { return false; diff --git a/moses2/TranslationModel/ProbingPT/storing.hh b/moses2/TranslationModel/ProbingPT/storing.hh index 10d7050d3..a6f8dd143 100644 --- a/moses2/TranslationModel/ProbingPT/storing.hh +++ b/moses2/TranslationModel/ProbingPT/storing.hh @@ -32,7 +32,7 @@ public: bool done; Node() - :done(false) + :done(false) {} void Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos = 0); @@ -41,8 +41,8 @@ public: void createProbingPT(const std::string &phrasetable_path, - const std::string &basepath, int num_scores, int num_lex_scores, - bool log_prob, int max_cache_size, bool scfg); + const std::string &basepath, int num_scores, int num_lex_scores, + bool log_prob, int max_cache_size, bool scfg); uint64_t getKey(const std::vector &source_phrase); std::vector CreatePrefix(const std::vector &vocabid_source, size_t endPos); @@ -66,14 +66,12 @@ public: uint64_t sourceKey; float count; CacheItem(const std::string &vSource, uint64_t vSourceKey, float vCount) - :source(vSource) - ,sourceKey(vSourceKey) - ,count(vCount) - { + :source(vSource) + ,sourceKey(vSourceKey) + ,count(vCount) { } - bool operator<(const CacheItem &other) const - { + bool operator<(const CacheItem &other) const { return count > other.count; } }; @@ -81,15 +79,14 @@ public: class CacheItemOrderer { public: - bool operator()(const CacheItem* a, const CacheItem* b) const - { + bool operator()(const CacheItem* a, const CacheItem* b) 
const { return (*a) < (*b); } }; void serialize_cache( - std::priority_queue, CacheItemOrderer> &cache, - const std::string &path, float totalSourceCount); + std::priority_queue, CacheItemOrderer> &cache, + const std::string &path, float totalSourceCount); } diff --git a/moses2/TranslationModel/ProbingPT/vocabid.hh b/moses2/TranslationModel/ProbingPT/vocabid.hh index 55d99d453..9d37bd1b5 100644 --- a/moses2/TranslationModel/ProbingPT/vocabid.hh +++ b/moses2/TranslationModel/ProbingPT/vocabid.hh @@ -19,10 +19,10 @@ template class StoreVocab; void add_to_map(StoreVocab &sourceVocab, - const StringPiece &textin); + const StringPiece &textin); void serialize_map(const std::map &karta, - const std::string &filename); + const std::string &filename); void read_map(std::map &karta, const char* filename); diff --git a/moses2/pugiconfig.hpp b/moses2/pugiconfig.hpp index e50b580bf..1e3bdd1f3 100644 --- a/moses2/pugiconfig.hpp +++ b/moses2/pugiconfig.hpp @@ -62,7 +62,7 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
- * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND diff --git a/moses2/pugixml.cpp b/moses2/pugixml.cpp index 38f657cb0..a39f25880 100644 --- a/moses2/pugixml.cpp +++ b/moses2/pugixml.cpp @@ -54,7 +54,7 @@ #endif #ifdef __INTEL_COMPILER -# pragma warning(disable: 177) // function was declared but never referenced +# pragma warning(disable: 177) // function was declared but never referenced # pragma warning(disable: 279) // controlling expression is constant # pragma warning(disable: 1478 1786) // function was declared "deprecated" # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type diff --git a/moses2/pugixml.hpp b/moses2/pugixml.hpp index 9f7c3fbcf..13bf7917b 100644 --- a/moses2/pugixml.hpp +++ b/moses2/pugixml.hpp @@ -83,1285 +83,1276 @@ namespace pugi { - // Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE - typedef PUGIXML_CHAR char_t; +// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE +typedef PUGIXML_CHAR char_t; #ifndef PUGIXML_NO_STL - // String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE - typedef std::basic_string, std::allocator > string_t; +// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE +typedef std::basic_string, std::allocator > string_t; #endif } // The PugiXML namespace namespace pugi { - // Tree node types - enum xml_node_type - { - node_null, // Empty (null) node handle - node_document, // A document tree's absolute root - node_element, // Element tag, i.e. '' - node_pcdata, // Plain character data, i.e. 'text' - node_cdata, // Character data, i.e. '' - node_comment, // Comment tag, i.e. '' - node_pi, // Processing instruction, i.e. '' - node_declaration, // Document declaration, i.e. 
'' - node_doctype // Document type declaration, i.e. '' - }; - - // Parsing options - - // Minimal parsing mode (equivalent to turning all other flags off). - // Only elements and PCDATA sections are added to the DOM tree, no text conversions are performed. - const unsigned int parse_minimal = 0x0000; - - // This flag determines if processing instructions (node_pi) are added to the DOM tree. This flag is off by default. - const unsigned int parse_pi = 0x0001; - - // This flag determines if comments (node_comment) are added to the DOM tree. This flag is off by default. - const unsigned int parse_comments = 0x0002; - - // This flag determines if CDATA sections (node_cdata) are added to the DOM tree. This flag is on by default. - const unsigned int parse_cdata = 0x0004; - - // This flag determines if plain character data (node_pcdata) that consist only of whitespace are added to the DOM tree. - // This flag is off by default; turning it on usually results in slower parsing and more memory consumption. - const unsigned int parse_ws_pcdata = 0x0008; - - // This flag determines if character and entity references are expanded during parsing. This flag is on by default. - const unsigned int parse_escapes = 0x0010; - - // This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default. - const unsigned int parse_eol = 0x0020; - - // This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default. - const unsigned int parse_wconv_attribute = 0x0040; - - // This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default. - const unsigned int parse_wnorm_attribute = 0x0080; - - // This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default. 
- const unsigned int parse_declaration = 0x0100; - - // This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default. - const unsigned int parse_doctype = 0x0200; - - // This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only - // of whitespace is added to the DOM tree. - // This flag is off by default; turning it on may result in slower parsing and more memory consumption. - const unsigned int parse_ws_pcdata_single = 0x0400; - - // This flag determines if leading and trailing whitespace is to be removed from plain character data. This flag is off by default. - const unsigned int parse_trim_pcdata = 0x0800; - - // This flag determines if plain character data that does not have a parent node is added to the DOM tree, and if an empty document - // is a valid document. This flag is off by default. - const unsigned int parse_fragment = 0x1000; - - // The default parsing mode. - // Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded, - // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. - const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol; - - // The full parsing mode. - // Nodes of all types are added to the DOM tree, character/reference entities are expanded, - // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. 
- const unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype; - - // These flags determine the encoding of input data for XML document - enum xml_encoding - { - encoding_auto, // Auto-detect input encoding using BOM or < / class xml_object_range - { - public: - typedef It const_iterator; - typedef It iterator; - - xml_object_range(It b, It e): _begin(b), _end(e) - { - } - - It begin() const { return _begin; } - It end() const { return _end; } - - private: - It _begin, _end; - }; - - // Writer interface for node printing (see xml_node::print) - class PUGIXML_CLASS xml_writer - { - public: - virtual ~xml_writer() {} - - // Write memory chunk into stream/file/whatever - virtual void write(const void* data, size_t size) = 0; - }; - - // xml_writer implementation for FILE* - class PUGIXML_CLASS xml_writer_file: public xml_writer - { - public: - // Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio - xml_writer_file(void* file); - - virtual void write(const void* data, size_t size); - - private: - void* file; - }; - - #ifndef PUGIXML_NO_STL - // xml_writer implementation for streams - class PUGIXML_CLASS xml_writer_stream: public xml_writer - { - public: - // Construct writer from an output stream object - xml_writer_stream(std::basic_ostream >& stream); - xml_writer_stream(std::basic_ostream >& stream); - - virtual void write(const void* data, size_t size); - - private: - std::basic_ostream >* narrow_stream; - std::basic_ostream >* wide_stream; - }; - #endif - - // A light-weight handle for manipulating attributes in DOM tree - class PUGIXML_CLASS xml_attribute - { - friend class xml_attribute_iterator; - friend class xml_node; - - private: - xml_attribute_struct* _attr; - - typedef void (*unspecified_bool_type)(xml_attribute***); - - public: - // Default constructor. Constructs an empty attribute. 
- xml_attribute(); - - // Constructs attribute from internal pointer - explicit xml_attribute(xml_attribute_struct* attr); - - // Safe bool conversion operator - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - - // Comparison operators (compares wrapped attribute pointers) - bool operator==(const xml_attribute& r) const; - bool operator!=(const xml_attribute& r) const; - bool operator<(const xml_attribute& r) const; - bool operator>(const xml_attribute& r) const; - bool operator<=(const xml_attribute& r) const; - bool operator>=(const xml_attribute& r) const; - - // Check if attribute is empty - bool empty() const; - - // Get attribute name/value, or "" if attribute is empty - const char_t* name() const; - const char_t* value() const; - - // Get attribute value, or the default value if attribute is empty - const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const; - - // Get attribute value as a number, or the default value if conversion did not succeed or attribute is empty - int as_int(int def = 0) const; - unsigned int as_uint(unsigned int def = 0) const; - double as_double(double def = 0) const; - float as_float(float def = 0) const; - - #ifdef PUGIXML_HAS_LONG_LONG - long long as_llong(long long def = 0) const; - unsigned long long as_ullong(unsigned long long def = 0) const; - #endif - - // Get attribute value as bool (returns true if first character is in '1tTyY' set), or the default value if attribute is empty - bool as_bool(bool def = false) const; - - // Set attribute name/value (returns false if attribute is empty or there is not enough memory) - bool set_name(const char_t* rhs); - bool set_value(const char_t* rhs); - - // Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false") - bool set_value(int rhs); - bool set_value(unsigned int rhs); - bool set_value(double rhs); - bool set_value(float rhs); - bool set_value(bool rhs); - - #ifdef 
PUGIXML_HAS_LONG_LONG - bool set_value(long long rhs); - bool set_value(unsigned long long rhs); - #endif - - // Set attribute value (equivalent to set_value without error checking) - xml_attribute& operator=(const char_t* rhs); - xml_attribute& operator=(int rhs); - xml_attribute& operator=(unsigned int rhs); - xml_attribute& operator=(double rhs); - xml_attribute& operator=(float rhs); - xml_attribute& operator=(bool rhs); - - #ifdef PUGIXML_HAS_LONG_LONG - xml_attribute& operator=(long long rhs); - xml_attribute& operator=(unsigned long long rhs); - #endif - - // Get next/previous attribute in the attribute list of the parent node - xml_attribute next_attribute() const; - xml_attribute previous_attribute() const; - - // Get hash value (unique for handles to the same object) - size_t hash_value() const; - - // Get internal pointer - xml_attribute_struct* internal_object() const; - }; - -#ifdef __BORLANDC__ - // Borland C++ workaround - bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs); - bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs); -#endif - - // A light-weight handle for manipulating nodes in DOM tree - class PUGIXML_CLASS xml_node - { - friend class xml_attribute_iterator; - friend class xml_node_iterator; - friend class xml_named_node_iterator; - - protected: - xml_node_struct* _root; - - typedef void (*unspecified_bool_type)(xml_node***); - - public: - // Default constructor. Constructs an empty node. 
- xml_node(); - - // Constructs node from internal pointer - explicit xml_node(xml_node_struct* p); - - // Safe bool conversion operator - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - - // Comparison operators (compares wrapped node pointers) - bool operator==(const xml_node& r) const; - bool operator!=(const xml_node& r) const; - bool operator<(const xml_node& r) const; - bool operator>(const xml_node& r) const; - bool operator<=(const xml_node& r) const; - bool operator>=(const xml_node& r) const; - - // Check if node is empty. - bool empty() const; - - // Get node type - xml_node_type type() const; - - // Get node name, or "" if node is empty or it has no name - const char_t* name() const; - - // Get node value, or "" if node is empty or it has no value - // Note: For text node.value() does not return "text"! Use child_value() or text() methods to access text inside nodes. - const char_t* value() const; - - // Get attribute list - xml_attribute first_attribute() const; - xml_attribute last_attribute() const; - - // Get children list - xml_node first_child() const; - xml_node last_child() const; - - // Get next/previous sibling in the children list of the parent node - xml_node next_sibling() const; - xml_node previous_sibling() const; - - // Get parent node - xml_node parent() const; - - // Get root of DOM tree this node belongs to - xml_node root() const; - - // Get text object for the current node - xml_text text() const; - - // Get child, attribute or next/previous sibling with the specified name - xml_node child(const char_t* name) const; - xml_attribute attribute(const char_t* name) const; - xml_node next_sibling(const char_t* name) const; - xml_node previous_sibling(const char_t* name) const; - - // Get attribute, starting the search from a hint (and updating hint so that searching for a sequence of attributes is fast) - xml_attribute attribute(const char_t* name, xml_attribute& hint) const; - - // Get 
child value of current node; that is, value of the first child node of type PCDATA/CDATA - const char_t* child_value() const; - - // Get child value of child with specified name. Equivalent to child(name).child_value(). - const char_t* child_value(const char_t* name) const; - - // Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value) - bool set_name(const char_t* rhs); - bool set_value(const char_t* rhs); - - // Add attribute with specified name. Returns added attribute, or empty attribute on errors. - xml_attribute append_attribute(const char_t* name); - xml_attribute prepend_attribute(const char_t* name); - xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr); - xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr); - - // Add a copy of the specified attribute. Returns added attribute, or empty attribute on errors. - xml_attribute append_copy(const xml_attribute& proto); - xml_attribute prepend_copy(const xml_attribute& proto); - xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr); - xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr); - - // Add child node with specified type. Returns added node, or empty node on errors. - xml_node append_child(xml_node_type type = node_element); - xml_node prepend_child(xml_node_type type = node_element); - xml_node insert_child_after(xml_node_type type, const xml_node& node); - xml_node insert_child_before(xml_node_type type, const xml_node& node); - - // Add child element with specified name. Returns added node, or empty node on errors. - xml_node append_child(const char_t* name); - xml_node prepend_child(const char_t* name); - xml_node insert_child_after(const char_t* name, const xml_node& node); - xml_node insert_child_before(const char_t* name, const xml_node& node); - - // Add a copy of the specified node as a child. 
Returns added node, or empty node on errors. - xml_node append_copy(const xml_node& proto); - xml_node prepend_copy(const xml_node& proto); - xml_node insert_copy_after(const xml_node& proto, const xml_node& node); - xml_node insert_copy_before(const xml_node& proto, const xml_node& node); - - // Move the specified node to become a child of this node. Returns moved node, or empty node on errors. - xml_node append_move(const xml_node& moved); - xml_node prepend_move(const xml_node& moved); - xml_node insert_move_after(const xml_node& moved, const xml_node& node); - xml_node insert_move_before(const xml_node& moved, const xml_node& node); - - // Remove specified attribute - bool remove_attribute(const xml_attribute& a); - bool remove_attribute(const char_t* name); - - // Remove specified child - bool remove_child(const xml_node& n); - bool remove_child(const char_t* name); - - // Parses buffer as an XML document fragment and appends all nodes as children of the current node. - // Copies/converts the buffer, so it may be deleted or changed after the function returns. - // Note: append_buffer allocates memory that has the lifetime of the owning document; removing the appended nodes does not immediately reclaim that memory. - xml_parse_result append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - // Find attribute using predicate. Returns first attribute for which predicate returned true. - template xml_attribute find_attribute(Predicate pred) const - { - if (!_root) return xml_attribute(); - - for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute()) - if (pred(attrib)) - return attrib; - - return xml_attribute(); - } - - // Find child node using predicate. Returns first child for which predicate returned true. 
- template xml_node find_child(Predicate pred) const - { - if (!_root) return xml_node(); - - for (xml_node node = first_child(); node; node = node.next_sibling()) - if (pred(node)) - return node; - - return xml_node(); - } - - // Find node from subtree using predicate. Returns first node from subtree (depth-first), for which predicate returned true. - template xml_node find_node(Predicate pred) const - { - if (!_root) return xml_node(); - - xml_node cur = first_child(); - - while (cur._root && cur._root != _root) - { - if (pred(cur)) return cur; - - if (cur.first_child()) cur = cur.first_child(); - else if (cur.next_sibling()) cur = cur.next_sibling(); - else - { - while (!cur.next_sibling() && cur._root != _root) cur = cur.parent(); - - if (cur._root != _root) cur = cur.next_sibling(); - } - } - - return xml_node(); - } - - // Find child node by attribute name/value - xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; - xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const; - - #ifndef PUGIXML_NO_STL - // Get the absolute node path from root as a text string. - string_t path(char_t delimiter = '/') const; - #endif - - // Search for a node by path consisting of node names and . or .. elements. - xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const; - - // Recursively traverse subtree with xml_tree_walker - bool traverse(xml_tree_walker& walker); - - #ifndef PUGIXML_NO_XPATH - // Select single node by evaluating XPath query. Returns first node from the resulting node set. 
- xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const; - xpath_node select_node(const xpath_query& query) const; - - // Select node set by evaluating XPath query - xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const; - xpath_node_set select_nodes(const xpath_query& query) const; - - // (deprecated: use select_node instead) Select single node by evaluating XPath query. - xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const; - xpath_node select_single_node(const xpath_query& query) const; - - #endif - - // Print subtree using a writer object - void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; - - #ifndef PUGIXML_NO_STL - // Print subtree to stream - void print(std::basic_ostream >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; - void print(std::basic_ostream >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const; - #endif - - // Child nodes iterators - typedef xml_node_iterator iterator; - - iterator begin() const; - iterator end() const; - - // Attribute iterators - typedef xml_attribute_iterator attribute_iterator; - - attribute_iterator attributes_begin() const; - attribute_iterator attributes_end() const; - - // Range-based for support - xml_object_range children() const; - xml_object_range children(const char_t* name) const; - xml_object_range attributes() const; - - // Get node offset in parsed file/string (in char_t units) for debugging purposes - ptrdiff_t offset_debug() const; - - // Get hash value (unique for handles to the same object) - size_t hash_value() const; - - // Get internal pointer - xml_node_struct* internal_object() const; - }; - -#ifdef 
__BORLANDC__ - // Borland C++ workaround - bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs); - bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs); -#endif - - // A helper for working with text inside PCDATA nodes - class PUGIXML_CLASS xml_text - { - friend class xml_node; - - xml_node_struct* _root; - - typedef void (*unspecified_bool_type)(xml_text***); - - explicit xml_text(xml_node_struct* root); - - xml_node_struct* _data_new(); - xml_node_struct* _data() const; - - public: - // Default constructor. Constructs an empty object. - xml_text(); - - // Safe bool conversion operator - operator unspecified_bool_type() const; +// Tree node types +enum xml_node_type { + node_null, // Empty (null) node handle + node_document, // A document tree's absolute root + node_element, // Element tag, i.e. '' + node_pcdata, // Plain character data, i.e. 'text' + node_cdata, // Character data, i.e. '' + node_comment, // Comment tag, i.e. '' + node_pi, // Processing instruction, i.e. '' + node_declaration, // Document declaration, i.e. '' + node_doctype // Document type declaration, i.e. '' +}; - // Borland C++ workaround - bool operator!() const; +// Parsing options - // Check if text object is empty - bool empty() const; +// Minimal parsing mode (equivalent to turning all other flags off). +// Only elements and PCDATA sections are added to the DOM tree, no text conversions are performed. 
+const unsigned int parse_minimal = 0x0000; - // Get text, or "" if object is empty - const char_t* get() const; - - // Get text, or the default value if object is empty - const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const; - - // Get text as a number, or the default value if conversion did not succeed or object is empty - int as_int(int def = 0) const; - unsigned int as_uint(unsigned int def = 0) const; - double as_double(double def = 0) const; - float as_float(float def = 0) const; +// This flag determines if processing instructions (node_pi) are added to the DOM tree. This flag is off by default. +const unsigned int parse_pi = 0x0001; - #ifdef PUGIXML_HAS_LONG_LONG - long long as_llong(long long def = 0) const; - unsigned long long as_ullong(unsigned long long def = 0) const; - #endif - - // Get text as bool (returns true if first character is in '1tTyY' set), or the default value if object is empty - bool as_bool(bool def = false) const; +// This flag determines if comments (node_comment) are added to the DOM tree. This flag is off by default. +const unsigned int parse_comments = 0x0002; - // Set text (returns false if object is empty or there is not enough memory) - bool set(const char_t* rhs); +// This flag determines if CDATA sections (node_cdata) are added to the DOM tree. This flag is on by default. +const unsigned int parse_cdata = 0x0004; - // Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false") - bool set(int rhs); - bool set(unsigned int rhs); - bool set(double rhs); - bool set(float rhs); - bool set(bool rhs); +// This flag determines if plain character data (node_pcdata) that consist only of whitespace are added to the DOM tree. +// This flag is off by default; turning it on usually results in slower parsing and more memory consumption. 
+const unsigned int parse_ws_pcdata = 0x0008; - #ifdef PUGIXML_HAS_LONG_LONG - bool set(long long rhs); - bool set(unsigned long long rhs); - #endif +// This flag determines if character and entity references are expanded during parsing. This flag is on by default. +const unsigned int parse_escapes = 0x0010; - // Set text (equivalent to set without error checking) - xml_text& operator=(const char_t* rhs); - xml_text& operator=(int rhs); - xml_text& operator=(unsigned int rhs); - xml_text& operator=(double rhs); - xml_text& operator=(float rhs); - xml_text& operator=(bool rhs); +// This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default. +const unsigned int parse_eol = 0x0020; - #ifdef PUGIXML_HAS_LONG_LONG - xml_text& operator=(long long rhs); - xml_text& operator=(unsigned long long rhs); - #endif +// This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default. +const unsigned int parse_wconv_attribute = 0x0040; - // Get the data node (node_pcdata or node_cdata) for this object - xml_node data() const; - }; +// This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default. +const unsigned int parse_wnorm_attribute = 0x0080; -#ifdef __BORLANDC__ - // Borland C++ workaround - bool PUGIXML_FUNCTION operator&&(const xml_text& lhs, bool rhs); - bool PUGIXML_FUNCTION operator||(const xml_text& lhs, bool rhs); -#endif +// This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default. +const unsigned int parse_declaration = 0x0100; - // Child node iterator (a bidirectional iterator over a collection of xml_node) - class PUGIXML_CLASS xml_node_iterator - { - friend class xml_node; +// This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default. 
+const unsigned int parse_doctype = 0x0200; - private: - mutable xml_node _wrap; - xml_node _parent; +// This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only +// of whitespace is added to the DOM tree. +// This flag is off by default; turning it on may result in slower parsing and more memory consumption. +const unsigned int parse_ws_pcdata_single = 0x0400; - xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent); +// This flag determines if leading and trailing whitespace is to be removed from plain character data. This flag is off by default. +const unsigned int parse_trim_pcdata = 0x0800; - public: - // Iterator traits - typedef ptrdiff_t difference_type; - typedef xml_node value_type; - typedef xml_node* pointer; - typedef xml_node& reference; +// This flag determines if plain character data that does not have a parent node is added to the DOM tree, and if an empty document +// is a valid document. This flag is off by default. +const unsigned int parse_fragment = 0x1000; - #ifndef PUGIXML_NO_STL - typedef std::bidirectional_iterator_tag iterator_category; - #endif +// The default parsing mode. +// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded, +// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. +const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol; - // Default constructor - xml_node_iterator(); +// The full parsing mode. +// Nodes of all types are added to the DOM tree, character/reference entities are expanded, +// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. 
+const unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype; - // Construct an iterator which points to the specified node - xml_node_iterator(const xml_node& node); +// These flags determine the encoding of input data for XML document +enum xml_encoding { + encoding_auto, // Auto-detect input encoding using BOM or < / () const; - - const xml_node_iterator& operator++(); - xml_node_iterator operator++(int); - - const xml_node_iterator& operator--(); - xml_node_iterator operator--(int); - }; - - // Attribute iterator (a bidirectional iterator over a collection of xml_attribute) - class PUGIXML_CLASS xml_attribute_iterator - { - friend class xml_node; - - private: - mutable xml_attribute _wrap; - xml_node _parent; - - xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent); - - public: - // Iterator traits - typedef ptrdiff_t difference_type; - typedef xml_attribute value_type; - typedef xml_attribute* pointer; - typedef xml_attribute& reference; - - #ifndef PUGIXML_NO_STL - typedef std::bidirectional_iterator_tag iterator_category; - #endif - - // Default constructor - xml_attribute_iterator(); - - // Construct an iterator which points to the specified attribute - xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent); - - // Iterator operators - bool operator==(const xml_attribute_iterator& rhs) const; - bool operator!=(const xml_attribute_iterator& rhs) const; - - xml_attribute& operator*() const; - xml_attribute* operator->() const; - - const xml_attribute_iterator& operator++(); - xml_attribute_iterator operator++(int); - - const xml_attribute_iterator& operator--(); - xml_attribute_iterator operator--(int); - }; - - // Named node range helper - class PUGIXML_CLASS xml_named_node_iterator - { - friend class xml_node; - - public: - // Iterator traits - typedef ptrdiff_t difference_type; - typedef xml_node value_type; - typedef xml_node* pointer; - typedef xml_node& reference; 
- - #ifndef PUGIXML_NO_STL - typedef std::bidirectional_iterator_tag iterator_category; - #endif +// Indent the nodes that are written to output stream with as many indentation strings as deep the node is in DOM tree. This flag is on by default. +const unsigned int format_indent = 0x01; - // Default constructor - xml_named_node_iterator(); +// Write encoding-specific BOM to the output stream. This flag is off by default. +const unsigned int format_write_bom = 0x02; - // Construct an iterator which points to the specified node - xml_named_node_iterator(const xml_node& node, const char_t* name); - - // Iterator operators - bool operator==(const xml_named_node_iterator& rhs) const; - bool operator!=(const xml_named_node_iterator& rhs) const; - - xml_node& operator*() const; - xml_node* operator->() const; - - const xml_named_node_iterator& operator++(); - xml_named_node_iterator operator++(int); - - const xml_named_node_iterator& operator--(); - xml_named_node_iterator operator--(int); - - private: - mutable xml_node _wrap; - xml_node _parent; - const char_t* _name; +// Use raw output mode (no indentation and no line breaks are written). This flag is off by default. +const unsigned int format_raw = 0x04; - xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name); - }; +// Omit default XML declaration even if there is no declaration in the document. This flag is off by default. +const unsigned int format_no_declaration = 0x08; - // Abstract tree walker class (see xml_node::traverse) - class PUGIXML_CLASS xml_tree_walker - { - friend class xml_node; +// Don't escape attribute values and PCDATA contents. This flag is off by default. 
+const unsigned int format_no_escapes = 0x10; - private: - int _depth; - - protected: - // Get current traversal depth - int depth() const; - - public: - xml_tree_walker(); - virtual ~xml_tree_walker(); - - // Callback that is called when traversal begins - virtual bool begin(xml_node& node); +// Open file using text mode in xml_document::save_file. This enables special character (i.e. new-line) conversions on some systems. This flag is off by default. +const unsigned int format_save_file_text = 0x20; - // Callback that is called for each node traversed - virtual bool for_each(xml_node& node) = 0; +// Write every attribute on a new line with appropriate indentation. This flag is off by default. +const unsigned int format_indent_attributes = 0x40; - // Callback that is called when traversal ends - virtual bool end(xml_node& node); - }; +// The default set of formatting flags. +// Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none. 
+const unsigned int format_default = format_indent; - // Parsing status, returned as part of xml_parse_result object - enum xml_parse_status - { - status_ok = 0, // No error +// Forward declarations +struct xml_attribute_struct; +struct xml_node_struct; - status_file_not_found, // File was not found during load_file() - status_io_error, // Error reading from file/stream - status_out_of_memory, // Could not allocate memory - status_internal_error, // Internal error occurred +class xml_node_iterator; +class xml_attribute_iterator; +class xml_named_node_iterator; - status_unrecognized_tag, // Parser could not determine tag type - - status_bad_pi, // Parsing error occurred while parsing document declaration/processing instruction - status_bad_comment, // Parsing error occurred while parsing comment - status_bad_cdata, // Parsing error occurred while parsing CDATA section - status_bad_doctype, // Parsing error occurred while parsing document type declaration - status_bad_pcdata, // Parsing error occurred while parsing PCDATA section - status_bad_start_element, // Parsing error occurred while parsing start element tag - status_bad_attribute, // Parsing error occurred while parsing element attribute - status_bad_end_element, // Parsing error occurred while parsing end element tag - status_end_element_mismatch,// There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag) - - status_append_invalid_root, // Unable to append nodes since root type is not node_element or node_document (exclusive to xml_node::append_buffer) - - status_no_document_element // Parsing resulted in a document without element nodes - }; - - // Parsing result - struct PUGIXML_CLASS xml_parse_result - { - // Parsing status (see xml_parse_status) - xml_parse_status status; - - // Last parsed offset (in char_t units from start of input data) - ptrdiff_t offset; - - // Source document encoding - xml_encoding encoding; - - // 
Default constructor, initializes object to failed state - xml_parse_result(); +class xml_tree_walker; - // Cast to bool operator - operator bool() const; - - // Get error description - const char* description() const; - }; - - // Document class (DOM tree root) - class PUGIXML_CLASS xml_document: public xml_node - { - private: - char_t* _buffer; - - char _memory[192]; - - // Non-copyable semantics - xml_document(const xml_document&); - xml_document& operator=(const xml_document&); - - void create(); - void destroy(); - - public: - // Default constructor, makes empty document - xml_document(); - - // Destructor, invalidates all node/attribute handles to this document - ~xml_document(); - - // Removes all nodes, leaving the empty document - void reset(); - - // Removes all nodes, then copies the entire contents of the specified document - void reset(const xml_document& proto); - - #ifndef PUGIXML_NO_STL - // Load document from stream. - xml_parse_result load(std::basic_istream >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result load(std::basic_istream >& stream, unsigned int options = parse_default); - #endif - - // (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied. - xml_parse_result load(const char_t* contents, unsigned int options = parse_default); +struct xml_parse_result; - // Load document from zero-terminated string. No encoding conversions are applied. - xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default); +class xml_node; - // Load document from file - xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - // Load document from buffer. 
Copies/converts the buffer, so it may be deleted or changed after the function returns. - xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data). - // You should ensure that buffer data will persist throughout the document's lifetime, and free the buffer memory manually once document is destroyed. - xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data). - // You should allocate the buffer with pugixml allocation function; document will free the buffer when it is no longer needed (you can't use it anymore). - xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - // Save XML document to writer (semantics is slightly different from xml_node::print, see documentation for details). - void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - - #ifndef PUGIXML_NO_STL - // Save XML document to stream (semantics is slightly different from xml_node::print, see documentation for details). 
- void save(std::basic_ostream >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - void save(std::basic_ostream >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const; - #endif - - // Save XML to file - bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - bool save_file(const wchar_t* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - - // Get document element - xml_node document_element() const; - }; +class xml_text; #ifndef PUGIXML_NO_XPATH - // XPath query return type - enum xpath_value_type - { - xpath_type_none, // Unknown type (query failed to compile) - xpath_type_node_set, // Node set (xpath_node_set) - xpath_type_number, // Number - xpath_type_string, // String - xpath_type_boolean // Boolean - }; - - // XPath parsing result - struct PUGIXML_CLASS xpath_parse_result - { - // Error message (0 if no error) - const char* error; - - // Last parsed offset (in char_t units from string start) - ptrdiff_t offset; - - // Default constructor, initializes object to failed state - xpath_parse_result(); - - // Cast to bool operator - operator bool() const; - - // Get error description - const char* description() const; - }; - - // A single XPath variable - class PUGIXML_CLASS xpath_variable - { - friend class xpath_variable_set; - - protected: - xpath_value_type _type; - xpath_variable* _next; - - xpath_variable(xpath_value_type type); - - // Non-copyable semantics - xpath_variable(const xpath_variable&); - xpath_variable& operator=(const xpath_variable&); - - public: - // Get variable name - const char_t* name() const; - - // Get variable type - xpath_value_type type() const; - - // Get variable value; no type conversion is performed, default value 
(false, NaN, empty string, empty node set) is returned on type mismatch error - bool get_boolean() const; - double get_number() const; - const char_t* get_string() const; - const xpath_node_set& get_node_set() const; - - // Set variable value; no type conversion is performed, false is returned on type mismatch error - bool set(bool value); - bool set(double value); - bool set(const char_t* value); - bool set(const xpath_node_set& value); - }; - - // A set of XPath variables - class PUGIXML_CLASS xpath_variable_set - { - private: - xpath_variable* _data[64]; - - void _assign(const xpath_variable_set& rhs); - void _swap(xpath_variable_set& rhs); - - xpath_variable* _find(const char_t* name) const; - - static bool _clone(xpath_variable* var, xpath_variable** out_result); - static void _destroy(xpath_variable* var); - - public: - // Default constructor/destructor - xpath_variable_set(); - ~xpath_variable_set(); - - // Copy constructor/assignment operator - xpath_variable_set(const xpath_variable_set& rhs); - xpath_variable_set& operator=(const xpath_variable_set& rhs); - - #if __cplusplus >= 201103 - // Move semantics support - xpath_variable_set(xpath_variable_set&& rhs); - xpath_variable_set& operator=(xpath_variable_set&& rhs); - #endif - - // Add a new variable or get the existing one, if the types match - xpath_variable* add(const char_t* name, xpath_value_type type); - - // Set value of an existing variable; no type conversion is performed, false is returned if there is no such variable or if types mismatch - bool set(const char_t* name, bool value); - bool set(const char_t* name, double value); - bool set(const char_t* name, const char_t* value); - bool set(const char_t* name, const xpath_node_set& value); - - // Get existing variable by name - xpath_variable* get(const char_t* name); - const xpath_variable* get(const char_t* name) const; - }; - - // A compiled XPath query object - class PUGIXML_CLASS xpath_query - { - private: - void* _impl; - 
xpath_parse_result _result; - - typedef void (*unspecified_bool_type)(xpath_query***); - - // Non-copyable semantics - xpath_query(const xpath_query&); - xpath_query& operator=(const xpath_query&); - - public: - // Construct a compiled object from XPath expression. - // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on compilation errors. - explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0); - - // Constructor - xpath_query(); - - // Destructor - ~xpath_query(); - - #if __cplusplus >= 201103 - // Move semantics support - xpath_query(xpath_query&& rhs); - xpath_query& operator=(xpath_query&& rhs); - #endif - - // Get query expression return type - xpath_value_type return_type() const; - - // Evaluate expression as boolean value in the specified context; performs type conversion if necessary. - // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. - bool evaluate_boolean(const xpath_node& n) const; - - // Evaluate expression as double value in the specified context; performs type conversion if necessary. - // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. - double evaluate_number(const xpath_node& n) const; - - #ifndef PUGIXML_NO_STL - // Evaluate expression as string value in the specified context; performs type conversion if necessary. - // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. - string_t evaluate_string(const xpath_node& n) const; - #endif - - // Evaluate expression as string value in the specified context; performs type conversion if necessary. - // At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero). - // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. - // If PUGIXML_NO_EXCEPTIONS is defined, returns empty set instead. 
- size_t evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const; - - // Evaluate expression as node set in the specified context. - // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors. - // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node set instead. - xpath_node_set evaluate_node_set(const xpath_node& n) const; - - // Evaluate expression as node set in the specified context. - // Return first node in document order, or empty node if node set is empty. - // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors. - // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node instead. - xpath_node evaluate_node(const xpath_node& n) const; - - // Get parsing result (used to get compilation errors in PUGIXML_NO_EXCEPTIONS mode) - const xpath_parse_result& result() const; - - // Safe bool conversion operator - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - }; - - #ifndef PUGIXML_NO_EXCEPTIONS - // XPath exception class - class PUGIXML_CLASS xpath_exception: public std::exception - { - private: - xpath_parse_result _result; - - public: - // Construct exception from parse result - explicit xpath_exception(const xpath_parse_result& result); - - // Get error message - virtual const char* what() const throw(); - - // Get parse result - const xpath_parse_result& result() const; - }; - #endif - - // XPath node class (either xml_node or xml_attribute) - class PUGIXML_CLASS xpath_node - { - private: - xml_node _node; - xml_attribute _attribute; - - typedef void (*unspecified_bool_type)(xpath_node***); - - public: - // Default constructor; constructs empty XPath node - xpath_node(); - - // Construct XPath node from XML node/attribute - xpath_node(const xml_node& node); - xpath_node(const xml_attribute& attribute, const xml_node& parent); - - // Get 
node/attribute, if any - xml_node node() const; - xml_attribute attribute() const; - - // Get parent of contained node/attribute - xml_node parent() const; - - // Safe bool conversion operator - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - - // Comparison operators - bool operator==(const xpath_node& n) const; - bool operator!=(const xpath_node& n) const; - }; - -#ifdef __BORLANDC__ - // Borland C++ workaround - bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs); - bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs); +class xpath_node; +class xpath_node_set; +class xpath_query; +class xpath_variable_set; #endif - // A fixed-size collection of XPath nodes - class PUGIXML_CLASS xpath_node_set - { - public: - // Collection type - enum type_t - { - type_unsorted, // Not ordered - type_sorted, // Sorted by document order (ascending) - type_sorted_reverse // Sorted by document order (descending) - }; - - // Constant iterator type - typedef const xpath_node* const_iterator; +// Range-based for loop support +template class xml_object_range +{ +public: + typedef It const_iterator; + typedef It iterator; - // We define non-constant iterator to be the same as constant iterator so that various generic algorithms (i.e. boost foreach) work - typedef const xpath_node* iterator; - - // Default constructor. Constructs empty set. 
- xpath_node_set(); + xml_object_range(It b, It e): _begin(b), _end(e) { + } - // Constructs a set from iterator range; data is not checked for duplicates and is not sorted according to provided type, so be careful - xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted); + It begin() const { + return _begin; + } + It end() const { + return _end; + } - // Destructor - ~xpath_node_set(); - - // Copy constructor/assignment operator - xpath_node_set(const xpath_node_set& ns); - xpath_node_set& operator=(const xpath_node_set& ns); +private: + It _begin, _end; +}; - #if __cplusplus >= 201103 - // Move semantics support - xpath_node_set(xpath_node_set&& rhs); - xpath_node_set& operator=(xpath_node_set&& rhs); - #endif +// Writer interface for node printing (see xml_node::print) +class PUGIXML_CLASS xml_writer +{ +public: + virtual ~xml_writer() {} - // Get collection type - type_t type() const; - - // Get collection size - size_t size() const; + // Write memory chunk into stream/file/whatever + virtual void write(const void* data, size_t size) = 0; +}; - // Indexing operator - const xpath_node& operator[](size_t index) const; - - // Collection iterators - const_iterator begin() const; - const_iterator end() const; +// xml_writer implementation for FILE* +class PUGIXML_CLASS xml_writer_file: public xml_writer +{ +public: + // Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio + xml_writer_file(void* file); - // Sort the collection in ascending/descending order by document order - void sort(bool reverse = false); - - // Get first node in the collection by document order - xpath_node first() const; - - // Check if collection is empty - bool empty() const; - - private: - type_t _type; - - xpath_node _storage; - - xpath_node* _begin; - xpath_node* _end; + virtual void write(const void* data, size_t size); - void _assign(const_iterator begin, const_iterator end, type_t type); - void _move(xpath_node_set& 
rhs); - }; +private: + void* file; +}; + +#ifndef PUGIXML_NO_STL +// xml_writer implementation for streams +class PUGIXML_CLASS xml_writer_stream: public xml_writer +{ +public: + // Construct writer from an output stream object + xml_writer_stream(std::basic_ostream >& stream); + xml_writer_stream(std::basic_ostream >& stream); + + virtual void write(const void* data, size_t size); + +private: + std::basic_ostream >* narrow_stream; + std::basic_ostream >* wide_stream; +}; +#endif + +// A light-weight handle for manipulating attributes in DOM tree +class PUGIXML_CLASS xml_attribute +{ + friend class xml_attribute_iterator; + friend class xml_node; + +private: + xml_attribute_struct* _attr; + + typedef void (*unspecified_bool_type)(xml_attribute***); + +public: + // Default constructor. Constructs an empty attribute. + xml_attribute(); + + // Constructs attribute from internal pointer + explicit xml_attribute(xml_attribute_struct* attr); + + // Safe bool conversion operator + operator unspecified_bool_type() const; + + // Borland C++ workaround + bool operator!() const; + + // Comparison operators (compares wrapped attribute pointers) + bool operator==(const xml_attribute& r) const; + bool operator!=(const xml_attribute& r) const; + bool operator<(const xml_attribute& r) const; + bool operator>(const xml_attribute& r) const; + bool operator<=(const xml_attribute& r) const; + bool operator>=(const xml_attribute& r) const; + + // Check if attribute is empty + bool empty() const; + + // Get attribute name/value, or "" if attribute is empty + const char_t* name() const; + const char_t* value() const; + + // Get attribute value, or the default value if attribute is empty + const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const; + + // Get attribute value as a number, or the default value if conversion did not succeed or attribute is empty + int as_int(int def = 0) const; + unsigned int as_uint(unsigned int def = 0) const; + double as_double(double def = 0) 
const; + float as_float(float def = 0) const; + +#ifdef PUGIXML_HAS_LONG_LONG + long long as_llong(long long def = 0) const; + unsigned long long as_ullong(unsigned long long def = 0) const; +#endif + + // Get attribute value as bool (returns true if first character is in '1tTyY' set), or the default value if attribute is empty + bool as_bool(bool def = false) const; + + // Set attribute name/value (returns false if attribute is empty or there is not enough memory) + bool set_name(const char_t* rhs); + bool set_value(const char_t* rhs); + + // Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false") + bool set_value(int rhs); + bool set_value(unsigned int rhs); + bool set_value(double rhs); + bool set_value(float rhs); + bool set_value(bool rhs); + +#ifdef PUGIXML_HAS_LONG_LONG + bool set_value(long long rhs); + bool set_value(unsigned long long rhs); +#endif + + // Set attribute value (equivalent to set_value without error checking) + xml_attribute& operator=(const char_t* rhs); + xml_attribute& operator=(int rhs); + xml_attribute& operator=(unsigned int rhs); + xml_attribute& operator=(double rhs); + xml_attribute& operator=(float rhs); + xml_attribute& operator=(bool rhs); + +#ifdef PUGIXML_HAS_LONG_LONG + xml_attribute& operator=(long long rhs); + xml_attribute& operator=(unsigned long long rhs); +#endif + + // Get next/previous attribute in the attribute list of the parent node + xml_attribute next_attribute() const; + xml_attribute previous_attribute() const; + + // Get hash value (unique for handles to the same object) + size_t hash_value() const; + + // Get internal pointer + xml_attribute_struct* internal_object() const; +}; + +#ifdef __BORLANDC__ +// Borland C++ workaround +bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs); +bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs); +#endif + +// A light-weight handle for manipulating nodes in DOM tree +class 
PUGIXML_CLASS xml_node +{ + friend class xml_attribute_iterator; + friend class xml_node_iterator; + friend class xml_named_node_iterator; + +protected: + xml_node_struct* _root; + + typedef void (*unspecified_bool_type)(xml_node***); + +public: + // Default constructor. Constructs an empty node. + xml_node(); + + // Constructs node from internal pointer + explicit xml_node(xml_node_struct* p); + + // Safe bool conversion operator + operator unspecified_bool_type() const; + + // Borland C++ workaround + bool operator!() const; + + // Comparison operators (compares wrapped node pointers) + bool operator==(const xml_node& r) const; + bool operator!=(const xml_node& r) const; + bool operator<(const xml_node& r) const; + bool operator>(const xml_node& r) const; + bool operator<=(const xml_node& r) const; + bool operator>=(const xml_node& r) const; + + // Check if node is empty. + bool empty() const; + + // Get node type + xml_node_type type() const; + + // Get node name, or "" if node is empty or it has no name + const char_t* name() const; + + // Get node value, or "" if node is empty or it has no value + // Note: For text node.value() does not return "text"! Use child_value() or text() methods to access text inside nodes. 
+ const char_t* value() const; + + // Get attribute list + xml_attribute first_attribute() const; + xml_attribute last_attribute() const; + + // Get children list + xml_node first_child() const; + xml_node last_child() const; + + // Get next/previous sibling in the children list of the parent node + xml_node next_sibling() const; + xml_node previous_sibling() const; + + // Get parent node + xml_node parent() const; + + // Get root of DOM tree this node belongs to + xml_node root() const; + + // Get text object for the current node + xml_text text() const; + + // Get child, attribute or next/previous sibling with the specified name + xml_node child(const char_t* name) const; + xml_attribute attribute(const char_t* name) const; + xml_node next_sibling(const char_t* name) const; + xml_node previous_sibling(const char_t* name) const; + + // Get attribute, starting the search from a hint (and updating hint so that searching for a sequence of attributes is fast) + xml_attribute attribute(const char_t* name, xml_attribute& hint) const; + + // Get child value of current node; that is, value of the first child node of type PCDATA/CDATA + const char_t* child_value() const; + + // Get child value of child with specified name. Equivalent to child(name).child_value(). + const char_t* child_value(const char_t* name) const; + + // Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value) + bool set_name(const char_t* rhs); + bool set_value(const char_t* rhs); + + // Add attribute with specified name. Returns added attribute, or empty attribute on errors. + xml_attribute append_attribute(const char_t* name); + xml_attribute prepend_attribute(const char_t* name); + xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr); + xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr); + + // Add a copy of the specified attribute. 
Returns added attribute, or empty attribute on errors. + xml_attribute append_copy(const xml_attribute& proto); + xml_attribute prepend_copy(const xml_attribute& proto); + xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr); + xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr); + + // Add child node with specified type. Returns added node, or empty node on errors. + xml_node append_child(xml_node_type type = node_element); + xml_node prepend_child(xml_node_type type = node_element); + xml_node insert_child_after(xml_node_type type, const xml_node& node); + xml_node insert_child_before(xml_node_type type, const xml_node& node); + + // Add child element with specified name. Returns added node, or empty node on errors. + xml_node append_child(const char_t* name); + xml_node prepend_child(const char_t* name); + xml_node insert_child_after(const char_t* name, const xml_node& node); + xml_node insert_child_before(const char_t* name, const xml_node& node); + + // Add a copy of the specified node as a child. Returns added node, or empty node on errors. + xml_node append_copy(const xml_node& proto); + xml_node prepend_copy(const xml_node& proto); + xml_node insert_copy_after(const xml_node& proto, const xml_node& node); + xml_node insert_copy_before(const xml_node& proto, const xml_node& node); + + // Move the specified node to become a child of this node. Returns moved node, or empty node on errors. 
+ xml_node append_move(const xml_node& moved); + xml_node prepend_move(const xml_node& moved); + xml_node insert_move_after(const xml_node& moved, const xml_node& node); + xml_node insert_move_before(const xml_node& moved, const xml_node& node); + + // Remove specified attribute + bool remove_attribute(const xml_attribute& a); + bool remove_attribute(const char_t* name); + + // Remove specified child + bool remove_child(const xml_node& n); + bool remove_child(const char_t* name); + + // Parses buffer as an XML document fragment and appends all nodes as children of the current node. + // Copies/converts the buffer, so it may be deleted or changed after the function returns. + // Note: append_buffer allocates memory that has the lifetime of the owning document; removing the appended nodes does not immediately reclaim that memory. + xml_parse_result append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + // Find attribute using predicate. Returns first attribute for which predicate returned true. + template xml_attribute find_attribute(Predicate pred) const { + if (!_root) return xml_attribute(); + + for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute()) + if (pred(attrib)) + return attrib; + + return xml_attribute(); + } + + // Find child node using predicate. Returns first child for which predicate returned true. + template xml_node find_child(Predicate pred) const { + if (!_root) return xml_node(); + + for (xml_node node = first_child(); node; node = node.next_sibling()) + if (pred(node)) + return node; + + return xml_node(); + } + + // Find node from subtree using predicate. Returns first node from subtree (depth-first), for which predicate returned true. 
+ template xml_node find_node(Predicate pred) const { + if (!_root) return xml_node(); + + xml_node cur = first_child(); + + while (cur._root && cur._root != _root) { + if (pred(cur)) return cur; + + if (cur.first_child()) cur = cur.first_child(); + else if (cur.next_sibling()) cur = cur.next_sibling(); + else { + while (!cur.next_sibling() && cur._root != _root) cur = cur.parent(); + + if (cur._root != _root) cur = cur.next_sibling(); + } + } + + return xml_node(); + } + + // Find child node by attribute name/value + xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; + xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const; + +#ifndef PUGIXML_NO_STL + // Get the absolute node path from root as a text string. + string_t path(char_t delimiter = '/') const; +#endif + + // Search for a node by path consisting of node names and . or .. elements. + xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const; + + // Recursively traverse subtree with xml_tree_walker + bool traverse(xml_tree_walker& walker); + +#ifndef PUGIXML_NO_XPATH + // Select single node by evaluating XPath query. Returns first node from the resulting node set. + xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const; + xpath_node select_node(const xpath_query& query) const; + + // Select node set by evaluating XPath query + xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const; + xpath_node_set select_nodes(const xpath_query& query) const; + + // (deprecated: use select_node instead) Select single node by evaluating XPath query. 
+ xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const; + xpath_node select_single_node(const xpath_query& query) const; + +#endif + + // Print subtree using a writer object + void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; + +#ifndef PUGIXML_NO_STL + // Print subtree to stream + void print(std::basic_ostream >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; + void print(std::basic_ostream >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const; +#endif + + // Child nodes iterators + typedef xml_node_iterator iterator; + + iterator begin() const; + iterator end() const; + + // Attribute iterators + typedef xml_attribute_iterator attribute_iterator; + + attribute_iterator attributes_begin() const; + attribute_iterator attributes_end() const; + + // Range-based for support + xml_object_range children() const; + xml_object_range children(const char_t* name) const; + xml_object_range attributes() const; + + // Get node offset in parsed file/string (in char_t units) for debugging purposes + ptrdiff_t offset_debug() const; + + // Get hash value (unique for handles to the same object) + size_t hash_value() const; + + // Get internal pointer + xml_node_struct* internal_object() const; +}; + +#ifdef __BORLANDC__ +// Borland C++ workaround +bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs); +bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs); +#endif + +// A helper for working with text inside PCDATA nodes +class PUGIXML_CLASS xml_text +{ + friend class xml_node; + + xml_node_struct* _root; + + typedef void (*unspecified_bool_type)(xml_text***); + + explicit xml_text(xml_node_struct* root); + + xml_node_struct* 
_data_new(); + xml_node_struct* _data() const; + +public: + // Default constructor. Constructs an empty object. + xml_text(); + + // Safe bool conversion operator + operator unspecified_bool_type() const; + + // Borland C++ workaround + bool operator!() const; + + // Check if text object is empty + bool empty() const; + + // Get text, or "" if object is empty + const char_t* get() const; + + // Get text, or the default value if object is empty + const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const; + + // Get text as a number, or the default value if conversion did not succeed or object is empty + int as_int(int def = 0) const; + unsigned int as_uint(unsigned int def = 0) const; + double as_double(double def = 0) const; + float as_float(float def = 0) const; + +#ifdef PUGIXML_HAS_LONG_LONG + long long as_llong(long long def = 0) const; + unsigned long long as_ullong(unsigned long long def = 0) const; +#endif + + // Get text as bool (returns true if first character is in '1tTyY' set), or the default value if object is empty + bool as_bool(bool def = false) const; + + // Set text (returns false if object is empty or there is not enough memory) + bool set(const char_t* rhs); + + // Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false") + bool set(int rhs); + bool set(unsigned int rhs); + bool set(double rhs); + bool set(float rhs); + bool set(bool rhs); + +#ifdef PUGIXML_HAS_LONG_LONG + bool set(long long rhs); + bool set(unsigned long long rhs); +#endif + + // Set text (equivalent to set without error checking) + xml_text& operator=(const char_t* rhs); + xml_text& operator=(int rhs); + xml_text& operator=(unsigned int rhs); + xml_text& operator=(double rhs); + xml_text& operator=(float rhs); + xml_text& operator=(bool rhs); + +#ifdef PUGIXML_HAS_LONG_LONG + xml_text& operator=(long long rhs); + xml_text& operator=(unsigned long long rhs); +#endif + + // Get the data node (node_pcdata or node_cdata) for 
this object + xml_node data() const; +}; + +#ifdef __BORLANDC__ +// Borland C++ workaround +bool PUGIXML_FUNCTION operator&&(const xml_text& lhs, bool rhs); +bool PUGIXML_FUNCTION operator||(const xml_text& lhs, bool rhs); +#endif + +// Child node iterator (a bidirectional iterator over a collection of xml_node) +class PUGIXML_CLASS xml_node_iterator +{ + friend class xml_node; + +private: + mutable xml_node _wrap; + xml_node _parent; + + xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent); + +public: + // Iterator traits + typedef ptrdiff_t difference_type; + typedef xml_node value_type; + typedef xml_node* pointer; + typedef xml_node& reference; + +#ifndef PUGIXML_NO_STL + typedef std::bidirectional_iterator_tag iterator_category; +#endif + + // Default constructor + xml_node_iterator(); + + // Construct an iterator which points to the specified node + xml_node_iterator(const xml_node& node); + + // Iterator operators + bool operator==(const xml_node_iterator& rhs) const; + bool operator!=(const xml_node_iterator& rhs) const; + + xml_node& operator*() const; + xml_node* operator->() const; + + const xml_node_iterator& operator++(); + xml_node_iterator operator++(int); + + const xml_node_iterator& operator--(); + xml_node_iterator operator--(int); +}; + +// Attribute iterator (a bidirectional iterator over a collection of xml_attribute) +class PUGIXML_CLASS xml_attribute_iterator +{ + friend class xml_node; + +private: + mutable xml_attribute _wrap; + xml_node _parent; + + xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent); + +public: + // Iterator traits + typedef ptrdiff_t difference_type; + typedef xml_attribute value_type; + typedef xml_attribute* pointer; + typedef xml_attribute& reference; + +#ifndef PUGIXML_NO_STL + typedef std::bidirectional_iterator_tag iterator_category; +#endif + + // Default constructor + xml_attribute_iterator(); + + // Construct an iterator which points to the specified attribute + 
xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent); + + // Iterator operators + bool operator==(const xml_attribute_iterator& rhs) const; + bool operator!=(const xml_attribute_iterator& rhs) const; + + xml_attribute& operator*() const; + xml_attribute* operator->() const; + + const xml_attribute_iterator& operator++(); + xml_attribute_iterator operator++(int); + + const xml_attribute_iterator& operator--(); + xml_attribute_iterator operator--(int); +}; + +// Named node range helper +class PUGIXML_CLASS xml_named_node_iterator +{ + friend class xml_node; + +public: + // Iterator traits + typedef ptrdiff_t difference_type; + typedef xml_node value_type; + typedef xml_node* pointer; + typedef xml_node& reference; + +#ifndef PUGIXML_NO_STL + typedef std::bidirectional_iterator_tag iterator_category; +#endif + + // Default constructor + xml_named_node_iterator(); + + // Construct an iterator which points to the specified node + xml_named_node_iterator(const xml_node& node, const char_t* name); + + // Iterator operators + bool operator==(const xml_named_node_iterator& rhs) const; + bool operator!=(const xml_named_node_iterator& rhs) const; + + xml_node& operator*() const; + xml_node* operator->() const; + + const xml_named_node_iterator& operator++(); + xml_named_node_iterator operator++(int); + + const xml_named_node_iterator& operator--(); + xml_named_node_iterator operator--(int); + +private: + mutable xml_node _wrap; + xml_node _parent; + const char_t* _name; + + xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name); +}; + +// Abstract tree walker class (see xml_node::traverse) +class PUGIXML_CLASS xml_tree_walker +{ + friend class xml_node; + +private: + int _depth; + +protected: + // Get current traversal depth + int depth() const; + +public: + xml_tree_walker(); + virtual ~xml_tree_walker(); + + // Callback that is called when traversal begins + virtual bool begin(xml_node& node); + + // Callback that is 
called for each node traversed + virtual bool for_each(xml_node& node) = 0; + + // Callback that is called when traversal ends + virtual bool end(xml_node& node); +}; + +// Parsing status, returned as part of xml_parse_result object +enum xml_parse_status { + status_ok = 0, // No error + + status_file_not_found, // File was not found during load_file() + status_io_error, // Error reading from file/stream + status_out_of_memory, // Could not allocate memory + status_internal_error, // Internal error occurred + + status_unrecognized_tag, // Parser could not determine tag type + + status_bad_pi, // Parsing error occurred while parsing document declaration/processing instruction + status_bad_comment, // Parsing error occurred while parsing comment + status_bad_cdata, // Parsing error occurred while parsing CDATA section + status_bad_doctype, // Parsing error occurred while parsing document type declaration + status_bad_pcdata, // Parsing error occurred while parsing PCDATA section + status_bad_start_element, // Parsing error occurred while parsing start element tag + status_bad_attribute, // Parsing error occurred while parsing element attribute + status_bad_end_element, // Parsing error occurred while parsing end element tag + status_end_element_mismatch,// There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag) + + status_append_invalid_root, // Unable to append nodes since root type is not node_element or node_document (exclusive to xml_node::append_buffer) + + status_no_document_element // Parsing resulted in a document without element nodes +}; + +// Parsing result +struct PUGIXML_CLASS xml_parse_result { + // Parsing status (see xml_parse_status) + xml_parse_status status; + + // Last parsed offset (in char_t units from start of input data) + ptrdiff_t offset; + + // Source document encoding + xml_encoding encoding; + + // Default constructor, initializes object to failed state + 
xml_parse_result(); + + // Cast to bool operator + operator bool() const; + + // Get error description + const char* description() const; +}; + +// Document class (DOM tree root) +class PUGIXML_CLASS xml_document: public xml_node +{ +private: + char_t* _buffer; + + char _memory[192]; + + // Non-copyable semantics + xml_document(const xml_document&); + xml_document& operator=(const xml_document&); + + void create(); + void destroy(); + +public: + // Default constructor, makes empty document + xml_document(); + + // Destructor, invalidates all node/attribute handles to this document + ~xml_document(); + + // Removes all nodes, leaving the empty document + void reset(); + + // Removes all nodes, then copies the entire contents of the specified document + void reset(const xml_document& proto); + +#ifndef PUGIXML_NO_STL + // Load document from stream. + xml_parse_result load(std::basic_istream >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result load(std::basic_istream >& stream, unsigned int options = parse_default); +#endif + + // (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied. + xml_parse_result load(const char_t* contents, unsigned int options = parse_default); + + // Load document from zero-terminated string. No encoding conversions are applied. + xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default); + + // Load document from file + xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + xml_parse_result load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + // Load document from buffer. Copies/converts the buffer, so it may be deleted or changed after the function returns. 
+ xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data). + // You should ensure that buffer data will persist throughout the document's lifetime, and free the buffer memory manually once document is destroyed. + xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data). + // You should allocate the buffer with pugixml allocation function; document will free the buffer when it is no longer needed (you can't use it anymore). + xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); + + // Save XML document to writer (semantics is slightly different from xml_node::print, see documentation for details). + void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + +#ifndef PUGIXML_NO_STL + // Save XML document to stream (semantics is slightly different from xml_node::print, see documentation for details). 
+ void save(std::basic_ostream >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + void save(std::basic_ostream >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const; +#endif + + // Save XML to file + bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + bool save_file(const wchar_t* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; + + // Get document element + xml_node document_element() const; +}; + +#ifndef PUGIXML_NO_XPATH +// XPath query return type +enum xpath_value_type { + xpath_type_none, // Unknown type (query failed to compile) + xpath_type_node_set, // Node set (xpath_node_set) + xpath_type_number, // Number + xpath_type_string, // String + xpath_type_boolean // Boolean +}; + +// XPath parsing result +struct PUGIXML_CLASS xpath_parse_result { + // Error message (0 if no error) + const char* error; + + // Last parsed offset (in char_t units from string start) + ptrdiff_t offset; + + // Default constructor, initializes object to failed state + xpath_parse_result(); + + // Cast to bool operator + operator bool() const; + + // Get error description + const char* description() const; +}; + +// A single XPath variable +class PUGIXML_CLASS xpath_variable +{ + friend class xpath_variable_set; + +protected: + xpath_value_type _type; + xpath_variable* _next; + + xpath_variable(xpath_value_type type); + + // Non-copyable semantics + xpath_variable(const xpath_variable&); + xpath_variable& operator=(const xpath_variable&); + +public: + // Get variable name + const char_t* name() const; + + // Get variable type + xpath_value_type type() const; + + // Get variable value; no type conversion is performed, default value (false, NaN, empty string, empty 
node set) is returned on type mismatch error + bool get_boolean() const; + double get_number() const; + const char_t* get_string() const; + const xpath_node_set& get_node_set() const; + + // Set variable value; no type conversion is performed, false is returned on type mismatch error + bool set(bool value); + bool set(double value); + bool set(const char_t* value); + bool set(const xpath_node_set& value); +}; + +// A set of XPath variables +class PUGIXML_CLASS xpath_variable_set +{ +private: + xpath_variable* _data[64]; + + void _assign(const xpath_variable_set& rhs); + void _swap(xpath_variable_set& rhs); + + xpath_variable* _find(const char_t* name) const; + + static bool _clone(xpath_variable* var, xpath_variable** out_result); + static void _destroy(xpath_variable* var); + +public: + // Default constructor/destructor + xpath_variable_set(); + ~xpath_variable_set(); + + // Copy constructor/assignment operator + xpath_variable_set(const xpath_variable_set& rhs); + xpath_variable_set& operator=(const xpath_variable_set& rhs); + +#if __cplusplus >= 201103 + // Move semantics support + xpath_variable_set(xpath_variable_set&& rhs); + xpath_variable_set& operator=(xpath_variable_set&& rhs); +#endif + + // Add a new variable or get the existing one, if the types match + xpath_variable* add(const char_t* name, xpath_value_type type); + + // Set value of an existing variable; no type conversion is performed, false is returned if there is no such variable or if types mismatch + bool set(const char_t* name, bool value); + bool set(const char_t* name, double value); + bool set(const char_t* name, const char_t* value); + bool set(const char_t* name, const xpath_node_set& value); + + // Get existing variable by name + xpath_variable* get(const char_t* name); + const xpath_variable* get(const char_t* name) const; +}; + +// A compiled XPath query object +class PUGIXML_CLASS xpath_query +{ +private: + void* _impl; + xpath_parse_result _result; + + typedef void 
(*unspecified_bool_type)(xpath_query***); + + // Non-copyable semantics + xpath_query(const xpath_query&); + xpath_query& operator=(const xpath_query&); + +public: + // Construct a compiled object from XPath expression. + // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on compilation errors. + explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0); + + // Constructor + xpath_query(); + + // Destructor + ~xpath_query(); + +#if __cplusplus >= 201103 + // Move semantics support + xpath_query(xpath_query&& rhs); + xpath_query& operator=(xpath_query&& rhs); +#endif + + // Get query expression return type + xpath_value_type return_type() const; + + // Evaluate expression as boolean value in the specified context; performs type conversion if necessary. + // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. + bool evaluate_boolean(const xpath_node& n) const; + + // Evaluate expression as double value in the specified context; performs type conversion if necessary. + // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. + double evaluate_number(const xpath_node& n) const; + +#ifndef PUGIXML_NO_STL + // Evaluate expression as string value in the specified context; performs type conversion if necessary. + // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. + string_t evaluate_string(const xpath_node& n) const; +#endif + + // Evaluate expression as string value in the specified context; performs type conversion if necessary. + // At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero). + // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. + // If PUGIXML_NO_EXCEPTIONS is defined, returns empty set instead. 
+ size_t evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const; + + // Evaluate expression as node set in the specified context. + // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors. + // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node set instead. + xpath_node_set evaluate_node_set(const xpath_node& n) const; + + // Evaluate expression as node set in the specified context. + // Return first node in document order, or empty node if node set is empty. + // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors. + // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node instead. + xpath_node evaluate_node(const xpath_node& n) const; + + // Get parsing result (used to get compilation errors in PUGIXML_NO_EXCEPTIONS mode) + const xpath_parse_result& result() const; + + // Safe bool conversion operator + operator unspecified_bool_type() const; + + // Borland C++ workaround + bool operator!() const; +}; + +#ifndef PUGIXML_NO_EXCEPTIONS +// XPath exception class +class PUGIXML_CLASS xpath_exception: public std::exception +{ +private: + xpath_parse_result _result; + +public: + // Construct exception from parse result + explicit xpath_exception(const xpath_parse_result& result); + + // Get error message + virtual const char* what() const throw(); + + // Get parse result + const xpath_parse_result& result() const; +}; +#endif + +// XPath node class (either xml_node or xml_attribute) +class PUGIXML_CLASS xpath_node +{ +private: + xml_node _node; + xml_attribute _attribute; + + typedef void (*unspecified_bool_type)(xpath_node***); + +public: + // Default constructor; constructs empty XPath node + xpath_node(); + + // Construct XPath node from XML node/attribute + xpath_node(const xml_node& node); + xpath_node(const xml_attribute& attribute, const xml_node& parent); + + // Get node/attribute, if any + 
xml_node node() const; + xml_attribute attribute() const; + + // Get parent of contained node/attribute + xml_node parent() const; + + // Safe bool conversion operator + operator unspecified_bool_type() const; + + // Borland C++ workaround + bool operator!() const; + + // Comparison operators + bool operator==(const xpath_node& n) const; + bool operator!=(const xpath_node& n) const; +}; + +#ifdef __BORLANDC__ +// Borland C++ workaround +bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs); +bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs); +#endif + +// A fixed-size collection of XPath nodes +class PUGIXML_CLASS xpath_node_set +{ +public: + // Collection type + enum type_t { + type_unsorted, // Not ordered + type_sorted, // Sorted by document order (ascending) + type_sorted_reverse // Sorted by document order (descending) + }; + + // Constant iterator type + typedef const xpath_node* const_iterator; + + // We define non-constant iterator to be the same as constant iterator so that various generic algorithms (i.e. boost foreach) work + typedef const xpath_node* iterator; + + // Default constructor. Constructs empty set. 
+ xpath_node_set(); + + // Constructs a set from iterator range; data is not checked for duplicates and is not sorted according to provided type, so be careful + xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted); + + // Destructor + ~xpath_node_set(); + + // Copy constructor/assignment operator + xpath_node_set(const xpath_node_set& ns); + xpath_node_set& operator=(const xpath_node_set& ns); + +#if __cplusplus >= 201103 + // Move semantics support + xpath_node_set(xpath_node_set&& rhs); + xpath_node_set& operator=(xpath_node_set&& rhs); +#endif + + // Get collection type + type_t type() const; + + // Get collection size + size_t size() const; + + // Indexing operator + const xpath_node& operator[](size_t index) const; + + // Collection iterators + const_iterator begin() const; + const_iterator end() const; + + // Sort the collection in ascending/descending order by document order + void sort(bool reverse = false); + + // Get first node in the collection by document order + xpath_node first() const; + + // Check if collection is empty + bool empty() const; + +private: + type_t _type; + + xpath_node _storage; + + xpath_node* _begin; + xpath_node* _end; + + void _assign(const_iterator begin, const_iterator end, type_t type); + void _move(xpath_node_set& rhs); +}; #endif #ifndef PUGIXML_NO_STL - // Convert wide string to UTF8 - std::basic_string, std::allocator > PUGIXML_FUNCTION as_utf8(const wchar_t* str); - std::basic_string, std::allocator > PUGIXML_FUNCTION as_utf8(const std::basic_string, std::allocator >& str); - - // Convert UTF8 to wide string - std::basic_string, std::allocator > PUGIXML_FUNCTION as_wide(const char* str); - std::basic_string, std::allocator > PUGIXML_FUNCTION as_wide(const std::basic_string, std::allocator >& str); +// Convert wide string to UTF8 +std::basic_string, std::allocator > PUGIXML_FUNCTION as_utf8(const wchar_t* str); +std::basic_string, std::allocator > PUGIXML_FUNCTION as_utf8(const 
std::basic_string, std::allocator >& str); + +// Convert UTF8 to wide string +std::basic_string, std::allocator > PUGIXML_FUNCTION as_wide(const char* str); +std::basic_string, std::allocator > PUGIXML_FUNCTION as_wide(const std::basic_string, std::allocator >& str); #endif - // Memory allocation function interface; returns pointer to allocated memory or NULL on failure - typedef void* (*allocation_function)(size_t size); - - // Memory deallocation function interface - typedef void (*deallocation_function)(void* ptr); +// Memory allocation function interface; returns pointer to allocated memory or NULL on failure +typedef void* (*allocation_function)(size_t size); - // Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions. - void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate); - - // Get current memory management functions - allocation_function PUGIXML_FUNCTION get_memory_allocation_function(); - deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function(); +// Memory deallocation function interface +typedef void (*deallocation_function)(void* ptr); + +// Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions. 
+void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate); + +// Get current memory management functions +allocation_function PUGIXML_FUNCTION get_memory_allocation_function(); +deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function(); } #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) namespace std { - // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) - std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_node_iterator&); - std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_attribute_iterator&); - std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_named_node_iterator&); +// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) +std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_node_iterator&); +std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_attribute_iterator&); +std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_named_node_iterator&); } #endif #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) namespace std { - // Workarounds for (non-standard) iterator category detection - std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_node_iterator&); - std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_attribute_iterator&); - std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_named_node_iterator&); +// Workarounds for (non-standard) iterator category detection +std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_node_iterator&); +std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_attribute_iterator&); +std::bidirectional_iterator_tag PUGIXML_FUNCTION 
__iterator_category(const pugi::xml_named_node_iterator&); } #endif @@ -1388,7 +1379,7 @@ namespace std * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND From 986303fd89707409b2337198c9f924397b6fb564 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 1 Feb 2017 16:59:38 +0000 Subject: [PATCH 109/176] bump --- BUILD-INSTRUCTIONS.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/BUILD-INSTRUCTIONS.txt b/BUILD-INSTRUCTIONS.txt index a41582bfa..7b9bc3a8a 100644 --- a/BUILD-INSTRUCTIONS.txt +++ b/BUILD-INSTRUCTIONS.txt @@ -7,4 +7,3 @@ into the source tree from elsewhere: * "bjam-files" is taken from Boost. * "util" and "lm" are taken from KenLM: https://github.com/kpu/kenlm - From e0dc9364c86adc2085e61d53f5427c3a1228ed95 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 3 Feb 2017 10:01:02 +0000 Subject: [PATCH 110/176] remove Compact pt from Moses2 (keep compact lex reordering model) --- moses2/FF/FeatureRegistry.cpp | 2 - moses2/Jamfile | 2 - .../CompactPT/PhraseDecoder.cpp | 466 ------------------ .../CompactPT/PhraseDecoder.h | 142 ------ .../CompactPT/PhraseTableCompact.cpp | 222 --------- .../CompactPT/PhraseTableCompact.h | 68 --- 6 files changed, 902 deletions(-) delete mode 100644 moses2/TranslationModel/CompactPT/PhraseDecoder.cpp delete mode 100644 moses2/TranslationModel/CompactPT/PhraseDecoder.h delete mode 100644 moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp delete mode 100644 moses2/TranslationModel/CompactPT/PhraseTableCompact.h diff --git a/moses2/FF/FeatureRegistry.cpp b/moses2/FF/FeatureRegistry.cpp index 3ec8706e5..85abd7214 100644 --- a/moses2/FF/FeatureRegistry.cpp +++ b/moses2/FF/FeatureRegistry.cpp @@ -1,7 +1,6 @@ #include "FeatureRegistry.h" #include 
"../TranslationModel/Memory/PhraseTableMemory.h" -#include "../TranslationModel/CompactPT/PhraseTableCompact.h" #include "../TranslationModel/ProbingPT/ProbingPT.h" #include "../TranslationModel/UnknownWordPenalty.h" #include "../TranslationModel/Transliteration.h" @@ -55,7 +54,6 @@ FeatureRegistry::FeatureRegistry() // Feature with different name than class. #define MOSES_FNAME2(name, type) Add(name, new DefaultFeatureFactory< type >()); - MOSES_FNAME2("PhraseDictionaryCompact", PhraseTableCompact); MOSES_FNAME2("PhraseDictionaryMemory", PhraseTableMemory); MOSES_FNAME(ProbingPT); MOSES_FNAME2("PhraseDictionaryTransliteration", Transliteration); diff --git a/moses2/Jamfile b/moses2/Jamfile index ccd2b98e5..aaff82b9c 100644 --- a/moses2/Jamfile +++ b/moses2/Jamfile @@ -59,12 +59,10 @@ alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses/Translatio TranslationModel/UnknownWordPenalty.cpp TranslationModel/Memory/PhraseTableMemory.cpp - TranslationModel/CompactPT/PhraseTableCompact.cpp TranslationModel/CompactPT/BlockHashIndex.cpp TranslationModel/CompactPT/CmphStringVectorAdapter.cpp TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp TranslationModel/CompactPT/MurmurHash3.cpp - TranslationModel/CompactPT/PhraseDecoder.cpp TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp TranslationModel/CompactPT/ThrowingFwrite.cpp diff --git a/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp b/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp deleted file mode 100644 index 7860fed94..000000000 --- a/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp +++ /dev/null @@ -1,466 +0,0 @@ -// $Id$ -// vim:tabstop=2 -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (C) 2006 University of Edinburgh - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software 
Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#include - -#include "PhraseDecoder.h" -#include "../../System.h" -#include "../../SubPhrase.h" - -using namespace std; - -namespace Moses2 -{ - -PhraseDecoder::PhraseDecoder( - PhraseTableCompact &phraseDictionary, - const std::vector* input, - const std::vector* output, - size_t numScoreComponent - // , const std::vector* weight -) - : m_coding(None), m_numScoreComponent(numScoreComponent), - m_containsAlignmentInfo(true), m_maxRank(0), - m_symbolTree(0), m_multipleScoreTrees(false), - m_scoreTrees(1), m_alignTree(0), - m_phraseDictionary(phraseDictionary), m_input(input), m_output(output), - // m_weight(weight), - m_separator(" ||| ") -{ } - -PhraseDecoder::~PhraseDecoder() -{ - if(m_symbolTree) - delete m_symbolTree; - - for(size_t i = 0; i < m_scoreTrees.size(); i++) - if(m_scoreTrees[i]) - delete m_scoreTrees[i]; - - if(m_alignTree) - delete m_alignTree; -} - -inline unsigned PhraseDecoder::GetSourceSymbolId(std::string& symbol) -{ - boost::unordered_map::iterator it - = m_sourceSymbolsMap.find(symbol); - if(it != m_sourceSymbolsMap.end()) - return it->second; - - size_t idx = m_sourceSymbols.find(symbol); - m_sourceSymbolsMap[symbol] = idx; - return idx; -} - -inline std::string PhraseDecoder::GetTargetSymbol(unsigned idx) const -{ - if(idx < m_targetSymbols.size()) - return m_targetSymbols[idx]; - return std::string("##ERROR##"); -} - -inline 
size_t PhraseDecoder::GetREncType(unsigned encodedSymbol) -{ - return (encodedSymbol >> 30) + 1; -} - -inline size_t PhraseDecoder::GetPREncType(unsigned encodedSymbol) -{ - return (encodedSymbol >> 31) + 1; -} - -inline unsigned PhraseDecoder::GetTranslation(unsigned srcIdx, size_t rank) -{ - size_t srcTrgIdx = m_lexicalTableIndex[srcIdx]; - return m_lexicalTable[srcTrgIdx + rank].second; -} - -size_t PhraseDecoder::GetMaxSourcePhraseLength() -{ - return m_maxPhraseLength; -} - -inline unsigned PhraseDecoder::DecodeREncSymbol1(unsigned encodedSymbol) -{ - return encodedSymbol &= ~(3 << 30); -} - -inline unsigned PhraseDecoder::DecodeREncSymbol2Rank(unsigned encodedSymbol) -{ - return encodedSymbol &= ~(255 << 24); -} - -inline unsigned PhraseDecoder::DecodeREncSymbol2Position(unsigned encodedSymbol) -{ - encodedSymbol &= ~(3 << 30); - encodedSymbol >>= 24; - return encodedSymbol; -} - -inline unsigned PhraseDecoder::DecodeREncSymbol3(unsigned encodedSymbol) -{ - return encodedSymbol &= ~(3 << 30); -} - -inline unsigned PhraseDecoder::DecodePREncSymbol1(unsigned encodedSymbol) -{ - return encodedSymbol &= ~(1 << 31); -} - -inline int PhraseDecoder::DecodePREncSymbol2Left(unsigned encodedSymbol) -{ - return ((encodedSymbol >> 25) & 63) - 32; -} - -inline int PhraseDecoder::DecodePREncSymbol2Right(unsigned encodedSymbol) -{ - return ((encodedSymbol >> 19) & 63) - 32; -} - -inline unsigned PhraseDecoder::DecodePREncSymbol2Rank(unsigned encodedSymbol) -{ - return (encodedSymbol & 524287); -} - -size_t PhraseDecoder::Load(std::FILE* in) -{ - size_t start = std::ftell(in); - size_t read = 0; - - read += std::fread(&m_coding, sizeof(m_coding), 1, in); - read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, in); - read += std::fread(&m_containsAlignmentInfo, sizeof(m_containsAlignmentInfo), 1, in); - read += std::fread(&m_maxRank, sizeof(m_maxRank), 1, in); - read += std::fread(&m_maxPhraseLength, sizeof(m_maxPhraseLength), 1, in); - - if(m_coding == 
REnc) { - m_sourceSymbols.load(in); - - size_t size; - read += std::fread(&size, sizeof(size_t), 1, in); - m_lexicalTableIndex.resize(size); - read += std::fread(&m_lexicalTableIndex[0], sizeof(size_t), size, in); - - read += std::fread(&size, sizeof(size_t), 1, in); - m_lexicalTable.resize(size); - read += std::fread(&m_lexicalTable[0], sizeof(SrcTrg), size, in); - } - - m_targetSymbols.load(in); - - m_symbolTree = new CanonicalHuffman(in); - - read += std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, in); - if(m_multipleScoreTrees) { - m_scoreTrees.resize(m_numScoreComponent); - for(size_t i = 0; i < m_numScoreComponent; i++) - m_scoreTrees[i] = new CanonicalHuffman(in); - } else { - m_scoreTrees.resize(1); - m_scoreTrees[0] = new CanonicalHuffman(in); - } - - if(m_containsAlignmentInfo) - m_alignTree = new CanonicalHuffman(in); - - size_t end = std::ftell(in); - return end - start; -} - -std::string PhraseDecoder::MakeSourceKey(std::string &source) -{ - return source + m_separator; -} - -TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection( - const ManagerBase &mgr, - const Phrase &sourcePhrase, - bool topLevel, - bool eval) -{ - - // Not using TargetPhraseCollection avoiding "new" operator - // which can introduce heavy locking with multiple threads - TargetPhraseVectorPtr tpv(new TargetPhraseVector()); - size_t bitsLeft = 0; - - if(m_coding == PREnc) { - std::pair cachedPhraseColl - = m_decodingCache.Retrieve(sourcePhrase); - - // Has been cached and is complete or does not need to be completed - if(cachedPhraseColl.first != NULL && (!topLevel || cachedPhraseColl.second == 0)) - return cachedPhraseColl.first; - - // Has been cached, but is incomplete - else if(cachedPhraseColl.first != NULL) { - bitsLeft = cachedPhraseColl.second; - tpv->resize(cachedPhraseColl.first->size()); - std::copy(cachedPhraseColl.first->begin(), - cachedPhraseColl.first->end(), - tpv->begin()); - } - } - - // Retrieve source phrase identifier - 
std::string sourcePhraseString = sourcePhrase.GetString(*m_input); - size_t sourcePhraseId = m_phraseDictionary.m_hash[MakeSourceKey(sourcePhraseString)]; - /* - cerr << "sourcePhraseString=" << sourcePhraseString << " " - << sourcePhraseId - << endl; - */ - if(sourcePhraseId != m_phraseDictionary.m_hash.GetSize()) { - // Retrieve compressed and encoded target phrase collection - std::string encodedPhraseCollection; - if(m_phraseDictionary.m_inMemory) - encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMemory[sourcePhraseId].str(); - else - encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMapped[sourcePhraseId].str(); - - BitWrapper<> encodedBitStream(encodedPhraseCollection); - if(m_coding == PREnc && bitsLeft) - encodedBitStream.SeekFromEnd(bitsLeft); - - // Decompress and decode target phrase collection - TargetPhraseVectorPtr decodedPhraseColl = - DecodeCollection(mgr, tpv, encodedBitStream, sourcePhrase, topLevel, eval); - - return decodedPhraseColl; - } else - return TargetPhraseVectorPtr(); -} - -TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( - const ManagerBase &mgr, - TargetPhraseVectorPtr tpv, - BitWrapper<> &encodedBitStream, - const Phrase &sourcePhrase, - bool topLevel, - bool eval) -{ - const System &system = mgr.system; - FactorCollection &vocab = system.GetVocab(); - - bool extending = tpv->size(); - size_t bitsLeft = encodedBitStream.TellFromEnd(); - - std::vector sourceWords; - if(m_coding == REnc) { - for(size_t i = 0; i < sourcePhrase.GetSize(); i++) { - std::string sourceWord - = sourcePhrase[i].GetString(*m_input); - unsigned idx = GetSourceSymbolId(sourceWord); - sourceWords.push_back(idx); - } - } - - unsigned phraseStopSymbol = 0; - AlignPoint alignStopSymbol(-1, -1); - - std::vector scores; - std::set alignment; - - enum DecodeState { New, Symbol, Score, Alignment, Add } state = New; - - size_t srcSize = sourcePhrase.GetSize(); - - TPCompact* targetPhrase = NULL; - while(encodedBitStream.TellFromEnd()) { - - 
if(state == New) { - // Creating new TargetPhrase on the heap - tpv->push_back(TPCompact()); - targetPhrase = &tpv->back(); - - alignment.clear(); - scores.clear(); - - state = Symbol; - } - - if(state == Symbol) { - unsigned symbol = m_symbolTree->Read(encodedBitStream); - if(symbol == phraseStopSymbol) { - state = Score; - } else { - if(m_coding == REnc) { - std::string wordString; - size_t type = GetREncType(symbol); - - if(type == 1) { - unsigned decodedSymbol = DecodeREncSymbol1(symbol); - wordString = GetTargetSymbol(decodedSymbol); - } else if (type == 2) { - size_t rank = DecodeREncSymbol2Rank(symbol); - size_t srcPos = DecodeREncSymbol2Position(symbol); - - if(srcPos >= sourceWords.size()) - return TargetPhraseVectorPtr(); - - wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank)); - if(m_phraseDictionary.m_useAlignmentInfo) { - size_t trgPos = targetPhrase->words.size(); - alignment.insert(AlignPoint(srcPos, trgPos)); - } - } else if(type == 3) { - size_t rank = DecodeREncSymbol3(symbol); - size_t srcPos = targetPhrase->words.size(); - - if(srcPos >= sourceWords.size()) - return TargetPhraseVectorPtr(); - - wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank)); - if(m_phraseDictionary.m_useAlignmentInfo) { - size_t trgPos = srcPos; - alignment.insert(AlignPoint(srcPos, trgPos)); - } - } - - Word word; - word.CreateFromString(vocab, system, wordString); - targetPhrase->words.push_back(word); - } else if(m_coding == PREnc) { - // if the symbol is just a word - if(GetPREncType(symbol) == 1) { - unsigned decodedSymbol = DecodePREncSymbol1(symbol); - - Word word; - word.CreateFromString(vocab, system, GetTargetSymbol(decodedSymbol)); - targetPhrase->words.push_back(word); - } - // if the symbol is a subphrase pointer - else { - int left = DecodePREncSymbol2Left(symbol); - int right = DecodePREncSymbol2Right(symbol); - unsigned rank = DecodePREncSymbol2Rank(symbol); - - int srcStart = left + targetPhrase->words.size(); - int 
srcEnd = srcSize - right - 1; - - // false positive consistency check - if(0 > srcStart || srcStart > srcEnd || unsigned(srcEnd) >= srcSize) - return TargetPhraseVectorPtr(); - - // false positive consistency check - if(m_maxRank && rank > m_maxRank) - return TargetPhraseVectorPtr(); - - // set subphrase by default to itself - TargetPhraseVectorPtr subTpv = tpv; - - // if range smaller than source phrase retrieve subphrase - if(unsigned(srcEnd - srcStart + 1) != srcSize) { - SubPhrase subPhrase = sourcePhrase.GetSubPhrase(srcStart, srcEnd - srcStart + 1); - subTpv = CreateTargetPhraseCollection(mgr, subPhrase, false); - } else { - // false positive consistency check - if(rank >= tpv->size()-1) - return TargetPhraseVectorPtr(); - } - - // false positive consistency check - if(subTpv != NULL && rank < subTpv->size()) { - // insert the subphrase into the main target phrase - TPCompact& subTp = subTpv->at(rank); - if(m_phraseDictionary.m_useAlignmentInfo) { - // reconstruct the alignment data based on the alignment of the subphrase - for(std::set::const_iterator it = subTp.alignment.begin(); - it != subTp.alignment.end(); it++) { - alignment.insert(AlignPointSizeT(srcStart + it->first, - targetPhrase->words.size() + it->second)); - } - } - - std::copy(subTp.words.begin(), subTp.words.end(), std::back_inserter(targetPhrase->words)); - } else - return TargetPhraseVectorPtr(); - } - } else { - Word word; - word.CreateFromString(vocab, system, GetTargetSymbol(symbol)); - targetPhrase->words.push_back(word); - } - } - } else if(state == Score) { - size_t idx = m_multipleScoreTrees ? 
scores.size() : 0; - float score = m_scoreTrees[idx]->Read(encodedBitStream); - scores.push_back(score); - - if(scores.size() == m_numScoreComponent) { - targetPhrase->scores = scores; - - if(m_containsAlignmentInfo) - state = Alignment; - else - state = Add; - } - } else if(state == Alignment) { - AlignPoint alignPoint = m_alignTree->Read(encodedBitStream); - if(alignPoint == alignStopSymbol) { - state = Add; - } else { - if(m_phraseDictionary.m_useAlignmentInfo) - alignment.insert(AlignPointSizeT(alignPoint)); - } - } - - if(state == Add) { - if(m_phraseDictionary.m_useAlignmentInfo) { - size_t sourceSize = sourcePhrase.GetSize(); - size_t targetSize = targetPhrase->words.size(); - for(std::set::iterator it = alignment.begin(); it != alignment.end(); it++) { - if(it->first >= sourceSize || it->second >= targetSize) - return TargetPhraseVectorPtr(); - } - targetPhrase->alignment = alignment; - } - - if(m_coding == PREnc) { - if(!m_maxRank || tpv->size() <= m_maxRank) - bitsLeft = encodedBitStream.TellFromEnd(); - - if(!topLevel && m_maxRank && tpv->size() >= m_maxRank) - break; - } - - if(encodedBitStream.TellFromEnd() <= 8) - break; - - state = New; - } - } - - if(m_coding == PREnc && !extending) { - bitsLeft = bitsLeft > 8 ? 
bitsLeft : 0; - m_decodingCache.Cache(sourcePhrase, tpv, bitsLeft, m_maxRank); - } - - return tpv; -} - -void PhraseDecoder::PruneCache() -{ - m_decodingCache.Prune(); -} - -} diff --git a/moses2/TranslationModel/CompactPT/PhraseDecoder.h b/moses2/TranslationModel/CompactPT/PhraseDecoder.h deleted file mode 100644 index 79faa38a6..000000000 --- a/moses2/TranslationModel/CompactPT/PhraseDecoder.h +++ /dev/null @@ -1,142 +0,0 @@ -// $Id$ -// vim:tabstop=2 -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (C) 2006 University of Edinburgh - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. 
- -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "PhraseTableCompact.h" -#include "StringVector.h" -#include "CanonicalHuffman.h" -#include "TargetPhraseCollectionCache.h" - -#include "../../Phrase.h" -#include "../../ManagerBase.h" - -namespace Moses2 -{ - -class PhraseTableCompact; - -class PhraseDecoder -{ -protected: - - friend class PhraseTableCompact; - - typedef std::pair AlignPoint; - typedef std::pair SrcTrg; - - enum Coding { None, REnc, PREnc } m_coding; - - size_t m_numScoreComponent; - bool m_containsAlignmentInfo; - size_t m_maxRank; - size_t m_maxPhraseLength; - - boost::unordered_map m_sourceSymbolsMap; - StringVector m_sourceSymbols; - StringVector m_targetSymbols; - - std::vector m_lexicalTableIndex; - std::vector m_lexicalTable; - - CanonicalHuffman* m_symbolTree; - - bool m_multipleScoreTrees; - std::vector*> m_scoreTrees; - - CanonicalHuffman* m_alignTree; - - TargetPhraseCollectionCache m_decodingCache; - - PhraseTableCompact& m_phraseDictionary; - - // *********************************************** - - const std::vector* m_input; - const std::vector* m_output; - - std::string m_separator; - - // *********************************************** - - unsigned GetSourceSymbolId(std::string& s); - std::string GetTargetSymbol(unsigned id) const; - - size_t GetREncType(unsigned encodedSymbol); - size_t GetPREncType(unsigned encodedSymbol); - - unsigned GetTranslation(unsigned srcIdx, size_t rank); - - size_t GetMaxSourcePhraseLength(); - - unsigned DecodeREncSymbol1(unsigned encodedSymbol); - unsigned DecodeREncSymbol2Rank(unsigned encodedSymbol); - unsigned DecodeREncSymbol2Position(unsigned 
encodedSymbol); - unsigned DecodeREncSymbol3(unsigned encodedSymbol); - - unsigned DecodePREncSymbol1(unsigned encodedSymbol); - int DecodePREncSymbol2Left(unsigned encodedSymbol); - int DecodePREncSymbol2Right(unsigned encodedSymbol); - unsigned DecodePREncSymbol2Rank(unsigned encodedSymbol); - - std::string MakeSourceKey(std::string &); - -public: - - PhraseDecoder( - PhraseTableCompact &phraseDictionary, - const std::vector* input, - const std::vector* output, - size_t numScoreComponent - ); - - ~PhraseDecoder(); - - size_t Load(std::FILE* in); - - TargetPhraseVectorPtr CreateTargetPhraseCollection( - const ManagerBase &mgr, - const Phrase &sourcePhrase, - bool topLevel = false, - bool eval = true); - - TargetPhraseVectorPtr DecodeCollection( - const ManagerBase &mgr, - TargetPhraseVectorPtr tpv, - BitWrapper<> &encodedBitStream, - const Phrase &sourcePhrase, - bool topLevel, - bool eval); - - void PruneCache(); -}; - -} - diff --git a/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp b/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp deleted file mode 100644 index 49244df1b..000000000 --- a/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp +++ /dev/null @@ -1,222 +0,0 @@ -#include -#include -#include "PhraseTableCompact.h" -#include "PhraseDecoder.h" -#include "../../PhraseBased/InputPath.h" -#include "../../PhraseBased/Manager.h" -#include "../../PhraseBased/TargetPhrases.h" -#include "../../PhraseBased/TargetPhraseImpl.h" -#include "../../PhraseBased/Sentence.h" - -using namespace std; -using namespace boost::algorithm; - -namespace Moses2 -{ -bool PhraseTableCompact::s_inMemoryByDefault = false; - -PhraseTableCompact::PhraseTableCompact(size_t startInd, const std::string &line) -:PhraseTable(startInd, line) -,m_inMemory(s_inMemoryByDefault) -,m_useAlignmentInfo(true) -,m_hash(10, 16) -,m_phraseDecoder(0) -{ - ReadParameters(); -} - -PhraseTableCompact::~PhraseTableCompact() -{ - -} - -void PhraseTableCompact::Load(System &system) -{ - 
std::string tFilePath = m_path; - - std::string suffix = ".minphr"; - if (!ends_with(tFilePath, suffix)) tFilePath += suffix; - if (!FileExists(tFilePath)) - throw runtime_error("Error: File " + tFilePath + " does not exist."); - - m_phraseDecoder - = new PhraseDecoder(*this, &m_input, &m_output, GetNumScores()); - - std::FILE* pFile = std::fopen(tFilePath.c_str() , "r"); - - size_t indexSize; - //if(m_inMemory) - // Load source phrase index into memory - indexSize = m_hash.Load(pFile); - // else - // Keep source phrase index on disk - //indexSize = m_hash.LoadIndex(pFile); - - size_t coderSize = m_phraseDecoder->Load(pFile); - - size_t phraseSize; - if(m_inMemory) { - // Load target phrase collections into memory - phraseSize = m_targetPhrasesMemory.load(pFile, false); - } - else { - // Keep target phrase collections on disk - phraseSize = m_targetPhrasesMapped.load(pFile, true); - } - - UTIL_THROW_IF2(indexSize == 0 || coderSize == 0 || phraseSize == 0, - "Not successfully loaded"); -} - -void PhraseTableCompact::SetParameter(const std::string& key, const std::string& value) -{ - if (key == "blah") { - - } - else { - PhraseTable::SetParameter(key, value); - } -} - -void PhraseTableCompact::CleanUpAfterSentenceProcessing() const -{ - //if(!m_sentenceCache.get()) - // m_sentenceCache.reset(new PhraseCache()); - - m_phraseDecoder->PruneCache(); - //m_sentenceCache->clear(); -} - - -// pb -void PhraseTableCompact::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const -{ - size_t inputSize = static_cast(mgr.GetInput()).GetSize(); - InputPaths &inputPathsCast = static_cast(inputPaths); - - for (size_t i = 0; i < inputSize; ++i) { - for (size_t startPos = 0; startPos < inputSize; ++startPos) { - size_t endPos = startPos + i; - if (endPos >= inputSize) { - break; - } - InputPath *path = inputPathsCast.GetMatrix().GetValue(startPos, i); - //cerr << "path=" << path->Debug(mgr.system) << endl; - TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path); - 
path->AddTargetPhrases(*this, tps); - } - } -} - -TargetPhrases *PhraseTableCompact::Lookup(const Manager &mgr, MemPool &pool, - InputPath &inputPath) const -{ - TargetPhrases *ret = NULL; - - const Phrase &sourcePhrase = inputPath.subPhrase; - //cerr << "sourcePhrase=" << sourcePhrase.Debug(mgr.system) << endl; - - // There is no souch source phrase if source phrase is longer than longest - // observed source phrase during compilation - if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength()) - return ret; - - // Retrieve target phrase collection from phrase table - TargetPhraseVectorPtr decodedPhraseColl - = m_phraseDecoder->CreateTargetPhraseCollection(mgr, sourcePhrase, true, true); - - if(decodedPhraseColl != NULL && decodedPhraseColl->size()) { - TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl)); - //TargetPhraseCollection::shared_ptr phraseColl(new TargetPhraseCollection); - ret = new (pool.Allocate()) TargetPhrases(pool, decodedPhraseColl->size()); - - for (size_t i = 0; i < decodedPhraseColl->size(); ++i) { - const TPCompact &tpCompact = decodedPhraseColl->at(i); - const TargetPhraseImpl *tp = CreateTargetPhrase(mgr, tpCompact, sourcePhrase); - - ret->AddTargetPhrase(*tp); - } - - ret->SortAndPrune(m_tableLimit); - mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *ret, sourcePhrase); - - //cerr << "RET2=" << ret->Debug(mgr.system) << endl; - /* - // Cache phrase pair for clean-up or retrieval with PREnc - const_cast(this)->CacheForCleanup(phraseColl); - - return phraseColl; - */ - } - - return ret; - -} - -const TargetPhraseImpl *PhraseTableCompact::CreateTargetPhrase( - const Manager &mgr, - const TPCompact &tpCompact, - const Phrase &sourcePhrase) const -{ - MemPool &pool = mgr.GetPool(); - - size_t size = tpCompact.words.size(); - TargetPhraseImpl *ret = new TargetPhraseImpl(pool, *this, mgr.system, size); - - // words - for (size_t i = 0; i < size; ++i) { - const Word &compactWord = tpCompact.words[i]; - 
Word &tpWord = (*ret)[i]; - tpWord = compactWord; - } - - // scores - Scores &scores = ret->GetScores(); - scores.Assign(mgr.system, *this, tpCompact.scores); - - // align - ret->SetAlignTerm(tpCompact.alignment); - - // score - mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, sourcePhrase, *ret); - - // Cache phrase pair for clean-up or retrieval with PREnc - //const_cast(this)->CacheForCleanup(phraseColl); - - //cerr << "ret=" << ret->Debug(mgr.system) << endl; - return ret; -} - - -// scfg -void PhraseTableCompact::InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const -{ - UTIL_THROW2("Not implemented"); -} - -void PhraseTableCompact::Lookup( - MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const -{ - UTIL_THROW2("Not implemented"); -} - -void PhraseTableCompact::LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const -{ - UTIL_THROW2("Not implemented"); -} - -} diff --git a/moses2/TranslationModel/CompactPT/PhraseTableCompact.h b/moses2/TranslationModel/CompactPT/PhraseTableCompact.h deleted file mode 100644 index 84ea7e4b2..000000000 --- a/moses2/TranslationModel/CompactPT/PhraseTableCompact.h +++ /dev/null @@ -1,68 +0,0 @@ -#pragma once -#include "../PhraseTable.h" -#include "BlockHashIndex.h" - -namespace Moses2 -{ -class PhraseDecoder; -class TPCompact; - -class PhraseTableCompact: public PhraseTable -{ -public: - PhraseTableCompact(size_t startInd, const std::string &line); - virtual ~PhraseTableCompact(); - void Load(System &system); - virtual void SetParameter(const std::string& key, const std::string& value); - - virtual void CleanUpAfterSentenceProcessing() const; - - virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool, - 
InputPath &inputPath) const; - - // scfg - virtual void InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const; - - virtual void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const; - - virtual void Lookup( - MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const; - -protected: - static bool s_inMemoryByDefault; - bool m_inMemory; - bool m_useAlignmentInfo; - - BlockHashIndex m_hash; - - StringVector m_targetPhrasesMapped; - StringVector m_targetPhrasesMemory; - - friend class PhraseDecoder; - PhraseDecoder* m_phraseDecoder; - - const TargetPhraseImpl *CreateTargetPhrase( - const Manager &mgr, - const TPCompact &tpCompact, - const Phrase &sourcePhrase) const; - - // SCFG - virtual void LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const; - -}; - -} From c44d226b012189aeabc65662697ca6072a416d64 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 3 Feb 2017 10:02:39 +0000 Subject: [PATCH 111/176] remove DALM from Moses2 --- contrib/other-builds/moses2/.project | 30 ---- moses2/Jamfile | 1 - moses2/LM/LanguageModelDALM.cpp | 246 --------------------------- moses2/LM/LanguageModelDALM.h | 75 -------- 4 files changed, 352 deletions(-) delete mode 100644 moses2/LM/LanguageModelDALM.cpp delete mode 100644 moses2/LM/LanguageModelDALM.h diff --git a/contrib/other-builds/moses2/.project b/contrib/other-builds/moses2/.project index 8142f8b63..9d6349950 100644 --- a/contrib/other-builds/moses2/.project +++ b/contrib/other-builds/moses2/.project @@ -517,16 +517,6 @@ 1 PARENT-3-PROJECT_LOC/moses2/LM/LanguageModel.h - - LM/LanguageModelDALM.cpp - 1 - PARENT-3-PROJECT_LOC/moses2/LM/LanguageModelDALM.cpp - - - LM/LanguageModelDALM.h - 1 - 
PARENT-3-PROJECT_LOC/moses2/LM/LanguageModelDALM.h - MorphoTrie/MorphTrie.h 1 @@ -1457,26 +1447,6 @@ 1 PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/PackedArray.h - - TranslationModel/CompactPT/PhraseDecoder.cpp - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp - - - TranslationModel/CompactPT/PhraseDecoder.h - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/PhraseDecoder.h - - - TranslationModel/CompactPT/PhraseTableCompact.cpp - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp - - - TranslationModel/CompactPT/PhraseTableCompact.h - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/CompactPT/PhraseTableCompact.h - TranslationModel/CompactPT/StringVector.h 1 diff --git a/moses2/Jamfile b/moses2/Jamfile index aaff82b9c..b09bd089b 100644 --- a/moses2/Jamfile +++ b/moses2/Jamfile @@ -48,7 +48,6 @@ alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses/Translatio FF/OSM/KenOSM.cpp FF/OSM/osmHyp.cpp - # LM/LanguageModelDALM.cpp LM/LanguageModel.cpp LM/KENLM.cpp LM/KENLMBatch.cpp diff --git a/moses2/LM/LanguageModelDALM.cpp b/moses2/LM/LanguageModelDALM.cpp deleted file mode 100644 index 7d3e8242b..000000000 --- a/moses2/LM/LanguageModelDALM.cpp +++ /dev/null @@ -1,246 +0,0 @@ -/* - * LanguageModelDALM.cpp - * - * Created on: 5 Dec 2015 - * Author: hieu - */ - -#include "LanguageModelDALM.h" -#include "../TypeDef.h" -#include "../System.h" -#include "dalm.h" -#include "util/exception.hh" -#include "../legacy/InputFileStream.h" - -using namespace std; - -namespace Moses2 -{ - -////////////////////////////////////////////////////////////////////////////////////////// -class Murmur: public DALM::State::HashFunction -{ -public: - Murmur(std::size_t seed=0): seed(seed) { - } - virtual std::size_t operator()(const DALM::VocabId *words, std::size_t size) const { - return util::MurmurHashNative(words, sizeof(DALM::VocabId) * size, seed); - } -private: - std::size_t seed; -}; - 
-////////////////////////////////////////////////////////////////////////////////////////// -class DALMState : public FFState -{ -private: - DALM::State state; - -public: - DALMState() { - } - - DALMState(const DALMState &from) { - state = from.state; - } - - virtual ~DALMState() { - } - - void reset(const DALMState &from) { - state = from.state; - } - - virtual int Compare(const FFState& other) const { - const DALMState &o = static_cast(other); - if(state.get_count() < o.state.get_count()) return -1; - else if(state.get_count() > o.state.get_count()) return 1; - else return state.compare(o.state); - } - - virtual size_t hash() const { - // imitate KenLM - return state.hash(Murmur()); - } - - virtual bool operator==(const FFState& other) const { - const DALMState &o = static_cast(other); - return state.compare(o.state) == 0; - } - - DALM::State &get_state() { - return state; - } - - void refresh() { - state.refresh(); - } - - virtual std::string ToString() const - { return "DALM state"; } - -}; - -////////////////////////////////////////////////////////////////////////////////////////////////////// -inline void read_ini(const char *inifile, string &model, string &words, string &wordstxt) -{ - ifstream ifs(inifile); - string line; - - getline(ifs, line); - while(ifs) { - unsigned int pos = line.find("="); - string key = line.substr(0, pos); - string value = line.substr(pos+1, line.size()-pos); - if(key=="MODEL") { - model = value; - } else if(key=="WORDS") { - words = value; - } else if(key=="WORDSTXT") { - wordstxt = value; - } - getline(ifs, line); - } -} -///////////////////////// - -LanguageModelDALM::LanguageModelDALM(size_t startInd, const std::string &line) -:StatefulFeatureFunction(startInd, line) -{ - ReadParameters(); -} - -LanguageModelDALM::~LanguageModelDALM() { - // TODO Auto-generated destructor stub -} - -void LanguageModelDALM::Load(System &system) -{ - ///////////////////// - // READING INIFILE // - ///////////////////// - string inifile= 
m_filePath + "/dalm.ini"; - - string model; // Path to the double-array file. - string words; // Path to the vocabulary file. - string wordstxt; //Path to the vocabulary file in text format. - read_ini(inifile.c_str(), model, words, wordstxt); - - model = m_filePath + "/" + model; - words = m_filePath + "/" + words; - wordstxt = m_filePath + "/" + wordstxt; - - UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(), - util::FileOpenException, - "Failed to read DALM ini file " << m_filePath << ". Probably doesn't exist"); - - //////////////// - // LOADING LM // - //////////////// - - // Preparing a logger object. - m_logger = new DALM::Logger(stderr); - m_logger->setLevel(DALM::LOGGER_INFO); - - // Load the vocabulary file. - m_vocab = new DALM::Vocabulary(words, *m_logger); - - // Load the language model. - m_lm = new DALM::LM(model, *m_vocab, m_nGramOrder, *m_logger); - - wid_start = m_vocab->lookup(BOS_); - wid_end = m_vocab->lookup(EOS_); - - // vocab mapping - CreateVocabMapping(wordstxt, system); - - m_beginSentenceFactor = system.GetVocab().AddFactor(BOS_, system); -} - -void LanguageModelDALM::CreateVocabMapping(const std::string &wordstxt, const System &system) -{ - InputFileStream vocabStrm(wordstxt); - - std::vector< std::pair > vlist; - string line; - std::size_t max_fid = 0; - while(getline(vocabStrm, line)) { - const Factor *factor = system.GetVocab().AddFactor(line, system); - std::size_t fid = factor->GetId(); - DALM::VocabId wid = m_vocab->lookup(line.c_str()); - - vlist.push_back(std::pair(fid, wid)); - if(max_fid < fid) max_fid = fid; - } - - for(std::size_t i = 0; i < m_vocabMap.size(); i++) { - m_vocabMap[i] = m_vocab->unk(); - } - - m_vocabMap.resize(max_fid+1, m_vocab->unk()); - std::vector< std::pair >::iterator it = vlist.begin(); - while(it != vlist.end()) { - std::pair &entry = *it; - m_vocabMap[entry.first] = entry.second; - - ++it; - } -} - -void LanguageModelDALM::SetParameter(const std::string& key, const std::string& value) 
-{ - if (key == "factor") { - m_factorType = Scan(value); - } else if (key == "order") { - m_nGramOrder = Scan(value); - } else if (key == "path") { - m_filePath = value; - } else { - StatefulFeatureFunction::SetParameter(key, value); - } - m_ContextSize = m_nGramOrder-1; -} - -FFState* LanguageModelDALM::BlankState(MemPool &pool, const System &sys) const -{ - DALMState *state = new DALMState(); - return state; -} - -void LanguageModelDALM::EmptyHypothesisState(FFState &state, - const ManagerBase &mgr, - const InputType &input, - const Hypothesis &hypo) const -{ - DALMState &dalmState = static_cast(state); - m_lm->init_state(dalmState.get_state()); -} - - void LanguageModelDALM::EvaluateInIsolation(MemPool &pool, - const System &system, - const Phrase &source, - const TargetPhraseImpl &targetPhrase, - Scores &scores, - SCORE &estimatedScore) const - { - - } - -void LanguageModelDALM::EvaluateWhenApplied(const ManagerBase &mgr, -const Hypothesis &hypo, -const FFState &prevState, -Scores &scores, -FFState &state) const -{ - -} - -void LanguageModelDALM::EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const -{ - UTIL_THROW2("Not implemented"); -} - -} - diff --git a/moses2/LM/LanguageModelDALM.h b/moses2/LM/LanguageModelDALM.h deleted file mode 100644 index cbbeca97d..000000000 --- a/moses2/LM/LanguageModelDALM.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * LanguageModelDALM.h - * - * Created on: 5 Dec 2015 - * Author: hieu - */ - -#pragma once -#include "../FF/StatefulFeatureFunction.h" -#include "../legacy/Util2.h" -#include "../legacy/Factor.h" - -namespace DALM -{ -class Logger; -class Vocabulary; -class State; -class LM; -union Fragment; -class Gap; - -typedef unsigned int VocabId; -} - -namespace Moses2 -{ - -class LanguageModelDALM: public StatefulFeatureFunction -{ -public: - LanguageModelDALM(size_t startInd, const std::string &line); - virtual ~LanguageModelDALM(); - - virtual void 
Load(System &system); - virtual void SetParameter(const std::string& key, const std::string& value); - - virtual FFState* BlankState(MemPool &pool, const System &sys) const; - virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const; - - virtual void - EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; - - virtual void EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const; - - virtual void EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const; - -protected: - FactorType m_factorType; - - std::string m_filePath; - size_t m_nGramOrder; //! max n-gram length contained in this LM - size_t m_ContextSize; - - DALM::Logger *m_logger; - DALM::Vocabulary *m_vocab; - DALM::LM *m_lm; - DALM::VocabId wid_start, wid_end; - - const Factor *m_beginSentenceFactor; - - mutable std::vector m_vocabMap; - - void CreateVocabMapping(const std::string &wordstxt, const System &system); - -}; - -} - From 5f0f8499e42de4bf5b938caa2dd7f884ed5741ff Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 3 Feb 2017 10:10:03 +0000 Subject: [PATCH 112/176] MorphoTrie -> InMemoryTrie --- contrib/other-builds/moses2/.project | 40 +++++++++---------- .../InMemoryTrie.h} | 16 ++++---- moses2/{MorphoTrie => InMemoryTrie}/Node.h | 4 +- moses2/{MorphoTrie => InMemoryTrie}/utils.h | 4 +- moses2/LM/LanguageModel.h | 4 +- 5 files changed, 33 insertions(+), 35 deletions(-) rename moses2/{MorphoTrie/MorphTrie.h => InMemoryTrie/InMemoryTrie.h} (83%) rename moses2/{MorphoTrie => InMemoryTrie}/Node.h (96%) rename moses2/{MorphoTrie => InMemoryTrie}/utils.h (93%) diff --git a/contrib/other-builds/moses2/.project b/contrib/other-builds/moses2/.project index 
9d6349950..87263a312 100644 --- a/contrib/other-builds/moses2/.project +++ b/contrib/other-builds/moses2/.project @@ -97,6 +97,11 @@ 1 PARENT-3-PROJECT_LOC/moses2/HypothesisColl.h + + InMemoryTrie + 2 + virtual:/virtual + InputPathBase.cpp 1 @@ -167,11 +172,6 @@ 1 PARENT-3-PROJECT_LOC/moses2/MemPoolAllocator.h - - MorphoTrie - 2 - virtual:/virtual - Phrase.cpp 1 @@ -477,6 +477,21 @@ 1 PARENT-3-PROJECT_LOC/moses2/FF/WordPenalty.h + + InMemoryTrie/InMemoryTrie.h + 1 + PARENT-3-PROJECT_LOC/moses2/InMemoryTrie/InMemoryTrie.h + + + InMemoryTrie/Node.h + 1 + PARENT-3-PROJECT_LOC/moses2/InMemoryTrie/Node.h + + + InMemoryTrie/utils.h + 1 + PARENT-3-PROJECT_LOC/moses2/InMemoryTrie/utils.h + LM/GPULM.cpp 1 @@ -517,21 +532,6 @@ 1 PARENT-3-PROJECT_LOC/moses2/LM/LanguageModel.h - - MorphoTrie/MorphTrie.h - 1 - PARENT-3-PROJECT_LOC/moses2/MorphoTrie/MorphTrie.h - - - MorphoTrie/Node.h - 1 - PARENT-3-PROJECT_LOC/moses2/MorphoTrie/Node.h - - - MorphoTrie/utils.h - 1 - PARENT-3-PROJECT_LOC/moses2/MorphoTrie/utils.h - PhraseBased/CubePruningMiniStack 2 diff --git a/moses2/MorphoTrie/MorphTrie.h b/moses2/InMemoryTrie/InMemoryTrie.h similarity index 83% rename from moses2/MorphoTrie/MorphTrie.h rename to moses2/InMemoryTrie/InMemoryTrie.h index 0b013b5bb..e7e54a118 100644 --- a/moses2/MorphoTrie/MorphTrie.h +++ b/moses2/InMemoryTrie/InMemoryTrie.h @@ -1,5 +1,4 @@ -#ifndef MORPHTRIE_H_ -#define MORPHTRIE_H_ +#pragma once #include #include "Node.h" @@ -8,10 +7,10 @@ namespace Moses2 { template -class MorphTrie +class InMemoryTrie { public: - MorphTrie() + InMemoryTrie() { } Node* insert(const std::vector& word, @@ -27,7 +26,7 @@ private: }; template -Node* MorphTrie::insert( +Node* InMemoryTrie::insert( const std::vector& word, const ValueClass& value) { Node* cNode = &root; @@ -40,7 +39,7 @@ Node* MorphTrie::insert( } template -const Node* MorphTrie::getNode( +const Node* InMemoryTrie::getNode( const std::vector& words) const { size_t stoppedAtInd; @@ -52,7 +51,7 @@ const Node* 
MorphTrie::getNode( } template -const Node &MorphTrie::getNode( +const Node &InMemoryTrie::getNode( const std::vector& words, size_t &stoppedAtInd) const { const Node *prevNode = &root, *newNode; @@ -71,7 +70,7 @@ const Node &MorphTrie::getNode( } template -std::vector*> MorphTrie::getNodes( +std::vector*> InMemoryTrie::getNodes( const std::vector& words, size_t &stoppedAtInd) const { std::vector*> ret; @@ -97,4 +96,3 @@ std::vector*> MorphTrie:: } -#endif /* end of include guard: MORPHTRIE_H_ */ diff --git a/moses2/MorphoTrie/Node.h b/moses2/InMemoryTrie/Node.h similarity index 96% rename from moses2/MorphoTrie/Node.h rename to moses2/InMemoryTrie/Node.h index ca165ef67..4511241bf 100644 --- a/moses2/MorphoTrie/Node.h +++ b/moses2/InMemoryTrie/Node.h @@ -1,5 +1,4 @@ -#ifndef NODE_H_ -#define NODE_H_ +#pragma once #include #include @@ -90,4 +89,3 @@ Node* Node::findSub( } -#endif /* end of include guard: NODE_H_ */ diff --git a/moses2/MorphoTrie/utils.h b/moses2/InMemoryTrie/utils.h similarity index 93% rename from moses2/MorphoTrie/utils.h rename to moses2/InMemoryTrie/utils.h index e6f0aa7d6..eccb95a93 100644 --- a/moses2/MorphoTrie/utils.h +++ b/moses2/InMemoryTrie/utils.h @@ -1,4 +1,6 @@ -#include "MorphTrie.h" +#pragma once + +#include "InMemoryTrie.h" #include #include #include diff --git a/moses2/LM/LanguageModel.h b/moses2/LM/LanguageModel.h index d262a8497..d53ebdbc0 100644 --- a/moses2/LM/LanguageModel.h +++ b/moses2/LM/LanguageModel.h @@ -9,7 +9,7 @@ #include "../FF/StatefulFeatureFunction.h" #include "../TypeDef.h" -#include "../MorphoTrie/MorphTrie.h" +#include "../InMemoryTrie/InMemoryTrie.h" #include "../legacy/Factor.h" #include "../legacy/Util2.h" @@ -79,7 +79,7 @@ protected: FactorType m_factorType; size_t m_order; - MorphTrie m_root; + InMemoryTrie m_root; SCORE m_oov; const Factor *m_bos; const Factor *m_eos; From 3916fbc8a6f540b3efecb7298a43c963023bcc6a Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 3 Feb 2017 10:27:53 +0000 Subject: 
[PATCH 113/176] redelete --- moses2/LM/LanguageModelDALM.cpp | 248 ------------------ moses2/LM/LanguageModelDALM.h | 75 ------ .../CompactPT/PhraseTableCompact.cpp | 220 ---------------- .../CompactPT/PhraseTableCompact.h | 68 ----- 4 files changed, 611 deletions(-) delete mode 100644 moses2/LM/LanguageModelDALM.cpp delete mode 100644 moses2/LM/LanguageModelDALM.h delete mode 100644 moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp delete mode 100644 moses2/TranslationModel/CompactPT/PhraseTableCompact.h diff --git a/moses2/LM/LanguageModelDALM.cpp b/moses2/LM/LanguageModelDALM.cpp deleted file mode 100644 index ed2340995..000000000 --- a/moses2/LM/LanguageModelDALM.cpp +++ /dev/null @@ -1,248 +0,0 @@ -/* - * LanguageModelDALM.cpp - * - * Created on: 5 Dec 2015 - * Author: hieu - */ - -#include "LanguageModelDALM.h" -#include "../TypeDef.h" -#include "../System.h" -#include "dalm.h" -#include "util/exception.hh" -#include "../legacy/InputFileStream.h" - -using namespace std; - -namespace Moses2 -{ - -////////////////////////////////////////////////////////////////////////////////////////// -class Murmur: public DALM::State::HashFunction -{ -public: - Murmur(std::size_t seed=0): seed(seed) { - } - virtual std::size_t operator()(const DALM::VocabId *words, std::size_t size) const { - return util::MurmurHashNative(words, sizeof(DALM::VocabId) * size, seed); - } -private: - std::size_t seed; -}; - -////////////////////////////////////////////////////////////////////////////////////////// -class DALMState : public FFState -{ -private: - DALM::State state; - -public: - DALMState() { - } - - DALMState(const DALMState &from) { - state = from.state; - } - - virtual ~DALMState() { - } - - void reset(const DALMState &from) { - state = from.state; - } - - virtual int Compare(const FFState& other) const { - const DALMState &o = static_cast(other); - if(state.get_count() < o.state.get_count()) return -1; - else if(state.get_count() > o.state.get_count()) return 1; - else 
return state.compare(o.state); - } - - virtual size_t hash() const { - // imitate KenLM - return state.hash(Murmur()); - } - - virtual bool operator==(const FFState& other) const { - const DALMState &o = static_cast(other); - return state.compare(o.state) == 0; - } - - DALM::State &get_state() { - return state; - } - - void refresh() { - state.refresh(); - } - - virtual std::string ToString() const { - return "DALM state"; - } - -}; - -////////////////////////////////////////////////////////////////////////////////////////////////////// -inline void read_ini(const char *inifile, string &model, string &words, string &wordstxt) -{ - ifstream ifs(inifile); - string line; - - getline(ifs, line); - while(ifs) { - unsigned int pos = line.find("="); - string key = line.substr(0, pos); - string value = line.substr(pos+1, line.size()-pos); - if(key=="MODEL") { - model = value; - } else if(key=="WORDS") { - words = value; - } else if(key=="WORDSTXT") { - wordstxt = value; - } - getline(ifs, line); - } -} -///////////////////////// - -LanguageModelDALM::LanguageModelDALM(size_t startInd, const std::string &line) - :StatefulFeatureFunction(startInd, line) -{ - ReadParameters(); -} - -LanguageModelDALM::~LanguageModelDALM() -{ - // TODO Auto-generated destructor stub -} - -void LanguageModelDALM::Load(System &system) -{ - ///////////////////// - // READING INIFILE // - ///////////////////// - string inifile= m_filePath + "/dalm.ini"; - - string model; // Path to the double-array file. - string words; // Path to the vocabulary file. - string wordstxt; //Path to the vocabulary file in text format. - read_ini(inifile.c_str(), model, words, wordstxt); - - model = m_filePath + "/" + model; - words = m_filePath + "/" + words; - wordstxt = m_filePath + "/" + wordstxt; - - UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(), - util::FileOpenException, - "Failed to read DALM ini file " << m_filePath << ". 
Probably doesn't exist"); - - //////////////// - // LOADING LM // - //////////////// - - // Preparing a logger object. - m_logger = new DALM::Logger(stderr); - m_logger->setLevel(DALM::LOGGER_INFO); - - // Load the vocabulary file. - m_vocab = new DALM::Vocabulary(words, *m_logger); - - // Load the language model. - m_lm = new DALM::LM(model, *m_vocab, m_nGramOrder, *m_logger); - - wid_start = m_vocab->lookup(BOS_); - wid_end = m_vocab->lookup(EOS_); - - // vocab mapping - CreateVocabMapping(wordstxt, system); - - m_beginSentenceFactor = system.GetVocab().AddFactor(BOS_, system); -} - -void LanguageModelDALM::CreateVocabMapping(const std::string &wordstxt, const System &system) -{ - InputFileStream vocabStrm(wordstxt); - - std::vector< std::pair > vlist; - string line; - std::size_t max_fid = 0; - while(getline(vocabStrm, line)) { - const Factor *factor = system.GetVocab().AddFactor(line, system); - std::size_t fid = factor->GetId(); - DALM::VocabId wid = m_vocab->lookup(line.c_str()); - - vlist.push_back(std::pair(fid, wid)); - if(max_fid < fid) max_fid = fid; - } - - for(std::size_t i = 0; i < m_vocabMap.size(); i++) { - m_vocabMap[i] = m_vocab->unk(); - } - - m_vocabMap.resize(max_fid+1, m_vocab->unk()); - std::vector< std::pair >::iterator it = vlist.begin(); - while(it != vlist.end()) { - std::pair &entry = *it; - m_vocabMap[entry.first] = entry.second; - - ++it; - } -} - -void LanguageModelDALM::SetParameter(const std::string& key, const std::string& value) -{ - if (key == "factor") { - m_factorType = Scan(value); - } else if (key == "order") { - m_nGramOrder = Scan(value); - } else if (key == "path") { - m_filePath = value; - } else { - StatefulFeatureFunction::SetParameter(key, value); - } - m_ContextSize = m_nGramOrder-1; -} - -FFState* LanguageModelDALM::BlankState(MemPool &pool, const System &sys) const -{ - DALMState *state = new DALMState(); - return state; -} - -void LanguageModelDALM::EmptyHypothesisState(FFState &state, - const ManagerBase &mgr, - 
const InputType &input, - const Hypothesis &hypo) const -{ - DALMState &dalmState = static_cast(state); - m_lm->init_state(dalmState.get_state()); -} - -void LanguageModelDALM::EvaluateInIsolation(MemPool &pool, - const System &system, - const Phrase &source, - const TargetPhraseImpl &targetPhrase, - Scores &scores, - SCORE &estimatedScore) const -{ - -} - -void LanguageModelDALM::EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, - const FFState &prevState, - Scores &scores, - FFState &state) const -{ - -} - -void LanguageModelDALM::EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const -{ - UTIL_THROW2("Not implemented"); -} - -} - diff --git a/moses2/LM/LanguageModelDALM.h b/moses2/LM/LanguageModelDALM.h deleted file mode 100644 index a9a010dca..000000000 --- a/moses2/LM/LanguageModelDALM.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * LanguageModelDALM.h - * - * Created on: 5 Dec 2015 - * Author: hieu - */ - -#pragma once -#include "../FF/StatefulFeatureFunction.h" -#include "../legacy/Util2.h" -#include "../legacy/Factor.h" - -namespace DALM -{ -class Logger; -class Vocabulary; -class State; -class LM; -union Fragment; -class Gap; - -typedef unsigned int VocabId; -} - -namespace Moses2 -{ - -class LanguageModelDALM: public StatefulFeatureFunction -{ -public: - LanguageModelDALM(size_t startInd, const std::string &line); - virtual ~LanguageModelDALM(); - - virtual void Load(System &system); - virtual void SetParameter(const std::string& key, const std::string& value); - - virtual FFState* BlankState(MemPool &pool, const System &sys) const; - virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr, - const InputType &input, const Hypothesis &hypo) const; - - virtual void - EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, - const TargetPhraseImpl &targetPhrase, Scores &scores, - SCORE &estimatedScore) const; - - virtual void 
EvaluateWhenApplied(const ManagerBase &mgr, - const Hypothesis &hypo, const FFState &prevState, Scores &scores, - FFState &state) const; - - virtual void EvaluateWhenApplied(const SCFG::Manager &mgr, - const SCFG::Hypothesis &hypo, int featureID, Scores &scores, - FFState &state) const; - -protected: - FactorType m_factorType; - - std::string m_filePath; - size_t m_nGramOrder; //! max n-gram length contained in this LM - size_t m_ContextSize; - - DALM::Logger *m_logger; - DALM::Vocabulary *m_vocab; - DALM::LM *m_lm; - DALM::VocabId wid_start, wid_end; - - const Factor *m_beginSentenceFactor; - - mutable std::vector m_vocabMap; - - void CreateVocabMapping(const std::string &wordstxt, const System &system); - -}; - -} - diff --git a/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp b/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp deleted file mode 100644 index 707a52f58..000000000 --- a/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp +++ /dev/null @@ -1,220 +0,0 @@ -#include -#include -#include "PhraseTableCompact.h" -#include "PhraseDecoder.h" -#include "../../PhraseBased/InputPath.h" -#include "../../PhraseBased/Manager.h" -#include "../../PhraseBased/TargetPhrases.h" -#include "../../PhraseBased/TargetPhraseImpl.h" -#include "../../PhraseBased/Sentence.h" - -using namespace std; -using namespace boost::algorithm; - -namespace Moses2 -{ -bool PhraseTableCompact::s_inMemoryByDefault = false; - -PhraseTableCompact::PhraseTableCompact(size_t startInd, const std::string &line) - :PhraseTable(startInd, line) - ,m_inMemory(s_inMemoryByDefault) - ,m_useAlignmentInfo(true) - ,m_hash(10, 16) - ,m_phraseDecoder(0) -{ - ReadParameters(); -} - -PhraseTableCompact::~PhraseTableCompact() -{ - -} - -void PhraseTableCompact::Load(System &system) -{ - std::string tFilePath = m_path; - - std::string suffix = ".minphr"; - if (!ends_with(tFilePath, suffix)) tFilePath += suffix; - if (!FileExists(tFilePath)) - throw runtime_error("Error: File " + tFilePath + " 
does not exist."); - - m_phraseDecoder - = new PhraseDecoder(*this, &m_input, &m_output, GetNumScores()); - - std::FILE* pFile = std::fopen(tFilePath.c_str() , "r"); - - size_t indexSize; - //if(m_inMemory) - // Load source phrase index into memory - indexSize = m_hash.Load(pFile); - // else - // Keep source phrase index on disk - //indexSize = m_hash.LoadIndex(pFile); - - size_t coderSize = m_phraseDecoder->Load(pFile); - - size_t phraseSize; - if(m_inMemory) { - // Load target phrase collections into memory - phraseSize = m_targetPhrasesMemory.load(pFile, false); - } else { - // Keep target phrase collections on disk - phraseSize = m_targetPhrasesMapped.load(pFile, true); - } - - UTIL_THROW_IF2(indexSize == 0 || coderSize == 0 || phraseSize == 0, - "Not successfully loaded"); -} - -void PhraseTableCompact::SetParameter(const std::string& key, const std::string& value) -{ - if (key == "blah") { - - } else { - PhraseTable::SetParameter(key, value); - } -} - -void PhraseTableCompact::CleanUpAfterSentenceProcessing() const -{ - //if(!m_sentenceCache.get()) - // m_sentenceCache.reset(new PhraseCache()); - - m_phraseDecoder->PruneCache(); - //m_sentenceCache->clear(); -} - - -// pb -void PhraseTableCompact::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const -{ - size_t inputSize = static_cast(mgr.GetInput()).GetSize(); - InputPaths &inputPathsCast = static_cast(inputPaths); - - for (size_t i = 0; i < inputSize; ++i) { - for (size_t startPos = 0; startPos < inputSize; ++startPos) { - size_t endPos = startPos + i; - if (endPos >= inputSize) { - break; - } - InputPath *path = inputPathsCast.GetMatrix().GetValue(startPos, i); - //cerr << "path=" << path->Debug(mgr.system) << endl; - TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path); - path->AddTargetPhrases(*this, tps); - } - } -} - -TargetPhrases *PhraseTableCompact::Lookup(const Manager &mgr, MemPool &pool, - InputPath &inputPath) const -{ - TargetPhrases *ret = NULL; - - const Phrase &sourcePhrase = 
inputPath.subPhrase; - //cerr << "sourcePhrase=" << sourcePhrase.Debug(mgr.system) << endl; - - // There is no souch source phrase if source phrase is longer than longest - // observed source phrase during compilation - if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength()) - return ret; - - // Retrieve target phrase collection from phrase table - TargetPhraseVectorPtr decodedPhraseColl - = m_phraseDecoder->CreateTargetPhraseCollection(mgr, sourcePhrase, true, true); - - if(decodedPhraseColl != NULL && decodedPhraseColl->size()) { - TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl)); - //TargetPhraseCollection::shared_ptr phraseColl(new TargetPhraseCollection); - ret = new (pool.Allocate()) TargetPhrases(pool, decodedPhraseColl->size()); - - for (size_t i = 0; i < decodedPhraseColl->size(); ++i) { - const TPCompact &tpCompact = decodedPhraseColl->at(i); - const TargetPhraseImpl *tp = CreateTargetPhrase(mgr, tpCompact, sourcePhrase); - - ret->AddTargetPhrase(*tp); - } - - ret->SortAndPrune(m_tableLimit); - mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *ret, sourcePhrase); - - //cerr << "RET2=" << ret->Debug(mgr.system) << endl; - /* - // Cache phrase pair for clean-up or retrieval with PREnc - const_cast(this)->CacheForCleanup(phraseColl); - - return phraseColl; - */ - } - - return ret; - -} - -const TargetPhraseImpl *PhraseTableCompact::CreateTargetPhrase( - const Manager &mgr, - const TPCompact &tpCompact, - const Phrase &sourcePhrase) const -{ - MemPool &pool = mgr.GetPool(); - - size_t size = tpCompact.words.size(); - TargetPhraseImpl *ret = new TargetPhraseImpl(pool, *this, mgr.system, size); - - // words - for (size_t i = 0; i < size; ++i) { - const Word &compactWord = tpCompact.words[i]; - Word &tpWord = (*ret)[i]; - tpWord = compactWord; - } - - // scores - Scores &scores = ret->GetScores(); - scores.Assign(mgr.system, *this, tpCompact.scores); - - // align - ret->SetAlignTerm(tpCompact.alignment); - - // 
score - mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, sourcePhrase, *ret); - - // Cache phrase pair for clean-up or retrieval with PREnc - //const_cast(this)->CacheForCleanup(phraseColl); - - //cerr << "ret=" << ret->Debug(mgr.system) << endl; - return ret; -} - - -// scfg -void PhraseTableCompact::InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const -{ - UTIL_THROW2("Not implemented"); -} - -void PhraseTableCompact::Lookup( - MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const -{ - UTIL_THROW2("Not implemented"); -} - -void PhraseTableCompact::LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const -{ - UTIL_THROW2("Not implemented"); -} - -} diff --git a/moses2/TranslationModel/CompactPT/PhraseTableCompact.h b/moses2/TranslationModel/CompactPT/PhraseTableCompact.h deleted file mode 100644 index dced14c0c..000000000 --- a/moses2/TranslationModel/CompactPT/PhraseTableCompact.h +++ /dev/null @@ -1,68 +0,0 @@ -#pragma once -#include "../PhraseTable.h" -#include "BlockHashIndex.h" - -namespace Moses2 -{ -class PhraseDecoder; -class TPCompact; - -class PhraseTableCompact: public PhraseTable -{ -public: - PhraseTableCompact(size_t startInd, const std::string &line); - virtual ~PhraseTableCompact(); - void Load(System &system); - virtual void SetParameter(const std::string& key, const std::string& value); - - virtual void CleanUpAfterSentenceProcessing() const; - - virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool, - InputPath &inputPath) const; - - // scfg - virtual void InitActiveChart( - MemPool &pool, - const SCFG::Manager &mgr, - SCFG::InputPath &path) const; - - virtual void Lookup(const Manager &mgr, InputPathsBase &inputPaths) 
const; - - virtual void Lookup( - MemPool &pool, - const SCFG::Manager &mgr, - size_t maxChartSpan, - const SCFG::Stacks &stacks, - SCFG::InputPath &path) const; - -protected: - static bool s_inMemoryByDefault; - bool m_inMemory; - bool m_useAlignmentInfo; - - BlockHashIndex m_hash; - - StringVector m_targetPhrasesMapped; - StringVector m_targetPhrasesMemory; - - friend class PhraseDecoder; - PhraseDecoder* m_phraseDecoder; - - const TargetPhraseImpl *CreateTargetPhrase( - const Manager &mgr, - const TPCompact &tpCompact, - const Phrase &sourcePhrase) const; - - // SCFG - virtual void LookupGivenNode( - MemPool &pool, - const SCFG::Manager &mgr, - const SCFG::ActiveChartEntry &prevEntry, - const SCFG::Word &wordSought, - const Moses2::Hypotheses *hypos, - const Moses2::Range &subPhraseRange, - SCFG::InputPath &outPath) const; - -}; - -} From 1cc7ac7a160acde50b53903b8d960c982850d462 Mon Sep 17 00:00:00 2001 From: MosesAdmin Date: Fri, 3 Feb 2017 10:33:11 +0000 Subject: [PATCH 114/176] daily automatic beautifier --- moses2/InMemoryTrie/InMemoryTrie.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/moses2/InMemoryTrie/InMemoryTrie.h b/moses2/InMemoryTrie/InMemoryTrie.h index e7e54a118..ba085f6ad 100644 --- a/moses2/InMemoryTrie/InMemoryTrie.h +++ b/moses2/InMemoryTrie/InMemoryTrie.h @@ -10,24 +10,23 @@ template class InMemoryTrie { public: - InMemoryTrie() - { + InMemoryTrie() { } Node* insert(const std::vector& word, - const ValueClass& value); + const ValueClass& value); const Node* getNode( - const std::vector& words) const; + const std::vector& words) const; const Node &getNode(const std::vector& words, size_t &stoppedAtInd) const; std::vector*> getNodes( - const std::vector& words, size_t &stoppedAtInd) const; + const std::vector& words, size_t &stoppedAtInd) const; private: Node root; }; template Node* InMemoryTrie::insert( - const std::vector& word, const ValueClass& value) + const std::vector& word, const ValueClass& 
value) { Node* cNode = &root; for (size_t i = 0; i < word.size(); ++i) { @@ -40,7 +39,7 @@ Node* InMemoryTrie::insert( template const Node* InMemoryTrie::getNode( - const std::vector& words) const + const std::vector& words) const { size_t stoppedAtInd; const Node &ret = getNode(words, stoppedAtInd); @@ -52,7 +51,7 @@ const Node* InMemoryTrie::getNode( template const Node &InMemoryTrie::getNode( - const std::vector& words, size_t &stoppedAtInd) const + const std::vector& words, size_t &stoppedAtInd) const { const Node *prevNode = &root, *newNode; for (size_t i = 0; i < words.size(); ++i) { @@ -71,7 +70,7 @@ const Node &InMemoryTrie::getNode( template std::vector*> InMemoryTrie::getNodes( - const std::vector& words, size_t &stoppedAtInd) const + const std::vector& words, size_t &stoppedAtInd) const { std::vector*> ret; const Node *prevNode = &root, *newNode; @@ -83,8 +82,7 @@ std::vector*> InMemoryTrie Date: Sat, 4 Feb 2017 20:41:51 +0000 Subject: [PATCH 115/176] bump --- BUILD-INSTRUCTIONS.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/BUILD-INSTRUCTIONS.txt b/BUILD-INSTRUCTIONS.txt index a41582bfa..7b9bc3a8a 100644 --- a/BUILD-INSTRUCTIONS.txt +++ b/BUILD-INSTRUCTIONS.txt @@ -7,4 +7,3 @@ into the source tree from elsewhere: * "bjam-files" is taken from Boost. 
* "util" and "lm" are taken from KenLM: https://github.com/kpu/kenlm - From 940e08a9462a375bb0fbdf373b3ffbc98756227c Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 8 Feb 2017 22:41:53 +0000 Subject: [PATCH 116/176] use thread_local --- moses2/System.cpp | 9 ++++++--- moses2/System.h | 4 ---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/moses2/System.cpp b/moses2/System.cpp index c02c47a6c..931bed296 100644 --- a/moses2/System.cpp +++ b/moses2/System.cpp @@ -166,12 +166,14 @@ void System::LoadDecodeGraphBackoff() MemPool &System::GetSystemPool() const { - return GetThreadSpecificObj(m_systemPool); + thread_local MemPool obj; + return obj; } MemPool &System::GetManagerPool() const { - return GetThreadSpecificObj(m_managerPool); + thread_local MemPool obj; + return obj; } FactorCollection &System::GetVocab() const @@ -181,7 +183,8 @@ FactorCollection &System::GetVocab() const Recycler &System::GetHypoRecycler() const { - return GetThreadSpecificObj(m_hypoRecycler); + thread_local Recycler obj; + return obj; } Batch &System::GetBatch(MemPool &pool) const diff --git a/moses2/System.h b/moses2/System.h index 1d60e96a0..e2f36b62d 100644 --- a/moses2/System.h +++ b/moses2/System.h @@ -65,10 +65,6 @@ public: protected: mutable FactorCollection m_vocab; - mutable boost::thread_specific_ptr m_managerPool; - mutable boost::thread_specific_ptr m_systemPool; - - mutable boost::thread_specific_ptr > m_hypoRecycler; mutable boost::thread_specific_ptr m_batch; From fd2830162765e102db205e12feaf110496575755 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 8 Feb 2017 22:58:32 +0000 Subject: [PATCH 117/176] completely delete boost::thread_specific_ptr --- moses2/System.cpp | 11 ++--------- moses2/System.h | 3 --- moses2/legacy/Util2.h | 14 -------------- 3 files changed, 2 insertions(+), 26 deletions(-) diff --git a/moses2/System.cpp b/moses2/System.cpp index 931bed296..72ab795e2 100644 --- a/moses2/System.cpp +++ b/moses2/System.cpp @@ -8,7 +8,6 @@ #include 
#include #include -#include #include "System.h" #include "FF/FeatureFunction.h" #include "TranslationModel/UnknownWordPenalty.h" @@ -189,14 +188,8 @@ Recycler &System::GetHypoRecycler() const Batch &System::GetBatch(MemPool &pool) const { - Batch *obj; - obj = m_batch.get(); - if (obj == NULL) { - obj = new Batch(pool); - m_batch.reset(obj); - } - assert(obj); - return *obj; + thread_local Batch obj(pool); + return obj; } void System::IsPb() diff --git a/moses2/System.h b/moses2/System.h index e2f36b62d..a682ba0fa 100644 --- a/moses2/System.h +++ b/moses2/System.h @@ -8,7 +8,6 @@ #pragma once #include #include -#include #include #include #include "FF/FeatureFunctions.h" @@ -66,8 +65,6 @@ public: protected: mutable FactorCollection m_vocab; - mutable boost::thread_specific_ptr m_batch; - void LoadWeights(); void LoadMappings(); void LoadDecodeGraphBackoff(); diff --git a/moses2/legacy/Util2.h b/moses2/legacy/Util2.h index eef638f93..3e64766f8 100644 --- a/moses2/legacy/Util2.h +++ b/moses2/legacy/Util2.h @@ -310,20 +310,6 @@ void Swap(T &a, T &b) b = c; } -template -T &GetThreadSpecificObj(boost::thread_specific_ptr &coll) -{ - T *obj; - obj = coll.get(); - if (obj == NULL) { - obj = new T; - coll.reset(obj); - } - assert(obj); - return *obj; - -} - // grab the underlying contain of priority queue template S& Container(std::priority_queue& q) From c74abcb9200397ab805b92d79a0af4c9ecffdbbd Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 10 Feb 2017 00:39:04 +0000 Subject: [PATCH 118/176] Revert "completely delete boost::thread_specific_ptr" This reverts commit fd2830162765e102db205e12feaf110496575755. 
--- moses2/System.cpp | 11 +++++++++-- moses2/System.h | 3 +++ moses2/legacy/Util2.h | 14 ++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/moses2/System.cpp b/moses2/System.cpp index b8f3cffda..937101a3c 100644 --- a/moses2/System.cpp +++ b/moses2/System.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include "System.h" #include "FF/FeatureFunction.h" #include "TranslationModel/UnknownWordPenalty.h" @@ -185,8 +186,14 @@ Recycler &System::GetHypoRecycler() const Batch &System::GetBatch(MemPool &pool) const { - thread_local Batch obj(pool); - return obj; + Batch *obj; + obj = m_batch.get(); + if (obj == NULL) { + obj = new Batch(pool); + m_batch.reset(obj); + } + assert(obj); + return *obj; } void System::IsPb() diff --git a/moses2/System.h b/moses2/System.h index a682ba0fa..e2f36b62d 100644 --- a/moses2/System.h +++ b/moses2/System.h @@ -8,6 +8,7 @@ #pragma once #include #include +#include #include #include #include "FF/FeatureFunctions.h" @@ -65,6 +66,8 @@ public: protected: mutable FactorCollection m_vocab; + mutable boost::thread_specific_ptr m_batch; + void LoadWeights(); void LoadMappings(); void LoadDecodeGraphBackoff(); diff --git a/moses2/legacy/Util2.h b/moses2/legacy/Util2.h index fd0d1daed..47b4a08ec 100644 --- a/moses2/legacy/Util2.h +++ b/moses2/legacy/Util2.h @@ -305,6 +305,20 @@ void Swap(T &a, T &b) b = c; } +template +T &GetThreadSpecificObj(boost::thread_specific_ptr &coll) +{ + T *obj; + obj = coll.get(); + if (obj == NULL) { + obj = new T; + coll.reset(obj); + } + assert(obj); + return *obj; + +} + // grab the underlying contain of priority queue template S& Container(std::priority_queue& q) From a6ba68148bf3ec516efae1b12d44e5b2c6c32ab2 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 10 Feb 2017 00:39:54 +0000 Subject: [PATCH 119/176] Revert "use thread_local" This reverts commit 940e08a9462a375bb0fbdf373b3ffbc98756227c. 
--- moses2/System.cpp | 9 +++------ moses2/System.h | 4 ++++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/moses2/System.cpp b/moses2/System.cpp index 937101a3c..63df967fe 100644 --- a/moses2/System.cpp +++ b/moses2/System.cpp @@ -163,14 +163,12 @@ void System::LoadDecodeGraphBackoff() MemPool &System::GetSystemPool() const { - thread_local MemPool obj; - return obj; + return GetThreadSpecificObj(m_systemPool); } MemPool &System::GetManagerPool() const { - thread_local MemPool obj; - return obj; + return GetThreadSpecificObj(m_managerPool); } FactorCollection &System::GetVocab() const @@ -180,8 +178,7 @@ FactorCollection &System::GetVocab() const Recycler &System::GetHypoRecycler() const { - thread_local Recycler obj; - return obj; + return GetThreadSpecificObj(m_hypoRecycler); } Batch &System::GetBatch(MemPool &pool) const diff --git a/moses2/System.h b/moses2/System.h index e2f36b62d..1d60e96a0 100644 --- a/moses2/System.h +++ b/moses2/System.h @@ -65,6 +65,10 @@ public: protected: mutable FactorCollection m_vocab; + mutable boost::thread_specific_ptr m_managerPool; + mutable boost::thread_specific_ptr m_systemPool; + + mutable boost::thread_specific_ptr > m_hypoRecycler; mutable boost::thread_specific_ptr m_batch; From 3b65dc39a7f54e2aeef487031a733d8a26486ce5 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Sat, 11 Feb 2017 00:02:59 +0000 Subject: [PATCH 120/176] remove all Moses headers --- contrib/other-builds/moses2/.project | 5 +++++ moses2/legacy/Util2.h | 9 +++++++++ moses2/legacy/xmlrpc-c.h | 10 ++++++++++ moses2/parameters/AllOptions.cpp | 1 + moses2/parameters/ContextParameters.cpp | 1 - moses2/parameters/ContextParameters.h | 2 -- moses2/parameters/InputOptions.cpp | 2 -- moses2/parameters/OOVHandlingOptions.cpp | 2 -- moses2/parameters/OptionsBaseClass.cpp | 1 - moses2/parameters/OptionsBaseClass.h | 2 +- 10 files changed, 26 insertions(+), 9 deletions(-) create mode 100644 moses2/legacy/xmlrpc-c.h diff --git 
a/contrib/other-builds/moses2/.project b/contrib/other-builds/moses2/.project index 87263a312..431ba885e 100644 --- a/contrib/other-builds/moses2/.project +++ b/contrib/other-builds/moses2/.project @@ -982,6 +982,11 @@ 1 PARENT-3-PROJECT_LOC/moses2/legacy/gzfilebuf.h + + legacy/xmlrpc-c.h + 1 + PARENT-3-PROJECT_LOC/moses2/legacy/xmlrpc-c.h + parameters/AllOptions.cpp 1 diff --git a/moses2/legacy/Util2.h b/moses2/legacy/Util2.h index 47b4a08ec..c2b8cf58f 100644 --- a/moses2/legacy/Util2.h +++ b/moses2/legacy/Util2.h @@ -17,6 +17,15 @@ namespace Moses2 { +#ifdef TRACE_ERR +#undef TRACE_ERR +#endif +#ifdef TRACE_ENABLE +#define TRACE_ERR(str) do { std::cerr << str; } while (false) +#else +#define TRACE_ERR(str) do {} while (false) +#endif + template class UnorderedComparer { diff --git a/moses2/legacy/xmlrpc-c.h b/moses2/legacy/xmlrpc-c.h new file mode 100644 index 000000000..1cdccad16 --- /dev/null +++ b/moses2/legacy/xmlrpc-c.h @@ -0,0 +1,10 @@ +#pragma once + +#ifdef HAVE_XMLRPC_C +#include +#else +namespace xmlrpc_c +{ +class value; +} +#endif diff --git a/moses2/parameters/AllOptions.cpp b/moses2/parameters/AllOptions.cpp index 85b54e4b4..c6341139b 100644 --- a/moses2/parameters/AllOptions.cpp +++ b/moses2/parameters/AllOptions.cpp @@ -1,5 +1,6 @@ // -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- #include "../legacy/Parameter.h" +#include "../legacy/Util2.h" #include "AllOptions.h" namespace Moses2 diff --git a/moses2/parameters/ContextParameters.cpp b/moses2/parameters/ContextParameters.cpp index 64a3307a8..3c5b894ee 100644 --- a/moses2/parameters/ContextParameters.cpp +++ b/moses2/parameters/ContextParameters.cpp @@ -1,5 +1,4 @@ #include "ContextParameters.h" -#include "moses/Util.h" #include "../legacy/Parameter.h" namespace Moses2 diff --git a/moses2/parameters/ContextParameters.h b/moses2/parameters/ContextParameters.h index 54923c548..5226e8eca 100644 --- a/moses2/parameters/ContextParameters.h +++ b/moses2/parameters/ContextParameters.h @@ -1,8 
+1,6 @@ // -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- #pragma once #include -#include "moses/TypeDef.h" -#include "moses/Util.h" #include "OptionsBaseClass.h" namespace Moses2 diff --git a/moses2/parameters/InputOptions.cpp b/moses2/parameters/InputOptions.cpp index 435e70e96..7a8c9242c 100644 --- a/moses2/parameters/InputOptions.cpp +++ b/moses2/parameters/InputOptions.cpp @@ -2,8 +2,6 @@ #include "InputOptions.h" #include #include -// #include "moses/StaticData.h" -#include "moses/TypeDef.h" #include "../legacy/Parameter.h" namespace Moses2 diff --git a/moses2/parameters/OOVHandlingOptions.cpp b/moses2/parameters/OOVHandlingOptions.cpp index 305526386..c7a5e30f2 100644 --- a/moses2/parameters/OOVHandlingOptions.cpp +++ b/moses2/parameters/OOVHandlingOptions.cpp @@ -2,8 +2,6 @@ #include "OOVHandlingOptions.h" #include #include -#include "moses/StaticData.h" -#include "moses/TypeDef.h" #include "../legacy/Parameter.h" namespace Moses2 diff --git a/moses2/parameters/OptionsBaseClass.cpp b/moses2/parameters/OptionsBaseClass.cpp index cab6b3cf5..8ccb0a563 100644 --- a/moses2/parameters/OptionsBaseClass.cpp +++ b/moses2/parameters/OptionsBaseClass.cpp @@ -1,6 +1,5 @@ // -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- #include "OptionsBaseClass.h" -#include "moses/Util.h" #include "../legacy/Parameter.h" namespace Moses2 diff --git a/moses2/parameters/OptionsBaseClass.h b/moses2/parameters/OptionsBaseClass.h index 088a05490..5265e9b23 100644 --- a/moses2/parameters/OptionsBaseClass.h +++ b/moses2/parameters/OptionsBaseClass.h @@ -1,6 +1,6 @@ // -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- #pragma once -#include "moses/xmlrpc-c.h" +#include "../legacy/xmlrpc-c.h" #include #include namespace Moses2 From 75a736964819fc77a72ac360d7ae5a46f76bbaf5 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 15 Feb 2017 10:05:58 +0000 Subject: [PATCH 121/176] apply patch for windows compatibility --- moses2/FF/FeatureRegistry.cpp | 2 +- 
moses2/FF/LexicalReordering/LexicalReordering.cpp | 6 ++++++ moses2/FF/LexicalReordering/LexicalReordering.h | 5 +++++ moses2/FF/StatefulFeatureFunction.cpp | 2 ++ moses2/HypothesisColl.cpp | 2 +- moses2/LM/GPULM.cpp | 4 +++- moses2/LM/GPULM.h | 2 ++ moses2/LM/KENLMBatch.cpp | 8 +++++--- moses2/LM/KENLMBatch.h | 2 ++ moses2/MemPoolAllocator.h | 4 ++++ moses2/TranslationModel/CompactPT/MmapAllocator.h | 7 ++++++- moses2/TranslationModel/CompactPT/MurmurHash3.h | 7 ++++--- moses2/TranslationModel/ProbingPT/ProbingPT.cpp | 6 +++--- .../TranslationModel/ProbingPT/probing_hash_utils.hh | 4 ++++ moses2/TranslationModel/ProbingPT/storing.cpp | 4 ++++ moses2/legacy/Matrix.h | 2 +- moses2/legacy/Parameter.cpp | 3 ++- moses2/legacy/ThreadPool.cpp | 11 +++++++---- moses2/parameters/AllOptions.cpp | 3 +-- moses2/server/Server.cpp | 8 +++++++- util/tempfile.hh | 4 ++-- 21 files changed, 72 insertions(+), 24 deletions(-) diff --git a/moses2/FF/FeatureRegistry.cpp b/moses2/FF/FeatureRegistry.cpp index d6038d982..bb8ee5058 100644 --- a/moses2/FF/FeatureRegistry.cpp +++ b/moses2/FF/FeatureRegistry.cpp @@ -40,7 +40,7 @@ class KenFactory: public FeatureFactory { public: FeatureFunction *Create(size_t startInd, const std::string &line) const { - ConstructKenLM(startInd, line); + return ConstructKenLM(startInd, line); } }; diff --git a/moses2/FF/LexicalReordering/LexicalReordering.cpp b/moses2/FF/LexicalReordering/LexicalReordering.cpp index dd5529640..292df42a4 100644 --- a/moses2/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses2/FF/LexicalReordering/LexicalReordering.cpp @@ -12,7 +12,9 @@ #include "PhraseBasedReorderingState.h" #include "BidirectionalReorderingState.h" #include "../../TranslationModel/PhraseTable.h" +#ifndef NO_COMPACT_TABLES #include "../../TranslationModel/CompactPT/LexicalReorderingTableCompact.h" +#endif #include "../../System.h" #include "../../PhraseBased/PhraseImpl.h" #include "../../PhraseBased/Manager.h" @@ -51,10 +53,12 @@ void 
LexicalReordering::Load(System &system) if (m_propertyInd >= 0) { // Using integrate Lex RO. No loading needed +#ifndef NO_COMPACT_TABLES } else if (FileExists(m_path + ".minlexr")) { m_compactModel = new LexicalReorderingTableCompact(m_path + ".minlexr", m_FactorsF, m_FactorsE, m_FactorsC); m_blank = new (pool.Allocate()) PhraseImpl(pool, 0); +#endif } else { m_coll = new Coll(); InputFileStream file(m_path); @@ -145,6 +149,7 @@ void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool, if (m_propertyInd >= 0) { SCORE *scoreArr = targetPhrase.GetScoresProperty(m_propertyInd); targetPhrase.ffData[m_PhraseTableInd] = scoreArr; +#ifndef NO_COMPACT_TABLES } else if (m_compactModel) { // using external compact binary model const Values values = m_compactModel->GetScore(sourcePhrase, targetPhrase, @@ -160,6 +165,7 @@ void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool, } else { targetPhrase.ffData[m_PhraseTableInd] = NULL; } +#endif } else if (m_coll) { // using external memory model diff --git a/moses2/FF/LexicalReordering/LexicalReordering.h b/moses2/FF/LexicalReordering/LexicalReordering.h index da8e7780f..688300037 100644 --- a/moses2/FF/LexicalReordering/LexicalReordering.h +++ b/moses2/FF/LexicalReordering/LexicalReordering.h @@ -76,7 +76,12 @@ protected: int m_propertyInd; // COMPACT MODEL +#ifndef NO_COMPACT_TABLES LexicalReorderingTableCompact *m_compactModel; +#else + void *m_compactModel; +#endif + Phrase *m_blank; // MEMORY MODEL diff --git a/moses2/FF/StatefulFeatureFunction.cpp b/moses2/FF/StatefulFeatureFunction.cpp index d120a6bc4..6d8045dd3 100644 --- a/moses2/FF/StatefulFeatureFunction.cpp +++ b/moses2/FF/StatefulFeatureFunction.cpp @@ -4,7 +4,9 @@ * Created on: 24 Oct 2015 * Author: hieu */ +#ifdef __linux #include +#endif #include #include #include diff --git a/moses2/HypothesisColl.cpp b/moses2/HypothesisColl.cpp index b33ba7835..703da7dbd 100644 --- a/moses2/HypothesisColl.cpp +++ b/moses2/HypothesisColl.cpp @@ -178,7 +178,7 @@ 
void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) Recycler &recycler = mgr.GetHypoRecycle(); - const HypothesisBase *sortedHypos[GetSize()]; + const HypothesisBase **sortedHypos = (const HypothesisBase **) new HypothesisBase *[GetSize()]; SortHypos(mgr, sortedHypos); // update worse score diff --git a/moses2/LM/GPULM.cpp b/moses2/LM/GPULM.cpp index 98ee22b11..714ff8ff2 100644 --- a/moses2/LM/GPULM.cpp +++ b/moses2/LM/GPULM.cpp @@ -8,11 +8,13 @@ #include #include +#ifdef _linux #include +#include +#endif #include #include #include -#include #include "GPULM.h" #include "../Phrase.h" diff --git a/moses2/LM/GPULM.h b/moses2/LM/GPULM.h index 33f97a313..6a3fb49f7 100644 --- a/moses2/LM/GPULM.h +++ b/moses2/LM/GPULM.h @@ -9,7 +9,9 @@ #include #include #include +#ifdef __linux #include +#endif #include "../FF/StatefulFeatureFunction.h" #include "lm/model.hh" diff --git a/moses2/LM/KENLMBatch.cpp b/moses2/LM/KENLMBatch.cpp index b35004bba..d36430961 100644 --- a/moses2/LM/KENLMBatch.cpp +++ b/moses2/LM/KENLMBatch.cpp @@ -8,11 +8,13 @@ #include #include +#ifdef _linux #include +#include +#endif #include #include #include -#include #include "KENLMBatch.h" #include "../Phrase.h" @@ -187,8 +189,8 @@ void KENLMBatch::EvaluateWhenApplied(const ManagerBase &mgr, const std::size_t adjust_end = std::min(end, begin + m_ngram->Order() - 1); std::size_t position = begin; - typename Model::State aux_state; - typename Model::State *state0 = &stateCast.state, *state1 = &aux_state; + Model::State aux_state; + Model::State *state0 = &stateCast.state, *state1 = &aux_state; float score = m_ngram->Score(in_state, TranslateID(hypo.GetWord(position)), *state0); diff --git a/moses2/LM/KENLMBatch.h b/moses2/LM/KENLMBatch.h index 3d27bc36b..1510381b5 100644 --- a/moses2/LM/KENLMBatch.h +++ b/moses2/LM/KENLMBatch.h @@ -9,7 +9,9 @@ #include #include #include +#ifdef __linux #include +#endif #include "../FF/StatefulFeatureFunction.h" #include "lm/model.hh" diff --git 
a/moses2/MemPoolAllocator.h b/moses2/MemPoolAllocator.h index cb0a546c7..8076338b6 100644 --- a/moses2/MemPoolAllocator.h +++ b/moses2/MemPoolAllocator.h @@ -72,6 +72,10 @@ public: return false; } + MemPoolAllocator& operator=(const MemPoolAllocator& allocator) { + return *this; + } + MemPool &m_pool; protected: }; diff --git a/moses2/TranslationModel/CompactPT/MmapAllocator.h b/moses2/TranslationModel/CompactPT/MmapAllocator.h index c92166da6..c2923cad5 100644 --- a/moses2/TranslationModel/CompactPT/MmapAllocator.h +++ b/moses2/TranslationModel/CompactPT/MmapAllocator.h @@ -25,13 +25,14 @@ #include #include #include -#include #if defined(_WIN32) || defined(_WIN64) +#define _WINSOCKAPI_ #include #include #else #include +#include #endif #include "util/mmap.hh" @@ -129,7 +130,11 @@ public: #endif if (!m_fixed) { size_t read = 0; +#ifdef _WIN32 + read += _chsize_s(m_file_desc, m_map_size); +#else read += ftruncate(m_file_desc, m_map_size); +#endif m_data_ptr = (char *) util::MapOrThrow(m_map_size, true, map_shared, false, m_file_desc, 0); return (pointer) m_data_ptr; diff --git a/moses2/TranslationModel/CompactPT/MurmurHash3.h b/moses2/TranslationModel/CompactPT/MurmurHash3.h index f513008cf..ef885a6d4 100644 --- a/moses2/TranslationModel/CompactPT/MurmurHash3.h +++ b/moses2/TranslationModel/CompactPT/MurmurHash3.h @@ -12,9 +12,10 @@ #if defined(_MSC_VER) -typedef unsigned char uint8_t; -typedef unsigned long uint32_t; -typedef unsigned __int64 uint64_t; +#include +//typedef unsigned char uint8_t; +//typedef unsigned long uint32_t; +//typedef unsigned __int64 uint64_t; // Other compilers diff --git a/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/moses2/TranslationModel/ProbingPT/ProbingPT.cpp index d405bf026..86231842d 100644 --- a/moses2/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/moses2/TranslationModel/ProbingPT/ProbingPT.cpp @@ -240,7 +240,7 @@ std::pair ProbingPT::GetKey(const Phrase &sourcePh size_t sourceSize = sourcePhrase.GetSize(); assert(sourceSize); 
- uint64_t probingSource[sourceSize]; + uint64_t *probingSource = new uint64_t[sourceSize]; GetSourceProbingIds(sourcePhrase, ret.first, probingSource); if (!ret.first) { // source phrase contains a word unknown in the pt. @@ -317,7 +317,7 @@ TargetPhraseImpl *ProbingPT::CreateTargetPhrase( } } else { // log score 1st - SCORE logScores[totalNumScores]; + SCORE *logScores = new SCORE[totalNumScores]; for (size_t i = 0; i < totalNumScores; ++i) { logScores[i] = FloorScore(TransformScore(scores[i])); } @@ -622,7 +622,7 @@ SCFG::TargetPhraseImpl *ProbingPT::CreateTargetPhraseSCFG( } } else { // log score 1st - SCORE logScores[totalNumScores]; + SCORE *logScores = new SCORE[totalNumScores]; for (size_t i = 0; i < totalNumScores; ++i) { logScores[i] = FloorScore(TransformScore(scores[i])); } diff --git a/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh b/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh index 7e275510a..fa5a9a5a0 100644 --- a/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh +++ b/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh @@ -2,7 +2,11 @@ #include "util/probing_hash_table.hh" +#if defined(_WIN32) || defined(_WIN64) +#include +#else #include +#endif #include #include #include diff --git a/moses2/TranslationModel/ProbingPT/storing.cpp b/moses2/TranslationModel/ProbingPT/storing.cpp index c27e99634..862aab3e8 100644 --- a/moses2/TranslationModel/ProbingPT/storing.cpp +++ b/moses2/TranslationModel/ProbingPT/storing.cpp @@ -67,6 +67,9 @@ void createProbingPT(const std::string &phrasetable_path, const std::string &basepath, int num_scores, int num_lex_scores, bool log_prob, int max_cache_size, bool scfg) { +#if defined(_WIN32) || defined(_WIN64) + std::cerr << "Create not implemented for Windows" << std::endl; +#else std::cerr << "Starting..." 
<< std::endl; //Get basepath and create directory if missing @@ -231,6 +234,7 @@ void createProbingPT(const std::string &phrasetable_path, configfile << "num_lex_scores\t" << num_lex_scores << '\n'; configfile << "log_prob\t" << log_prob << '\n'; configfile.close(); +#endif } size_t countUniqueSource(const std::string &path) diff --git a/moses2/legacy/Matrix.h b/moses2/legacy/Matrix.h index ddfde9027..e2dbbba2c 100644 --- a/moses2/legacy/Matrix.h +++ b/moses2/legacy/Matrix.h @@ -43,7 +43,7 @@ public: m_array = pool.Allocate(rows * cols); } - ~Matrix(); // not implemented + //~Matrix(); // not implemented // set upper triangle void InitTriangle(const T &val) { diff --git a/moses2/legacy/Parameter.cpp b/moses2/legacy/Parameter.cpp index 6ba30f651..f8af5f90d 100644 --- a/moses2/legacy/Parameter.cpp +++ b/moses2/legacy/Parameter.cpp @@ -571,7 +571,8 @@ bool Parameter::LoadParam(int argc, char* xargv[]) { // legacy parameter handling: all parameters are expected // to start with a single dash - char* argv[argc + 1]; + char **argv = new char*[argc + 1]; + for (int i = 0; i < argc; ++i) { argv[i] = xargv[i]; if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == '-') ++argv[i]; diff --git a/moses2/legacy/ThreadPool.cpp b/moses2/legacy/ThreadPool.cpp index 8ab0976ac..2e5c48e36 100644 --- a/moses2/legacy/ThreadPool.cpp +++ b/moses2/legacy/ThreadPool.cpp @@ -19,12 +19,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #include +#ifdef __linux #include -#include +#include +#endif #include #include #include -#include +#include #include "ThreadPool.h" @@ -38,9 +40,10 @@ namespace Moses2 ThreadPool::ThreadPool(size_t numThreads, int cpuAffinityOffset, int cpuAffinityIncr) : - m_stopped(false), m_stopping(false), m_queueLimit(0) + m_stopped(false), m_stopping(false), m_queueLimit(0) { - size_t numCPU = sysconf(_SC_NPROCESSORS_ONLN); + //size_t numCPU = 
sysconf(_SC_NPROCESSORS_ONLN); + size_t numCPU = std::thread::hardware_concurrency(); int cpuInd = cpuAffinityOffset % numCPU; for (size_t i = 0; i < numThreads; ++i) { diff --git a/moses2/parameters/AllOptions.cpp b/moses2/parameters/AllOptions.cpp index c6341139b..954d7e8e7 100644 --- a/moses2/parameters/AllOptions.cpp +++ b/moses2/parameters/AllOptions.cpp @@ -62,8 +62,7 @@ sanity_check() // RecoverPath should only be used with confusion net or word lattice input if (output.RecoverPath && input.input_type == SentenceInput) { - TRACE_ERR("--recover-input-path should only be used with " - <<"confusion net or word lattice input!\n"); + TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n"); output.RecoverPath = false; } diff --git a/moses2/server/Server.cpp b/moses2/server/Server.cpp index 2293c62cd..57218c374 100644 --- a/moses2/server/Server.cpp +++ b/moses2/server/Server.cpp @@ -45,7 +45,13 @@ void Server::run(System &system) pidfilename << "/tmp/moses-server." 
<< m_server_options.port << ".pid"; m_pidfile = pidfilename.str(); std::ofstream pidfile(m_pidfile.c_str()); - pidfile << getpid() << std::endl; + +#ifdef _WIN32 + int thePid = GetCurrentProcessId(); +#else + int thePid = getpid(); +#endif + pidfile << thePid << std::endl; pidfile.close(); cerr << "Listening on port " << m_server_options.port << std::endl; if (m_server_options.is_serial) { diff --git a/util/tempfile.hh b/util/tempfile.hh index 9c28346fc..f4fb1860c 100644 --- a/util/tempfile.hh +++ b/util/tempfile.hh @@ -27,7 +27,7 @@ std::string temp_location() { #if defined(_WIN32) || defined(_WIN64) char dir_buffer[1000]; - if (GetTempPath(1000, dir_buffer) == 0) + if (GetTempPathA(1000, dir_buffer) == 0) throw std::runtime_error("Could not read temporary directory."); return std::string(dir_buffer); #else @@ -51,7 +51,7 @@ std::string windows_tmpnam() { const std::string tmp = temp_location(); char output_buffer[MAX_PATH]; - if (GetTempFileName(tmp.c_str(), "tmp", 0, output_buffer) == 0) + if (GetTempFileNameA(tmp.c_str(), "tmp", 0, output_buffer) == 0) throw std::runtime_error("Could not create temporary file name."); return output_buffer; } From f8f4087aac52a5686f2a5c7a498daba670919165 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 15 Feb 2017 10:22:10 +0000 Subject: [PATCH 122/176] change new -> alloca --- moses2/HypothesisColl.cpp | 2 +- moses2/TranslationModel/ProbingPT/ProbingPT.cpp | 6 +++--- moses2/legacy/Parameter.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/moses2/HypothesisColl.cpp b/moses2/HypothesisColl.cpp index 703da7dbd..18046bd4c 100644 --- a/moses2/HypothesisColl.cpp +++ b/moses2/HypothesisColl.cpp @@ -178,7 +178,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) Recycler &recycler = mgr.GetHypoRecycle(); - const HypothesisBase **sortedHypos = (const HypothesisBase **) new HypothesisBase *[GetSize()]; + const HypothesisBase **sortedHypos = (const HypothesisBase **) alloca(GetSize() 
* sizeof(const HypothesisBase *)); SortHypos(mgr, sortedHypos); // update worse score diff --git a/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/moses2/TranslationModel/ProbingPT/ProbingPT.cpp index 86231842d..852f0a7b7 100644 --- a/moses2/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/moses2/TranslationModel/ProbingPT/ProbingPT.cpp @@ -240,7 +240,7 @@ std::pair ProbingPT::GetKey(const Phrase &sourcePh size_t sourceSize = sourcePhrase.GetSize(); assert(sourceSize); - uint64_t *probingSource = new uint64_t[sourceSize]; + uint64_t *probingSource = (uint64_t*) alloca(sourceSize * sizeof(uint64_t)); GetSourceProbingIds(sourcePhrase, ret.first, probingSource); if (!ret.first) { // source phrase contains a word unknown in the pt. @@ -317,7 +317,7 @@ TargetPhraseImpl *ProbingPT::CreateTargetPhrase( } } else { // log score 1st - SCORE *logScores = new SCORE[totalNumScores]; + SCORE *logScores = (SCORE*) alloca(totalNumScores * sizeof(SCORE)); for (size_t i = 0; i < totalNumScores; ++i) { logScores[i] = FloorScore(TransformScore(scores[i])); } @@ -622,7 +622,7 @@ SCFG::TargetPhraseImpl *ProbingPT::CreateTargetPhraseSCFG( } } else { // log score 1st - SCORE *logScores = new SCORE[totalNumScores]; + SCORE *logScores = (SCORE*) alloca(totalNumScores * sizeof(SCORE)); for (size_t i = 0; i < totalNumScores; ++i) { logScores[i] = FloorScore(TransformScore(scores[i])); } diff --git a/moses2/legacy/Parameter.cpp b/moses2/legacy/Parameter.cpp index f8af5f90d..7376c1099 100644 --- a/moses2/legacy/Parameter.cpp +++ b/moses2/legacy/Parameter.cpp @@ -571,7 +571,7 @@ bool Parameter::LoadParam(int argc, char* xargv[]) { // legacy parameter handling: all parameters are expected // to start with a single dash - char **argv = new char*[argc + 1]; + char **argv = (char**) alloca(argc * sizeof(char*)); for (int i = 0; i < argc; ++i) { argv[i] = xargv[i]; From de009c1fca02f8bcd60cb65f39eaa8d8d80c118c Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 15 Feb 2017 10:30:13 +0000 Subject: 
[PATCH 123/176] change NO_COMPACT_TABLES -> HAVE_CMPH --- .../LexicalReordering/LexicalReordering.cpp | 28 +++++++++++++------ .../FF/LexicalReordering/LexicalReordering.h | 4 +-- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/moses2/FF/LexicalReordering/LexicalReordering.cpp b/moses2/FF/LexicalReordering/LexicalReordering.cpp index 292df42a4..c84b30a59 100644 --- a/moses2/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses2/FF/LexicalReordering/LexicalReordering.cpp @@ -12,9 +12,6 @@ #include "PhraseBasedReorderingState.h" #include "BidirectionalReorderingState.h" #include "../../TranslationModel/PhraseTable.h" -#ifndef NO_COMPACT_TABLES -#include "../../TranslationModel/CompactPT/LexicalReorderingTableCompact.h" -#endif #include "../../System.h" #include "../../PhraseBased/PhraseImpl.h" #include "../../PhraseBased/Manager.h" @@ -24,6 +21,11 @@ #include "../../legacy/InputFileStream.h" #include "../../legacy/Util2.h" +#ifdef HAVE_CMPH +#include "../../TranslationModel/CompactPT/LexicalReorderingTableCompact.h" +#endif + + using namespace std; namespace Moses2 @@ -31,9 +33,15 @@ namespace Moses2 /////////////////////////////////////////////////////////////////////// -LexicalReordering::LexicalReordering(size_t startInd, const std::string &line) : - StatefulFeatureFunction(startInd, line), m_compactModel(NULL), m_blank( - NULL), m_propertyInd(-1), m_coll(NULL), m_configuration(NULL) +LexicalReordering::LexicalReordering(size_t startInd, const std::string &line) +: StatefulFeatureFunction(startInd, line) +, m_blank(NULL) +, m_propertyInd(-1) +, m_coll(NULL) +, m_configuration(NULL) +#ifdef HAVE_CMPH +, m_compactModel(NULL) +#endif { ReadParameters(); assert(m_configuration); @@ -42,9 +50,11 @@ LexicalReordering::LexicalReordering(size_t startInd, const std::string &line) : LexicalReordering::~LexicalReordering() { - delete m_compactModel; delete m_coll; delete m_configuration; +#ifdef HAVE_CMPH + delete m_compactModel; +#endif } void 
LexicalReordering::Load(System &system) @@ -53,7 +63,7 @@ void LexicalReordering::Load(System &system) if (m_propertyInd >= 0) { // Using integrate Lex RO. No loading needed -#ifndef NO_COMPACT_TABLES +#ifdef HAVE_CMPH } else if (FileExists(m_path + ".minlexr")) { m_compactModel = new LexicalReorderingTableCompact(m_path + ".minlexr", m_FactorsF, m_FactorsE, m_FactorsC); @@ -149,7 +159,7 @@ void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool, if (m_propertyInd >= 0) { SCORE *scoreArr = targetPhrase.GetScoresProperty(m_propertyInd); targetPhrase.ffData[m_PhraseTableInd] = scoreArr; -#ifndef NO_COMPACT_TABLES +#ifdef HAVE_CMPH } else if (m_compactModel) { // using external compact binary model const Values values = m_compactModel->GetScore(sourcePhrase, targetPhrase, diff --git a/moses2/FF/LexicalReordering/LexicalReordering.h b/moses2/FF/LexicalReordering/LexicalReordering.h index 688300037..59f63eba2 100644 --- a/moses2/FF/LexicalReordering/LexicalReordering.h +++ b/moses2/FF/LexicalReordering/LexicalReordering.h @@ -76,10 +76,8 @@ protected: int m_propertyInd; // COMPACT MODEL -#ifndef NO_COMPACT_TABLES +#ifdef HAVE_CMPH LexicalReorderingTableCompact *m_compactModel; -#else - void *m_compactModel; #endif Phrase *m_blank; From 54c2eae4b492d972b251fed0d632ebafe0e47ca8 Mon Sep 17 00:00:00 2001 From: MosesAdmin Date: Wed, 15 Feb 2017 11:03:23 +0000 Subject: [PATCH 124/176] daily automatic beautifier --- moses2/FF/LexicalReordering/LexicalReordering.cpp | 12 ++++++------ moses2/MemPoolAllocator.h | 2 +- moses2/TranslationModel/CompactPT/MmapAllocator.h | 2 +- moses2/TranslationModel/ProbingPT/storing.cpp | 2 +- moses2/legacy/ThreadPool.cpp | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/moses2/FF/LexicalReordering/LexicalReordering.cpp b/moses2/FF/LexicalReordering/LexicalReordering.cpp index c84b30a59..6f510574c 100644 --- a/moses2/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses2/FF/LexicalReordering/LexicalReordering.cpp @@ 
-34,13 +34,13 @@ namespace Moses2 /////////////////////////////////////////////////////////////////////// LexicalReordering::LexicalReordering(size_t startInd, const std::string &line) -: StatefulFeatureFunction(startInd, line) -, m_blank(NULL) -, m_propertyInd(-1) -, m_coll(NULL) -, m_configuration(NULL) + : StatefulFeatureFunction(startInd, line) + , m_blank(NULL) + , m_propertyInd(-1) + , m_coll(NULL) + , m_configuration(NULL) #ifdef HAVE_CMPH -, m_compactModel(NULL) + , m_compactModel(NULL) #endif { ReadParameters(); diff --git a/moses2/MemPoolAllocator.h b/moses2/MemPoolAllocator.h index 8076338b6..994bb7711 100644 --- a/moses2/MemPoolAllocator.h +++ b/moses2/MemPoolAllocator.h @@ -73,7 +73,7 @@ public: } MemPoolAllocator& operator=(const MemPoolAllocator& allocator) { - return *this; + return *this; } MemPool &m_pool; diff --git a/moses2/TranslationModel/CompactPT/MmapAllocator.h b/moses2/TranslationModel/CompactPT/MmapAllocator.h index c2923cad5..09ba58d93 100644 --- a/moses2/TranslationModel/CompactPT/MmapAllocator.h +++ b/moses2/TranslationModel/CompactPT/MmapAllocator.h @@ -131,7 +131,7 @@ public: if (!m_fixed) { size_t read = 0; #ifdef _WIN32 - read += _chsize_s(m_file_desc, m_map_size); + read += _chsize_s(m_file_desc, m_map_size); #else read += ftruncate(m_file_desc, m_map_size); #endif diff --git a/moses2/TranslationModel/ProbingPT/storing.cpp b/moses2/TranslationModel/ProbingPT/storing.cpp index 862aab3e8..ff26bc72b 100644 --- a/moses2/TranslationModel/ProbingPT/storing.cpp +++ b/moses2/TranslationModel/ProbingPT/storing.cpp @@ -68,7 +68,7 @@ void createProbingPT(const std::string &phrasetable_path, bool log_prob, int max_cache_size, bool scfg) { #if defined(_WIN32) || defined(_WIN64) - std::cerr << "Create not implemented for Windows" << std::endl; + std::cerr << "Create not implemented for Windows" << std::endl; #else std::cerr << "Starting..." 
<< std::endl; diff --git a/moses2/legacy/ThreadPool.cpp b/moses2/legacy/ThreadPool.cpp index 2e5c48e36..5610f2935 100644 --- a/moses2/legacy/ThreadPool.cpp +++ b/moses2/legacy/ThreadPool.cpp @@ -40,7 +40,7 @@ namespace Moses2 ThreadPool::ThreadPool(size_t numThreads, int cpuAffinityOffset, int cpuAffinityIncr) : - m_stopped(false), m_stopping(false), m_queueLimit(0) + m_stopped(false), m_stopping(false), m_queueLimit(0) { //size_t numCPU = sysconf(_SC_NPROCESSORS_ONLN); size_t numCPU = std::thread::hardware_concurrency(); From d7b67e3cd2d218fc4c9c637e16707c50c53f773c Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 15 Feb 2017 14:26:01 +0000 Subject: [PATCH 125/176] go back to using sysconf() for linux/osx. Runs ok on new systems but not thor. std::thread::hardware_concurrency() returns 0 on thor --- moses2/legacy/ThreadPool.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/moses2/legacy/ThreadPool.cpp b/moses2/legacy/ThreadPool.cpp index 5610f2935..dad965a94 100644 --- a/moses2/legacy/ThreadPool.cpp +++ b/moses2/legacy/ThreadPool.cpp @@ -42,8 +42,13 @@ ThreadPool::ThreadPool(size_t numThreads, int cpuAffinityOffset, int cpuAffinityIncr) : m_stopped(false), m_stopping(false), m_queueLimit(0) { - //size_t numCPU = sysconf(_SC_NPROCESSORS_ONLN); +#if defined(_WIN32) || defined(_WIN64) size_t numCPU = std::thread::hardware_concurrency(); +#else + size_t numCPU = sysconf(_SC_NPROCESSORS_ONLN); +#endif + //cerr << "numCPU=" << numCPU << endl; + int cpuInd = cpuAffinityOffset % numCPU; for (size_t i = 0; i < numThreads; ++i) { From 099020cd41b28d9d1f72d6e412a4873cc3c1ac47 Mon Sep 17 00:00:00 2001 From: MosesAdmin Date: Wed, 15 Feb 2017 14:27:27 +0000 Subject: [PATCH 126/176] daily automatic beautifier --- moses2/legacy/ThreadPool.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moses2/legacy/ThreadPool.cpp b/moses2/legacy/ThreadPool.cpp index dad965a94..43423e545 100644 --- a/moses2/legacy/ThreadPool.cpp +++ 
b/moses2/legacy/ThreadPool.cpp @@ -44,11 +44,11 @@ ThreadPool::ThreadPool(size_t numThreads, int cpuAffinityOffset, { #if defined(_WIN32) || defined(_WIN64) size_t numCPU = std::thread::hardware_concurrency(); -#else +#else size_t numCPU = sysconf(_SC_NPROCESSORS_ONLN); #endif //cerr << "numCPU=" << numCPU << endl; - + int cpuInd = cpuAffinityOffset % numCPU; for (size_t i = 0; i < numThreads; ++i) { From 8f456299c448eb83a4f71f225cc2bd9d0e4d3212 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 15 Feb 2017 20:37:50 +0000 Subject: [PATCH 127/176] start probing pt lib --- Jamroot | 1 + probingpt/Jamfile | 7 +++++++ probingpt/Main.cpp | 5 +++++ probingpt/temp.cpp | 5 +++++ 4 files changed, 18 insertions(+) create mode 100644 probingpt/Jamfile create mode 100644 probingpt/Main.cpp create mode 100644 probingpt/temp.cpp diff --git a/Jamroot b/Jamroot index ea4650670..6cc7c9427 100644 --- a/Jamroot +++ b/Jamroot @@ -316,6 +316,7 @@ contrib/c++tokenizer//tokenizer contrib/expected-bleu-training//train-expected-bleu contrib/expected-bleu-training//prepare-expected-bleu-training +probingpt//programs moses2//programs ; diff --git a/probingpt/Jamfile b/probingpt/Jamfile new file mode 100644 index 000000000..214da47ac --- /dev/null +++ b/probingpt/Jamfile @@ -0,0 +1,7 @@ +lib probingpt : + temp.cpp + ; + +exe ppt : Main.cpp probingpt ; + +alias programs : ppt ; diff --git a/probingpt/Main.cpp b/probingpt/Main.cpp new file mode 100644 index 000000000..d04a8018b --- /dev/null +++ b/probingpt/Main.cpp @@ -0,0 +1,5 @@ + +int main() +{ + +} diff --git a/probingpt/temp.cpp b/probingpt/temp.cpp new file mode 100644 index 000000000..e3d03f997 --- /dev/null +++ b/probingpt/temp.cpp @@ -0,0 +1,5 @@ + +int foo() +{ + return 5; +} From 74b123649ea36af6a66790b7dd922ede91d0b4a2 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 15 Feb 2017 21:26:28 +0000 Subject: [PATCH 128/176] probing pt lib compiles --- misc/Jamfile | 7 +- moses2/Jamfile | 10 +- {misc => 
probingpt}/CreateProbingPT.cpp | 8 +- probingpt/InputFileStream.cpp | 59 ++++++++++++ probingpt/InputFileStream.h | 46 +++++++++ probingpt/Jamfile | 18 +++- probingpt/Main.cpp | 5 - probingpt/OutputFileStream.cpp | 87 +++++++++++++++++ probingpt/OutputFileStream.h | 81 ++++++++++++++++ .../ProbingPT => probingpt}/StoreTarget.cpp | 20 ++-- .../ProbingPT => probingpt}/StoreTarget.h | 2 +- .../ProbingPT => probingpt}/StoreVocab.cpp | 2 +- .../ProbingPT => probingpt}/StoreVocab.h | 6 +- probingpt/gzfilebuf.h | 94 +++++++++++++++++++ .../ProbingPT => probingpt}/hash.cpp | 2 +- .../ProbingPT => probingpt}/hash.hh | 2 +- .../ProbingPT => probingpt}/line_splitter.cpp | 2 +- .../ProbingPT => probingpt}/line_splitter.hh | 2 +- .../probing_hash_utils.cpp | 2 +- .../probing_hash_utils.hh | 2 +- .../ProbingPT => probingpt}/querying.cpp | 24 ++--- .../ProbingPT => probingpt}/querying.hh | 6 +- .../ProbingPT => probingpt}/storing.cpp | 16 ++-- .../ProbingPT => probingpt}/storing.hh | 2 +- probingpt/temp.cpp | 5 - .../ProbingPT => probingpt}/vocabid.cpp | 8 +- .../ProbingPT => probingpt}/vocabid.hh | 2 +- 27 files changed, 438 insertions(+), 82 deletions(-) rename {misc => probingpt}/CreateProbingPT.cpp (95%) create mode 100644 probingpt/InputFileStream.cpp create mode 100644 probingpt/InputFileStream.h delete mode 100644 probingpt/Main.cpp create mode 100644 probingpt/OutputFileStream.cpp create mode 100644 probingpt/OutputFileStream.h rename {moses2/TranslationModel/ProbingPT => probingpt}/StoreTarget.cpp (93%) rename {moses2/TranslationModel/ProbingPT => probingpt}/StoreTarget.h (98%) rename {moses2/TranslationModel/ProbingPT => probingpt}/StoreVocab.cpp (88%) rename {moses2/TranslationModel/ProbingPT => probingpt}/StoreVocab.h (91%) create mode 100644 probingpt/gzfilebuf.h rename {moses2/TranslationModel/ProbingPT => probingpt}/hash.cpp (97%) rename {moses2/TranslationModel/ProbingPT => probingpt}/hash.hh (94%) rename {moses2/TranslationModel/ProbingPT => 
probingpt}/line_splitter.cpp (99%) rename {moses2/TranslationModel/ProbingPT => probingpt}/line_splitter.hh (98%) rename {moses2/TranslationModel/ProbingPT => probingpt}/probing_hash_utils.cpp (97%) rename {moses2/TranslationModel/ProbingPT => probingpt}/probing_hash_utils.hh (97%) rename {moses2/TranslationModel/ProbingPT => probingpt}/querying.cpp (88%) rename {moses2/TranslationModel/ProbingPT => probingpt}/querying.hh (95%) rename {moses2/TranslationModel/ProbingPT => probingpt}/storing.cpp (95%) rename {moses2/TranslationModel/ProbingPT => probingpt}/storing.hh (98%) delete mode 100644 probingpt/temp.cpp rename {moses2/TranslationModel/ProbingPT => probingpt}/vocabid.cpp (87%) rename {moses2/TranslationModel/ProbingPT => probingpt}/vocabid.hh (97%) diff --git a/misc/Jamfile b/misc/Jamfile index 135490a46..9539aaabd 100644 --- a/misc/Jamfile +++ b/misc/Jamfile @@ -30,11 +30,6 @@ else { alias programsMin ; } -exe CreateProbingPT : CreateProbingPT.cpp ..//boost_filesystem ../moses//moses ; -#exe QueryProbingPT : QueryProbingPT.cpp ..//boost_filesystem ../moses//moses ; - -alias programsProbing : CreateProbingPT ; #QueryProbingPT - exe merge-sorted : merge-sorted.cc ../moses//moses @@ -43,6 +38,6 @@ $(TOP)//boost_iostreams $(TOP)//boost_program_options ; -alias programs : 1-1-Extraction TMining generateSequences processLexicalTable queryLexicalTable programsMin programsProbing merge-sorted prunePhraseTable pruneGeneration ; +alias programs : 1-1-Extraction TMining generateSequences processLexicalTable queryLexicalTable programsMin merge-sorted prunePhraseTable pruneGeneration ; #processPhraseTable queryPhraseTable diff --git a/moses2/Jamfile b/moses2/Jamfile index b09bd089b..3b4d5d10a 100644 --- a/moses2/Jamfile +++ b/moses2/Jamfile @@ -65,15 +65,7 @@ alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses/Translatio TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp TranslationModel/CompactPT/ThrowingFwrite.cpp - 
TranslationModel/ProbingPT/ProbingPT.cpp - TranslationModel/ProbingPT/hash.cpp - TranslationModel/ProbingPT/line_splitter.cpp - TranslationModel/ProbingPT/probing_hash_utils.cpp - TranslationModel/ProbingPT/querying.cpp - TranslationModel/ProbingPT/storing.cpp - TranslationModel/ProbingPT/StoreVocab.cpp - TranslationModel/ProbingPT/StoreTarget.cpp - TranslationModel/ProbingPT/vocabid.cpp + TranslationModel/ProbingPT/ProbingPT.cpp parameters/AllOptions.cpp parameters/BookkeepingOptions.cpp diff --git a/misc/CreateProbingPT.cpp b/probingpt/CreateProbingPT.cpp similarity index 95% rename from misc/CreateProbingPT.cpp rename to probingpt/CreateProbingPT.cpp index 2cf6627ef..d68b43387 100644 --- a/misc/CreateProbingPT.cpp +++ b/probingpt/CreateProbingPT.cpp @@ -2,8 +2,8 @@ #include #include "util/usage.hh" #include "moses/TranslationModel/ProbingPT/storing.hh" -#include "moses/InputFileStream.h" -#include "moses/OutputFileStream.h" +#include "InputFileStream.h" +#include "OutputFileStream.h" #include "moses/Util.h" using namespace std; @@ -74,9 +74,9 @@ int main(int argc, char* argv[]) std::string ReformatSCFGFile(const std::string &path) { - Moses::InputFileStream inFile(path); + probingpt::InputFileStream inFile(path); string reformattedPath = path + ".reformat.gz"; - Moses::OutputFileStream outFile(reformattedPath); + probingpt::OutputFileStream outFile(reformattedPath); string line; while (getline(inFile, line)) { diff --git a/probingpt/InputFileStream.cpp b/probingpt/InputFileStream.cpp new file mode 100644 index 000000000..586d4bddb --- /dev/null +++ b/probingpt/InputFileStream.cpp @@ -0,0 +1,59 @@ +// $Id$ + +/*********************************************************************** + Moses - factored phrase-based language decoder + Copyright (C) 2006 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + 
version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#include "InputFileStream.h" +#include "gzfilebuf.h" +#include + +using namespace std; + +namespace probingpt +{ + +InputFileStream::InputFileStream(const std::string &filePath) : + std::istream(NULL), m_streambuf(NULL) +{ + if (filePath.size() > 3 && filePath.substr(filePath.size() - 3, 3) == ".gz") { + m_streambuf = new gzfilebuf(filePath.c_str()); + } else { + std::filebuf* fb = new std::filebuf(); + fb = fb->open(filePath.c_str(), std::ios::in); + if (!fb) { + cerr << "Can't read " << filePath.c_str() << endl; + exit(1); + } + m_streambuf = fb; + } + this->init(m_streambuf); +} + +InputFileStream::~InputFileStream() +{ + delete m_streambuf; + m_streambuf = NULL; +} + +void InputFileStream::Close() +{ +} + +} + diff --git a/probingpt/InputFileStream.h b/probingpt/InputFileStream.h new file mode 100644 index 000000000..99933c093 --- /dev/null +++ b/probingpt/InputFileStream.h @@ -0,0 +1,46 @@ +// $Id$ + +/*********************************************************************** + Moses - factored phrase-based language decoder + Copyright (C) 2006 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#pragma once + +#include +#include +#include + +namespace probingpt +{ + +/** Used in place of std::istream, can read zipped files if it ends in .gz + */ +class InputFileStream: public std::istream +{ +protected: + std::streambuf *m_streambuf; +public: + + explicit InputFileStream(const std::string &filePath); + ~InputFileStream(); + + void Close(); +}; + +} + diff --git a/probingpt/Jamfile b/probingpt/Jamfile index 214da47ac..b08536d5f 100644 --- a/probingpt/Jamfile +++ b/probingpt/Jamfile @@ -1,7 +1,19 @@ +alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses/TranslationModel/CompactPT//cmph ; + lib probingpt : - temp.cpp + StoreTarget.cpp + StoreVocab.cpp + hash.cpp + line_splitter.cpp + probing_hash_utils.cpp + querying.cpp + storing.cpp + vocabid.cpp + OutputFileStream.cpp + InputFileStream.cpp + deps ; -exe ppt : Main.cpp probingpt ; +exe CreateProbingPT : CreateProbingPT.cpp probingpt ; -alias programs : ppt ; +alias programs : CreateProbingPT ; diff --git a/probingpt/Main.cpp b/probingpt/Main.cpp deleted file mode 100644 index d04a8018b..000000000 --- a/probingpt/Main.cpp +++ /dev/null @@ -1,5 +0,0 @@ - -int main() -{ - -} diff --git a/probingpt/OutputFileStream.cpp b/probingpt/OutputFileStream.cpp new file mode 100644 index 000000000..56647dc9e --- /dev/null +++ b/probingpt/OutputFileStream.cpp @@ -0,0 +1,87 @@ +// $Id: OutputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $ + 
+/*********************************************************************** + Moses - factored phrase-based language decoder + Copyright (C) 2006 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#include +#include +#include +#include "OutputFileStream.h" +#include "gzfilebuf.h" + +using namespace std; +using namespace boost::algorithm; + +namespace probingpt +{ +OutputFileStream::OutputFileStream() : + boost::iostreams::filtering_ostream(), m_outFile(NULL), m_open(false) +{ +} + +OutputFileStream::OutputFileStream(const std::string &filePath) : + m_outFile(NULL), m_open(false) +{ + Open(filePath); +} + +OutputFileStream::~OutputFileStream() +{ + Close(); +} + +bool OutputFileStream::Open(const std::string &filePath) +{ + assert(!m_open); + if (filePath == std::string("-")) { + // Write to standard output. Leave m_outFile null. 
+ this->push(std::cout); + } else { + m_outFile = new ofstream(filePath.c_str(), + ios_base::out | ios_base::binary); + if (m_outFile->fail()) { + return false; + } + + if (ends_with(filePath, ".gz")) { + this->push(boost::iostreams::gzip_compressor()); + } + this->push(*m_outFile); + } + + m_open = true; + return true; +} + +void OutputFileStream::Close() +{ + if (!m_open) return; + this->flush(); + if (m_outFile) { + this->pop(); // file + + m_outFile->close(); + delete m_outFile; + m_outFile = NULL; + } + m_open = false; +} + +} + diff --git a/probingpt/OutputFileStream.h b/probingpt/OutputFileStream.h new file mode 100644 index 000000000..0b2fc3251 --- /dev/null +++ b/probingpt/OutputFileStream.h @@ -0,0 +1,81 @@ +// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $ + +/*********************************************************************** + Moses - factored phrase-based language decoder + Copyright (C) 2006 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#pragma once + +#include +#include +#include +#include +#include + +namespace probingpt +{ + +/** Version of std::ostream with transparent compression. 
+ * + * Transparently compresses output when writing to a file whose name ends in + * ".gz". Or, writes to stdout instead of a file when given a filename + * consisting of just a dash ("-"). + */ +class OutputFileStream: public boost::iostreams::filtering_ostream +{ +private: + /** File that needs flushing & closing when we close this stream. + * + * Is NULL when no file is opened, e.g. when writing to standard output. + */ + std::ofstream *m_outFile; + + /// Is this stream open? + bool m_open; + +public: + /** Create an unopened OutputFileStream. + * + * Until it's been opened, nothing can be done with this stream. + */ + OutputFileStream(); + + /// Create an OutputFileStream, and open it by calling Open(). + OutputFileStream(const std::string &filePath); + virtual ~OutputFileStream(); + + // TODO: Can we please just always throw an exception when this fails? + /** Open stream. + * + * If filePath is "-" (just a dash), this opens the stream for writing to + * standard output. Otherwise, it opens the given file. If the filename + * has the ".gz" suffix, output will be transparently compressed. + * + * Call Close() to close the file. + * + * Returns whether opening the file was successful. It may also throw an + * exception on failure. + */ + bool Open(const std::string &filePath); + + /// Flush and close stream. After this, the stream can be opened again. 
+ void Close(); +}; + +} + diff --git a/moses2/TranslationModel/ProbingPT/StoreTarget.cpp b/probingpt/StoreTarget.cpp similarity index 93% rename from moses2/TranslationModel/ProbingPT/StoreTarget.cpp rename to probingpt/StoreTarget.cpp index af0de5c31..c01eb012a 100644 --- a/moses2/TranslationModel/ProbingPT/StoreTarget.cpp +++ b/probingpt/StoreTarget.cpp @@ -8,12 +8,12 @@ #include "StoreTarget.h" #include "line_splitter.hh" #include "probing_hash_utils.hh" -#include "../../legacy/OutputFileStream.h" -#include "../../legacy/Util2.h" +#include "moses2/legacy/OutputFileStream.h" +#include "moses2/legacy/Util2.h" using namespace std; -namespace Moses2 +namespace probingpt { StoreTarget::StoreTarget(const std::string &basepath) @@ -51,7 +51,7 @@ uint64_t StoreTarget::Save() } // clear coll - RemoveAllInColl(m_coll); + Moses2::RemoveAllInColl(m_coll); m_coll.clear(); // starting position of coll @@ -149,10 +149,10 @@ void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg) util::SingleCharacter(' ')); while (it) { string tok = it->as_string(); - float prob = Scan(tok); + float prob = Moses2::Scan(tok); if (log_prob) { - prob = FloorScore(log(prob)); + prob = Moses2::FloorScore(log(prob)); if (prob == 0.0f) prob = 0.0000000001; } @@ -172,12 +172,12 @@ void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg) it = util::TokenIter(line.word_align, util::SingleCharacter(' ')); while (it) { - string tokPair = Trim(it->as_string()); + string tokPair = Moses2::Trim(it->as_string()); if (tokPair.empty()) { break; } - vector alignPair = Tokenize(tokPair, "-"); + vector alignPair = Moses2::Tokenize(tokPair, "-"); assert(alignPair.size() == 2); bool nonTerm = false; @@ -241,11 +241,11 @@ void StoreTarget::AppendLexRO(std::string &prop, std::vector &retvector, //cerr << "lexProb=" << lexProb << endl; // append lex probs to pt probs - vector scores = Tokenize(lexProb); + vector scores = Moses2::Tokenize(lexProb); if (log_prob) { for (size_t i = 0; i 
< scores.size(); ++i) { - scores[i] = FloorScore(log(scores[i])); + scores[i] = Moses2::FloorScore(log(scores[i])); if (scores[i] == 0.0f) scores[i] = 0.0000000001; } } diff --git a/moses2/TranslationModel/ProbingPT/StoreTarget.h b/probingpt/StoreTarget.h similarity index 98% rename from moses2/TranslationModel/ProbingPT/StoreTarget.h rename to probingpt/StoreTarget.h index 77ccf08cd..7e5564ef1 100644 --- a/moses2/TranslationModel/ProbingPT/StoreTarget.h +++ b/probingpt/StoreTarget.h @@ -13,7 +13,7 @@ #include #include "StoreVocab.h" -namespace Moses2 +namespace probingpt { class line_text; diff --git a/moses2/TranslationModel/ProbingPT/StoreVocab.cpp b/probingpt/StoreVocab.cpp similarity index 88% rename from moses2/TranslationModel/ProbingPT/StoreVocab.cpp rename to probingpt/StoreVocab.cpp index e0b5b0b08..970249534 100644 --- a/moses2/TranslationModel/ProbingPT/StoreVocab.cpp +++ b/probingpt/StoreVocab.cpp @@ -7,7 +7,7 @@ #include #include "StoreVocab.h" -namespace Moses2 +namespace probingpt { } /* namespace Moses2 */ diff --git a/moses2/TranslationModel/ProbingPT/StoreVocab.h b/probingpt/StoreVocab.h similarity index 91% rename from moses2/TranslationModel/ProbingPT/StoreVocab.h rename to probingpt/StoreVocab.h index 3c405af66..cd0b16384 100644 --- a/moses2/TranslationModel/ProbingPT/StoreVocab.h +++ b/probingpt/StoreVocab.h @@ -7,10 +7,10 @@ #pragma once #include #include -#include "../../legacy/OutputFileStream.h" -#include "../../legacy/Util2.h" +#include "OutputFileStream.h" +#include "moses2/legacy/Util2.h" -namespace Moses2 +namespace probingpt { template diff --git a/probingpt/gzfilebuf.h b/probingpt/gzfilebuf.h new file mode 100644 index 000000000..a33b19d99 --- /dev/null +++ b/probingpt/gzfilebuf.h @@ -0,0 +1,94 @@ +#ifndef moses_gzfile_buf_h +#define moses_gzfile_buf_h + +#include +#include +#include +#include + +namespace probingpt +{ + +/** wrapper around gzip input stream. 
Unknown parentage + * @todo replace with boost version - output stream already uses it + */ +class gzfilebuf: public std::streambuf +{ +public: + gzfilebuf(const char *filename) { + _gzf = gzopen(filename, "rb"); + if (!_gzf) throw std::runtime_error( + "Could not open " + std::string(filename) + "."); + setg(_buff + sizeof(int), // beginning of putback area + _buff + sizeof(int), // read position + _buff + sizeof(int)); // end position + } + ~gzfilebuf() { + gzclose(_gzf); + } +protected: + virtual int_type overflow(int_type /* c */) { + throw; + } + + // write multiple characters + virtual std::streamsize xsputn(const char* /* s */, std::streamsize /* num */) { + throw; + } + + virtual std::streampos seekpos(std::streampos /* sp */, + std::ios_base::openmode /* which = std::ios_base::in | std::ios_base::out */) { + throw; + } + + //read one character + virtual int_type underflow() { + // is read position before end of _buff? + if (gptr() < egptr()) { + return traits_type::to_int_type(*gptr()); + } + + /* process size of putback area + * - use number of characters read + * - but at most four + */ + unsigned int numPutback = gptr() - eback(); + if (numPutback > sizeof(int)) { + numPutback = sizeof(int); + } + + /* copy up to four characters previously read into + * the putback _buff (area of first four characters) + */ + std::memmove(_buff + (sizeof(int) - numPutback), gptr() - numPutback, + numPutback); + + // read new characters + int num = gzread(_gzf, _buff + sizeof(int), _buffsize - sizeof(int)); + if (num <= 0) { + // ERROR or EOF + return EOF; + } + + // reset _buff pointers + setg(_buff + (sizeof(int) - numPutback), // beginning of putback area + _buff + sizeof(int), // read position + _buff + sizeof(int) + num); // end of buffer + + // return next character + return traits_type::to_int_type(*gptr()); + } + + std::streamsize xsgetn(char* s, std::streamsize num) { + return gzread(_gzf, s, num); + } + +private: + gzFile _gzf; + static const unsigned int 
_buffsize = 1024; + char _buff[_buffsize]; +}; + +} + +#endif diff --git a/moses2/TranslationModel/ProbingPT/hash.cpp b/probingpt/hash.cpp similarity index 97% rename from moses2/TranslationModel/ProbingPT/hash.cpp rename to probingpt/hash.cpp index a0bda389a..e3959a87b 100644 --- a/moses2/TranslationModel/ProbingPT/hash.cpp +++ b/probingpt/hash.cpp @@ -3,7 +3,7 @@ using namespace std; -namespace Moses2 +namespace probingpt { uint64_t getHash(StringPiece text) diff --git a/moses2/TranslationModel/ProbingPT/hash.hh b/probingpt/hash.hh similarity index 94% rename from moses2/TranslationModel/ProbingPT/hash.hh rename to probingpt/hash.hh index 78cc27999..9d14e73d6 100644 --- a/moses2/TranslationModel/ProbingPT/hash.hh +++ b/probingpt/hash.hh @@ -6,7 +6,7 @@ #include "util/tokenize_piece.hh" #include -namespace Moses2 +namespace probingpt { //Gets the MurmurmurHash for give string diff --git a/moses2/TranslationModel/ProbingPT/line_splitter.cpp b/probingpt/line_splitter.cpp similarity index 99% rename from moses2/TranslationModel/ProbingPT/line_splitter.cpp rename to probingpt/line_splitter.cpp index e4b5e2694..8c99369f9 100644 --- a/moses2/TranslationModel/ProbingPT/line_splitter.cpp +++ b/probingpt/line_splitter.cpp @@ -1,6 +1,6 @@ #include "line_splitter.hh" -namespace Moses2 +namespace probingpt { line_text splitLine(const StringPiece &textin, bool scfg) diff --git a/moses2/TranslationModel/ProbingPT/line_splitter.hh b/probingpt/line_splitter.hh similarity index 98% rename from moses2/TranslationModel/ProbingPT/line_splitter.hh rename to probingpt/line_splitter.hh index 0b91fed09..e568270e2 100644 --- a/moses2/TranslationModel/ProbingPT/line_splitter.hh +++ b/probingpt/line_splitter.hh @@ -9,7 +9,7 @@ #include "util/tokenize_piece.hh" #include -namespace Moses2 +namespace probingpt { //Struct for holding processed line diff --git a/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp b/probingpt/probing_hash_utils.cpp similarity index 97% rename from 
moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp rename to probingpt/probing_hash_utils.cpp index 96c317b65..80f9dee81 100644 --- a/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp +++ b/probingpt/probing_hash_utils.cpp @@ -2,7 +2,7 @@ #include "probing_hash_utils.hh" #include "util/file.hh" -namespace Moses2 +namespace probingpt { //Read table from disk, return memory map location diff --git a/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh b/probingpt/probing_hash_utils.hh similarity index 97% rename from moses2/TranslationModel/ProbingPT/probing_hash_utils.hh rename to probingpt/probing_hash_utils.hh index fa5a9a5a0..a21236a08 100644 --- a/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh +++ b/probingpt/probing_hash_utils.hh @@ -11,7 +11,7 @@ #include #include -namespace Moses2 +namespace probingpt { #define API_VERSION 15 diff --git a/moses2/TranslationModel/ProbingPT/querying.cpp b/probingpt/querying.cpp similarity index 88% rename from moses2/TranslationModel/ProbingPT/querying.cpp rename to probingpt/querying.cpp index d09bd45a8..bd104b35c 100644 --- a/moses2/TranslationModel/ProbingPT/querying.cpp +++ b/probingpt/querying.cpp @@ -1,10 +1,10 @@ #include "querying.hh" #include "util/exception.hh" -#include "../../legacy/Util2.h" +#include "moses2/legacy/Util2.h" using namespace std; -namespace Moses2 +namespace probingpt { QueryEngine::QueryEngine(const char * filepath, util::LoadMethod load_method) @@ -103,7 +103,7 @@ uint64_t QueryEngine::getKey(uint64_t source_phrase[], size_t size) const { //TOO SLOW //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size()); - return Moses2::getKey(source_phrase, size); + return probingpt::getKey(source_phrase, size); } std::pair QueryEngine::query(uint64_t key) @@ -127,14 +127,14 @@ void QueryEngine::read_alignments(const std::string &alignPath) vector toks = Moses2::Tokenize(line, "\t "); UTIL_THROW_IF2(toks.size() == 0, "Corrupt alignment file"); - uint32_t alignInd 
= Scan(toks[0]); + uint32_t alignInd = Moses2::Scan(toks[0]); if (alignInd >= alignColl.size()) { alignColl.resize(alignInd + 1); } Alignments &aligns = alignColl[alignInd]; for (size_t i = 1; i < toks.size(); ++i) { - size_t pos = Scan(toks[i]); + size_t pos = Moses2::Scan(toks[i]); aligns.push_back(pos); } } @@ -142,25 +142,25 @@ void QueryEngine::read_alignments(const std::string &alignPath) void QueryEngine::file_exits(const std::string &basePath) { - if (!FileExists(basePath + "/Alignments.dat")) { + if (!Moses2::FileExists(basePath + "/Alignments.dat")) { UTIL_THROW2("Require file does not exist in: " << basePath << "/Alignments.dat"); } - if (!FileExists(basePath + "/TargetColl.dat")) { + if (!Moses2::FileExists(basePath + "/TargetColl.dat")) { UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetColl.dat"); } - if (!FileExists(basePath + "/TargetVocab.dat")) { + if (!Moses2::FileExists(basePath + "/TargetVocab.dat")) { UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetVocab.dat"); } - if (!FileExists(basePath + "/cache")) { + if (!Moses2::FileExists(basePath + "/cache")) { UTIL_THROW2("Require file does not exist in: " << basePath << "/cache"); } - if (!FileExists(basePath + "/config")) { + if (!Moses2::FileExists(basePath + "/config")) { UTIL_THROW2("Require file does not exist in: " << basePath << "/config"); } - if (!FileExists(basePath + "/probing_hash.dat")) { + if (!Moses2::FileExists(basePath + "/probing_hash.dat")) { UTIL_THROW2("Require file does not exist in: " << basePath << "/probing_hash.dat"); } - if (!FileExists(basePath + "/source_vocabids")) { + if (!Moses2::FileExists(basePath + "/source_vocabids")) { UTIL_THROW2("Require file does not exist in: " << basePath << "/source_vocabids"); } diff --git a/moses2/TranslationModel/ProbingPT/querying.hh b/probingpt/querying.hh similarity index 95% rename from moses2/TranslationModel/ProbingPT/querying.hh rename to probingpt/querying.hh index 4cb3228f2..4bb9d0c96 
100644 --- a/moses2/TranslationModel/ProbingPT/querying.hh +++ b/probingpt/querying.hh @@ -9,9 +9,9 @@ #include "probing_hash_utils.hh" #include "hash.hh" //Includes line splitter #include "line_splitter.hh" -#include "../../legacy/Util2.h" +#include "moses2/legacy/Util2.h" -namespace Moses2 +namespace probingpt { class QueryEngine @@ -68,7 +68,7 @@ public: } const std::string &foundStr = iter->second; - found = Scan(foundStr); + found = Moses2::Scan(foundStr); return true; } diff --git a/moses2/TranslationModel/ProbingPT/storing.cpp b/probingpt/storing.cpp similarity index 95% rename from moses2/TranslationModel/ProbingPT/storing.cpp rename to probingpt/storing.cpp index ff26bc72b..bf55042c9 100644 --- a/moses2/TranslationModel/ProbingPT/storing.cpp +++ b/probingpt/storing.cpp @@ -4,12 +4,12 @@ #include "storing.hh" #include "StoreTarget.h" #include "StoreVocab.h" -#include "../../legacy/Util2.h" -#include "../../legacy/InputFileStream.h" +#include "moses2/legacy/Util2.h" +#include "InputFileStream.h" using namespace std; -namespace Moses2 +namespace probingpt { /////////////////////////////////////////////////////////////////////// @@ -161,9 +161,9 @@ void createProbingPT(const std::string &phrasetable_path, // update cache - CURRENT source phrase, not prev if (max_cache_size) { std::string countStr = line.counts.as_string(); - countStr = Trim(countStr); + countStr = Moses2::Trim(countStr); if (!countStr.empty()) { - std::vector toks = Tokenize(countStr); + std::vector toks = Moses2::Tokenize(countStr); //cerr << "CACHE:" << line.source_phrase << " " << countStr << " " << toks[1] << endl; if (toks.size() >= 2) { @@ -174,7 +174,7 @@ void createProbingPT(const std::string &phrasetable_path, uint64_t currKey = getKey(currVocabidSource); CacheItem *item = new CacheItem( - Trim(line.source_phrase.as_string()), + Moses2::Trim(line.source_phrase.as_string()), currKey, toks[1]); cache.push(item); @@ -244,7 +244,7 @@ size_t countUniqueSource(const std::string &path) 
std::string line, prevSource; while (std::getline(strme, line)) { - std::vector toks = TokenizeMultiCharSeparator(line, "|||"); + std::vector toks = Moses2::TokenizeMultiCharSeparator(line, "|||"); assert(toks.size() != 0); if (prevSource != toks[0]) { @@ -284,7 +284,7 @@ void serialize_cache( uint64_t getKey(const std::vector &vocabid_source) { - return Moses2::getKey(vocabid_source.data(), vocabid_source.size()); + return probingpt::getKey(vocabid_source.data(), vocabid_source.size()); } std::vector CreatePrefix(const std::vector &vocabid_source, size_t endPos) diff --git a/moses2/TranslationModel/ProbingPT/storing.hh b/probingpt/storing.hh similarity index 98% rename from moses2/TranslationModel/ProbingPT/storing.hh rename to probingpt/storing.hh index a6f8dd143..4efd1a450 100644 --- a/moses2/TranslationModel/ProbingPT/storing.hh +++ b/probingpt/storing.hh @@ -17,7 +17,7 @@ #include "util/file.hh" #include "vocabid.hh" -namespace Moses2 +namespace probingpt { typedef std::vector SourcePhrase; diff --git a/probingpt/temp.cpp b/probingpt/temp.cpp deleted file mode 100644 index e3d03f997..000000000 --- a/probingpt/temp.cpp +++ /dev/null @@ -1,5 +0,0 @@ - -int foo() -{ - return 5; -} diff --git a/moses2/TranslationModel/ProbingPT/vocabid.cpp b/probingpt/vocabid.cpp similarity index 87% rename from moses2/TranslationModel/ProbingPT/vocabid.cpp rename to probingpt/vocabid.cpp index e752f76bc..d1a14d7a8 100644 --- a/moses2/TranslationModel/ProbingPT/vocabid.cpp +++ b/probingpt/vocabid.cpp @@ -1,9 +1,9 @@ #include #include "vocabid.hh" #include "StoreVocab.h" -#include "../../legacy/Util2.h" +#include "moses2/legacy/Util2.h" -namespace Moses2 +namespace probingpt { void add_to_map(StoreVocab &sourceVocab, @@ -45,9 +45,9 @@ void read_map(std::map &karta, const char* filename) std::string line; while (getline(is, line)) { - std::vector toks = Tokenize(line, "\t"); + std::vector toks = Moses2::Tokenize(line, "\t"); assert(toks.size() == 2); - uint64_t ind = Scan(toks[1]); 
+ uint64_t ind = Moses2::Scan(toks[1]); karta[ind] = toks[0]; } diff --git a/moses2/TranslationModel/ProbingPT/vocabid.hh b/probingpt/vocabid.hh similarity index 97% rename from moses2/TranslationModel/ProbingPT/vocabid.hh rename to probingpt/vocabid.hh index 9d37bd1b5..07837fde8 100644 --- a/moses2/TranslationModel/ProbingPT/vocabid.hh +++ b/probingpt/vocabid.hh @@ -13,7 +13,7 @@ #include "util/string_piece.hh" //Tokenization and work with StringPiece #include "util/tokenize_piece.hh" -namespace Moses2 +namespace probingpt { template class StoreVocab; From 804cccca3fec61afc614c562e0ac57bf5b7ea208 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 15 Feb 2017 22:20:41 +0000 Subject: [PATCH 129/176] probing pt lib compiles.2 --- moses2/Jamfile | 13 +++--- .../{ProbingPT => }/ProbingPT.cpp | 44 +++++++++---------- .../{ProbingPT => }/ProbingPT.h | 18 +++++--- probingpt/CreateProbingPT.cpp | 4 +- probingpt/Jamfile | 16 ++++++- probingpt/StoreTarget.cpp | 4 +- 6 files changed, 57 insertions(+), 42 deletions(-) rename moses2/TranslationModel/{ProbingPT => }/ProbingPT.cpp (96%) rename moses2/TranslationModel/{ProbingPT => }/ProbingPT.h (96%) diff --git a/moses2/Jamfile b/moses2/Jamfile index 3b4d5d10a..46449d62f 100644 --- a/moses2/Jamfile +++ b/moses2/Jamfile @@ -1,4 +1,4 @@ -alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses/TranslationModel/CompactPT//cmph ../moses//moses ; +alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses/TranslationModel/CompactPT//cmph ; lib moses2_lib : AlignmentInfo.cpp @@ -53,9 +53,10 @@ alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses/Translatio LM/KENLMBatch.cpp LM/GPULM.cpp - TranslationModel/PhraseTable.cpp - TranslationModel/Transliteration.cpp - TranslationModel/UnknownWordPenalty.cpp + TranslationModel/PhraseTable.cpp + TranslationModel/ProbingPT.cpp + TranslationModel/Transliteration.cpp + TranslationModel/UnknownWordPenalty.cpp TranslationModel/Memory/PhraseTableMemory.cpp 
TranslationModel/CompactPT/BlockHashIndex.cpp @@ -65,8 +66,6 @@ alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses/Translatio TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp TranslationModel/CompactPT/ThrowingFwrite.cpp - TranslationModel/ProbingPT/ProbingPT.cpp - parameters/AllOptions.cpp parameters/BookkeepingOptions.cpp parameters/ContextParameters.cpp @@ -157,7 +156,7 @@ alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses/Translatio deps ; -exe moses2 : Main.cpp moses2_lib ; +exe moses2 : Main.cpp moses2_lib ../probingpt//probingpt ../util//kenutil ../util//rt ../lm//kenlm ; if [ xmlrpc ] { echo "Building Moses2" ; diff --git a/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/moses2/TranslationModel/ProbingPT.cpp similarity index 96% rename from moses2/TranslationModel/ProbingPT/ProbingPT.cpp rename to moses2/TranslationModel/ProbingPT.cpp index 852f0a7b7..5f5090def 100644 --- a/moses2/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/moses2/TranslationModel/ProbingPT.cpp @@ -6,24 +6,24 @@ */ #include #include "ProbingPT.h" -#include "querying.hh" -#include "probing_hash_utils.hh" +#include "probingpt/querying.hh" +#include "probingpt/probing_hash_utils.hh" #include "util/exception.hh" -#include "../../System.h" -#include "../../Scores.h" -#include "../../Phrase.h" -#include "../../legacy/InputFileStream.h" -#include "../../legacy/FactorCollection.h" -#include "../../legacy/Util2.h" -#include "../../FF/FeatureFunctions.h" -#include "../../PhraseBased/PhraseImpl.h" -#include "../../PhraseBased/TargetPhraseImpl.h" -#include "../../PhraseBased/Manager.h" -#include "../../PhraseBased/TargetPhrases.h" -#include "../../SCFG/InputPath.h" -#include "../../SCFG/Manager.h" -#include "../../SCFG/TargetPhraseImpl.h" -#include "../../SCFG/PhraseImpl.h" +#include "../System.h" +#include "../Scores.h" +#include "../Phrase.h" +#include "../legacy/InputFileStream.h" +#include "../legacy/FactorCollection.h" +#include 
"../legacy/Util2.h" +#include "../FF/FeatureFunctions.h" +#include "../PhraseBased/PhraseImpl.h" +#include "../PhraseBased/TargetPhraseImpl.h" +#include "../PhraseBased/Manager.h" +#include "../PhraseBased/TargetPhrases.h" +#include "../SCFG/InputPath.h" +#include "../SCFG/Manager.h" +#include "../SCFG/TargetPhraseImpl.h" +#include "../SCFG/PhraseImpl.h" using namespace std; @@ -81,7 +81,7 @@ ProbingPT::~ProbingPT() void ProbingPT::Load(System &system) { - m_engine = new QueryEngine(m_path.c_str(), load_method); + m_engine = new probingpt::QueryEngine(m_path.c_str(), load_method); m_unkId = 456456546456; @@ -293,14 +293,14 @@ TargetPhraseImpl *ProbingPT::CreateTargetPhrase( const System &system, const char *&offset) const { - TargetPhraseInfo *tpInfo = (TargetPhraseInfo*) offset; + probingpt::TargetPhraseInfo *tpInfo = (probingpt::TargetPhraseInfo*) offset; size_t numRealWords = tpInfo->numWords / m_output.size(); TargetPhraseImpl *tp = new (pool.Allocate()) TargetPhraseImpl(pool, *this, system, numRealWords); - offset += sizeof(TargetPhraseInfo); + offset += sizeof(probingpt::TargetPhraseInfo); // scores SCORE *scores = (SCORE*) offset; @@ -600,12 +600,12 @@ SCFG::TargetPhraseImpl *ProbingPT::CreateTargetPhraseSCFG( const System &system, const char *&offset) const { - TargetPhraseInfo *tpInfo = (TargetPhraseInfo*) offset; + probingpt::TargetPhraseInfo *tpInfo = (probingpt::TargetPhraseInfo*) offset; SCFG::TargetPhraseImpl *tp = new (pool.Allocate()) SCFG::TargetPhraseImpl(pool, *this, system, tpInfo->numWords - 1); - offset += sizeof(TargetPhraseInfo); + offset += sizeof(probingpt::TargetPhraseInfo); // scores SCORE *scores = (SCORE*) offset; diff --git a/moses2/TranslationModel/ProbingPT/ProbingPT.h b/moses2/TranslationModel/ProbingPT.h similarity index 96% rename from moses2/TranslationModel/ProbingPT/ProbingPT.h rename to moses2/TranslationModel/ProbingPT.h index e0dcf40f1..47d22e1b3 100644 --- a/moses2/TranslationModel/ProbingPT/ProbingPT.h +++ 
b/moses2/TranslationModel/ProbingPT.h @@ -11,17 +11,21 @@ #include #include #include -#include "../PhraseTable.h" -#include "../../Vector.h" -#include "../../Phrase.h" -#include "../../SCFG/ActiveChart.h" +#include "PhraseTable.h" +#include "../Vector.h" +#include "../Phrase.h" +#include "../SCFG/ActiveChart.h" #include "util/mmap.hh" +namespace probingpt +{ +class QueryEngine; +class target_text; +} + namespace Moses2 { class AlignmentInfo; -class QueryEngine; -class target_text; class MemPool; class System; class RecycleData; @@ -98,7 +102,7 @@ protected: util::LoadMethod load_method; uint64_t m_unkId; - QueryEngine *m_engine; + probingpt::QueryEngine *m_engine; void CreateAlignmentMap(System &system, const std::string path); diff --git a/probingpt/CreateProbingPT.cpp b/probingpt/CreateProbingPT.cpp index d68b43387..e1b7a40d6 100644 --- a/probingpt/CreateProbingPT.cpp +++ b/probingpt/CreateProbingPT.cpp @@ -1,7 +1,7 @@ #include #include #include "util/usage.hh" -#include "moses/TranslationModel/ProbingPT/storing.hh" +#include "storing.hh" #include "InputFileStream.h" #include "OutputFileStream.h" #include "moses/Util.h" @@ -66,7 +66,7 @@ int main(int argc, char* argv[]) inPath = ReformatSCFGFile(inPath); } - Moses::createProbingPT(inPath, outPath, num_scores, num_lex_scores, log_prob, max_cache_size, scfg); + probingpt::createProbingPT(inPath, outPath, num_scores, num_lex_scores, log_prob, max_cache_size, scfg); //util::PrintUsage(std::cout); return 0; diff --git a/probingpt/Jamfile b/probingpt/Jamfile index b08536d5f..b8560c8c6 100644 --- a/probingpt/Jamfile +++ b/probingpt/Jamfile @@ -1,4 +1,4 @@ -alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses/TranslationModel/CompactPT//cmph ; +alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ; lib probingpt : StoreTarget.cpp @@ -11,9 +11,21 @@ lib probingpt : vocabid.cpp OutputFileStream.cpp InputFileStream.cpp + +# ../util/string_piece.cc +# ../util/exception.cc +# ../util/file.cc +# 
../util/file_piece.cc +# ../util/murmur_hash.cc +# ../util/mmap.cc +# ../util/read_compressed.cc +# ../util/parallel_read.cc +# ../util/ersatz_progress.cc + + deps ; -exe CreateProbingPT : CreateProbingPT.cpp probingpt ; +exe CreateProbingPT : CreateProbingPT.cpp probingpt ../util//kenutil ../util//rt ; alias programs : CreateProbingPT ; diff --git a/probingpt/StoreTarget.cpp b/probingpt/StoreTarget.cpp index c01eb012a..6a041512e 100644 --- a/probingpt/StoreTarget.cpp +++ b/probingpt/StoreTarget.cpp @@ -8,7 +8,7 @@ #include "StoreTarget.h" #include "line_splitter.hh" #include "probing_hash_utils.hh" -#include "moses2/legacy/OutputFileStream.h" +#include "OutputFileStream.h" #include "moses2/legacy/Util2.h" using namespace std; @@ -89,7 +89,7 @@ void StoreTarget::Save(const target_text &rule) void StoreTarget::SaveAlignment() { std::string path = m_basePath + "/Alignments.dat"; - Moses2::OutputFileStream file(path); + probingpt::OutputFileStream file(path); BOOST_FOREACH(Alignments::value_type &valPair, m_aligns) { file << valPair.second << "\t"; From 73d5a1f43a054b823092efe10d9b89a8aab8c7a7 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 15 Feb 2017 23:20:31 +0000 Subject: [PATCH 130/176] probing pt lib compiles.3 --- moses2/FF/FeatureRegistry.cpp | 2 +- moses2/Jamfile | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/moses2/FF/FeatureRegistry.cpp b/moses2/FF/FeatureRegistry.cpp index bb8ee5058..f7eae4205 100644 --- a/moses2/FF/FeatureRegistry.cpp +++ b/moses2/FF/FeatureRegistry.cpp @@ -1,7 +1,7 @@ #include "FeatureRegistry.h" #include "../TranslationModel/Memory/PhraseTableMemory.h" -#include "../TranslationModel/ProbingPT/ProbingPT.h" +#include "../TranslationModel/ProbingPT.h" #include "../TranslationModel/UnknownWordPenalty.h" #include "../TranslationModel/Transliteration.h" diff --git a/moses2/Jamfile b/moses2/Jamfile index 46449d62f..0e978998f 100644 --- a/moses2/Jamfile +++ b/moses2/Jamfile @@ -1,4 +1,8 @@ -alias deps : 
..//z ..//boost_iostreams ..//boost_filesystem ../moses/TranslationModel/CompactPT//cmph ; +local with-cmph = [ option.get "with-cmph" ] ; +lib cmph : : $(with-cmph)/lib $(with-cmph)/lib64 ; + +alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ; + lib moses2_lib : AlignmentInfo.cpp @@ -154,7 +158,12 @@ alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses/Translatio server/Translator.cpp server/TranslationRequest.cpp - deps ; + deps + cmph + : + $(with-cmph)/include + + ; exe moses2 : Main.cpp moses2_lib ../probingpt//probingpt ../util//kenutil ../util//rt ../lm//kenlm ; From 3b4fc04fa0a495e4e978b5bec48299203cea4c79 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 15 Feb 2017 23:39:13 +0000 Subject: [PATCH 131/176] eclipse --- contrib/other-builds/moses2-cmd/.cproject | 2 + contrib/other-builds/moses2-cmd/.project | 1 + contrib/other-builds/moses2/.project | 101 ++-------------- contrib/other-builds/probingpt/.cproject | 119 ++++++++++++++++++ contrib/other-builds/probingpt/.project | 139 ++++++++++++++++++++++ 5 files changed, 269 insertions(+), 93 deletions(-) create mode 100644 contrib/other-builds/probingpt/.cproject create mode 100644 contrib/other-builds/probingpt/.project diff --git a/contrib/other-builds/moses2-cmd/.cproject b/contrib/other-builds/moses2-cmd/.cproject index bb8b44994..b71b35234 100644 --- a/contrib/other-builds/moses2-cmd/.cproject +++ b/contrib/other-builds/moses2-cmd/.cproject @@ -56,6 +56,7 @@ + @@ -77,6 +78,7 @@ + diff --git a/contrib/other-builds/moses2-cmd/.project b/contrib/other-builds/moses2-cmd/.project index 84591d671..7b1b96ecc 100644 --- a/contrib/other-builds/moses2-cmd/.project +++ b/contrib/other-builds/moses2-cmd/.project @@ -6,6 +6,7 @@ lm moses moses2 + probingpt util diff --git a/contrib/other-builds/moses2/.project b/contrib/other-builds/moses2/.project index 431ba885e..8677885e1 100644 --- a/contrib/other-builds/moses2/.project +++ b/contrib/other-builds/moses2/.project @@ -808,9 +808,14 @@ 
PARENT-3-PROJECT_LOC/moses2/TranslationModel/PhraseTable.h - TranslationModel/ProbingPT - 2 - virtual:/virtual + TranslationModel/ProbingPT.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT.cpp + + + TranslationModel/ProbingPT.h + 1 + PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT.h TranslationModel/Transliteration.cpp @@ -1492,96 +1497,6 @@ 1 PARENT-3-PROJECT_LOC/moses2/TranslationModel/Memory/PhraseTableMemory.h - - TranslationModel/ProbingPT/ProbingPT.cpp - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/ProbingPT.cpp - - - TranslationModel/ProbingPT/ProbingPT.h - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/ProbingPT.h - - - TranslationModel/ProbingPT/StoreTarget.cpp - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/StoreTarget.cpp - - - TranslationModel/ProbingPT/StoreTarget.h - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/StoreTarget.h - - - TranslationModel/ProbingPT/StoreVocab.cpp - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/StoreVocab.cpp - - - TranslationModel/ProbingPT/StoreVocab.h - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/StoreVocab.h - - - TranslationModel/ProbingPT/hash.cpp - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/hash.cpp - - - TranslationModel/ProbingPT/hash.hh - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/hash.hh - - - TranslationModel/ProbingPT/line_splitter.cpp - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/line_splitter.cpp - - - TranslationModel/ProbingPT/line_splitter.hh - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/line_splitter.hh - - - TranslationModel/ProbingPT/probing_hash_utils.cpp - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/probing_hash_utils.cpp - - - TranslationModel/ProbingPT/probing_hash_utils.hh - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/probing_hash_utils.hh - - - TranslationModel/ProbingPT/querying.cpp - 1 - 
PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/querying.cpp - - - TranslationModel/ProbingPT/querying.hh - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/querying.hh - - - TranslationModel/ProbingPT/storing.cpp - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/storing.cpp - - - TranslationModel/ProbingPT/storing.hh - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/storing.hh - - - TranslationModel/ProbingPT/vocabid.cpp - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/vocabid.cpp - - - TranslationModel/ProbingPT/vocabid.hh - 1 - PARENT-3-PROJECT_LOC/moses2/TranslationModel/ProbingPT/vocabid.hh - defer/CubePruningBitmapStack/Misc.cpp 1 diff --git a/contrib/other-builds/probingpt/.cproject b/contrib/other-builds/probingpt/.cproject new file mode 100644 index 000000000..05cae6aa9 --- /dev/null +++ b/contrib/other-builds/probingpt/.cproject @@ -0,0 +1,119 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/contrib/other-builds/probingpt/.project b/contrib/other-builds/probingpt/.project new file mode 100644 index 000000000..5089b557a --- /dev/null +++ b/contrib/other-builds/probingpt/.project @@ -0,0 +1,139 @@ + + + probingpt + + + + + + org.eclipse.cdt.managedbuilder.core.genmakebuilder + clean,full,incremental, + + + + + org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder + full,incremental, + + + + + + org.eclipse.cdt.core.cnature + org.eclipse.cdt.core.ccnature + org.eclipse.cdt.managedbuilder.core.managedBuildNature + org.eclipse.cdt.managedbuilder.core.ScannerConfigNature + + + + InputFileStream.cpp + 1 + PARENT-3-PROJECT_LOC/probingpt/InputFileStream.cpp + + + InputFileStream.h + 1 + PARENT-3-PROJECT_LOC/probingpt/InputFileStream.h + + + Jamfile + 1 + PARENT-3-PROJECT_LOC/probingpt/Jamfile + + + OutputFileStream.cpp + 1 + 
PARENT-3-PROJECT_LOC/probingpt/OutputFileStream.cpp + + + OutputFileStream.h + 1 + PARENT-3-PROJECT_LOC/probingpt/OutputFileStream.h + + + StoreTarget.cpp + 1 + PARENT-3-PROJECT_LOC/probingpt/StoreTarget.cpp + + + StoreTarget.h + 1 + PARENT-3-PROJECT_LOC/probingpt/StoreTarget.h + + + StoreVocab.cpp + 1 + PARENT-3-PROJECT_LOC/probingpt/StoreVocab.cpp + + + StoreVocab.h + 1 + PARENT-3-PROJECT_LOC/probingpt/StoreVocab.h + + + gzfilebuf.h + 1 + PARENT-3-PROJECT_LOC/probingpt/gzfilebuf.h + + + hash.cpp + 1 + PARENT-3-PROJECT_LOC/probingpt/hash.cpp + + + hash.hh + 1 + PARENT-3-PROJECT_LOC/probingpt/hash.hh + + + line_splitter.cpp + 1 + PARENT-3-PROJECT_LOC/probingpt/line_splitter.cpp + + + line_splitter.hh + 1 + PARENT-3-PROJECT_LOC/probingpt/line_splitter.hh + + + probing_hash_utils.cpp + 1 + PARENT-3-PROJECT_LOC/probingpt/probing_hash_utils.cpp + + + probing_hash_utils.hh + 1 + PARENT-3-PROJECT_LOC/probingpt/probing_hash_utils.hh + + + querying.cpp + 1 + PARENT-3-PROJECT_LOC/probingpt/querying.cpp + + + querying.hh + 1 + PARENT-3-PROJECT_LOC/probingpt/querying.hh + + + storing.cpp + 1 + PARENT-3-PROJECT_LOC/probingpt/storing.cpp + + + storing.hh + 1 + PARENT-3-PROJECT_LOC/probingpt/storing.hh + + + vocabid.cpp + 1 + PARENT-3-PROJECT_LOC/probingpt/vocabid.cpp + + + vocabid.hh + 1 + PARENT-3-PROJECT_LOC/probingpt/vocabid.hh + + + From 07cef43cea1d3b7a542eab817718a93b74c68ebc Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 16 Feb 2017 05:46:51 -0500 Subject: [PATCH 132/176] allow optional cmph --- moses2/Jamfile | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/moses2/Jamfile b/moses2/Jamfile index 0e978998f..f973456ec 100644 --- a/moses2/Jamfile +++ b/moses2/Jamfile @@ -1,7 +1,18 @@ local with-cmph = [ option.get "with-cmph" ] ; -lib cmph : : $(with-cmph)/lib $(with-cmph)/lib64 ; +local includes = ; -alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ; +if $(with-cmph) { + lib cmph : : $(with-cmph)/lib $(with-cmph)/lib64 
; + includes += $(with-cmph)/include ; +} +else { + alias cmph ; +} + +max-factors = [ option.get "max-factors" : 4 : 4 ] ; +max-factors = MAX_NUM_FACTORS=$(max-factors) $(FACTOR-LOG) ; + +alias deps : ..//z ..//boost_iostreams ..//boost_filesystem : : : $(max-factors) ; lib moses2_lib : @@ -161,8 +172,7 @@ alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ; deps cmph : - $(with-cmph)/include - + $(includes) ; exe moses2 : Main.cpp moses2_lib ../probingpt//probingpt ../util//kenutil ../util//rt ../lm//kenlm ; From a391b84b4275d90cabd2bf9d1734ac2c10c9e8bd Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 16 Feb 2017 11:30:39 +0000 Subject: [PATCH 133/176] moses and moses2 both use probingpt lib --- contrib/other-builds/moses/.project | 241 +------------- moses/Jamfile | 4 +- .../{ProbingPT => }/ProbingPT.cpp | 10 +- .../{ProbingPT => }/ProbingPT.h | 13 +- moses/TranslationModel/ProbingPT/Jamfile | 8 - .../ProbingPT/StoreTarget.cpp | 264 ---------------- .../TranslationModel/ProbingPT/StoreTarget.h | 51 --- .../TranslationModel/ProbingPT/StoreVocab.cpp | 13 - moses/TranslationModel/ProbingPT/StoreVocab.h | 60 ---- moses/TranslationModel/ProbingPT/hash.cpp | 44 --- moses/TranslationModel/ProbingPT/hash.hh | 17 - .../ProbingPT/line_splitter.cpp | 103 ------ .../ProbingPT/line_splitter.hh | 57 ---- .../ProbingPT/probing_hash_utils.cpp | 50 --- .../ProbingPT/probing_hash_utils.hh | 51 --- moses/TranslationModel/ProbingPT/querying.cpp | 141 --------- moses/TranslationModel/ProbingPT/querying.hh | 66 ---- moses/TranslationModel/ProbingPT/storing.cpp | 298 ------------------ moses/TranslationModel/ProbingPT/storing.hh | 92 ------ moses/TranslationModel/ProbingPT/vocabid.cpp | 59 ---- moses/TranslationModel/ProbingPT/vocabid.hh | 29 -- probingpt/Jamfile | 1 + probingpt/querying.hh | 3 +- 23 files changed, 28 insertions(+), 1647 deletions(-) rename moses/TranslationModel/{ProbingPT => }/ProbingPT.cpp (96%) rename moses/TranslationModel/{ProbingPT => }/ProbingPT.h 
(93%) delete mode 100644 moses/TranslationModel/ProbingPT/Jamfile delete mode 100644 moses/TranslationModel/ProbingPT/StoreTarget.cpp delete mode 100644 moses/TranslationModel/ProbingPT/StoreTarget.h delete mode 100644 moses/TranslationModel/ProbingPT/StoreVocab.cpp delete mode 100644 moses/TranslationModel/ProbingPT/StoreVocab.h delete mode 100644 moses/TranslationModel/ProbingPT/hash.cpp delete mode 100644 moses/TranslationModel/ProbingPT/hash.hh delete mode 100644 moses/TranslationModel/ProbingPT/line_splitter.cpp delete mode 100644 moses/TranslationModel/ProbingPT/line_splitter.hh delete mode 100644 moses/TranslationModel/ProbingPT/probing_hash_utils.cpp delete mode 100644 moses/TranslationModel/ProbingPT/probing_hash_utils.hh delete mode 100644 moses/TranslationModel/ProbingPT/querying.cpp delete mode 100644 moses/TranslationModel/ProbingPT/querying.hh delete mode 100644 moses/TranslationModel/ProbingPT/storing.cpp delete mode 100644 moses/TranslationModel/ProbingPT/storing.hh delete mode 100644 moses/TranslationModel/ProbingPT/vocabid.cpp delete mode 100644 moses/TranslationModel/ProbingPT/vocabid.hh diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index de065158d..d7e99c253 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -2271,9 +2271,14 @@ PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h - TranslationModel/ProbingPT - 2 - virtual:/virtual + TranslationModel/ProbingPT.cpp + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT.cpp + + + TranslationModel/ProbingPT.h + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT.h TranslationModel/RuleTable @@ -3355,106 +3360,6 @@ 2 virtual:/virtual - - TranslationModel/ProbingPT/Jamfile - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/Jamfile - - - TranslationModel/ProbingPT/ProbingPT.cpp - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/ProbingPT.cpp - - - 
TranslationModel/ProbingPT/ProbingPT.h - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/ProbingPT.h - - - TranslationModel/ProbingPT/StoreTarget.cpp - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/StoreTarget.cpp - - - TranslationModel/ProbingPT/StoreTarget.h - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/StoreTarget.h - - - TranslationModel/ProbingPT/StoreVocab.cpp - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/StoreVocab.cpp - - - TranslationModel/ProbingPT/StoreVocab.h - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/StoreVocab.h - - - TranslationModel/ProbingPT/bin - 2 - virtual:/virtual - - - TranslationModel/ProbingPT/hash.cpp - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/hash.cpp - - - TranslationModel/ProbingPT/hash.hh - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/hash.hh - - - TranslationModel/ProbingPT/line_splitter.cpp - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/line_splitter.cpp - - - TranslationModel/ProbingPT/line_splitter.hh - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/line_splitter.hh - - - TranslationModel/ProbingPT/probing_hash_utils.cpp - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp - - - TranslationModel/ProbingPT/probing_hash_utils.hh - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/probing_hash_utils.hh - - - TranslationModel/ProbingPT/querying.cpp - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/querying.cpp - - - TranslationModel/ProbingPT/querying.hh - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/querying.hh - - - TranslationModel/ProbingPT/storing.cpp - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/storing.cpp - - - TranslationModel/ProbingPT/storing.hh - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/storing.hh - - - TranslationModel/ProbingPT/vocabid.cpp - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/vocabid.cpp - - 
- TranslationModel/ProbingPT/vocabid.hh - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/vocabid.hh - TranslationModel/RuleTable/Loader.h 1 @@ -3955,11 +3860,6 @@ 2 virtual:/virtual - - TranslationModel/ProbingPT/bin/darwin-4.2.1 - 2 - virtual:/virtual - TranslationModel/UG/bin/darwin-4.2.1 2 @@ -4515,11 +4415,6 @@ 2 virtual:/virtual - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release - 2 - virtual:/virtual - TranslationModel/UG/bin/darwin-4.2.1/release 2 @@ -4645,16 +4540,6 @@ 2 virtual:/virtual - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on - 2 - virtual:/virtual - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static - 2 - virtual:/virtual - TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on 2 @@ -4700,16 +4585,6 @@ 2 virtual:/virtual - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static - 2 - virtual:/virtual - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi - 2 - virtual:/virtual - TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static 2 @@ -5350,66 +5225,6 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/link-static/threading-multi/ThrowingFwrite.o - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi - 2 - virtual:/virtual - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/ProbingPT.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/ProbingPT.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/StoreTarget.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/StoreTarget.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/StoreVocab.o - 1 - 
PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/StoreVocab.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/hash.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/hash.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/huffmanish.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/huffmanish.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/line_splitter.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/line_splitter.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/probing_hash_utils.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/probing_hash_utils.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/quering.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/quering.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/querying.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/querying.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/storing.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/storing.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/vocabid.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/link-static/threading-multi/vocabid.o - TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi 
2 @@ -5975,46 +5790,6 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ThrowingFwrite.o - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ProbingPT.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ProbingPT.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/hash.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/hash.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/huffmanish.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/huffmanish.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/line_splitter.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/line_splitter.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/probing_hash_utils.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/probing_hash_utils.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/quering.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/quering.o - - - TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/storing.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/storing.o - - - 
TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/vocabid.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/vocabid.o - TranslationModel/UG/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/count-ptable-features 1 diff --git a/moses/Jamfile b/moses/Jamfile index 49aab9025..5200029fb 100644 --- a/moses/Jamfile +++ b/moses/Jamfile @@ -122,10 +122,10 @@ vwfiles synlm mmlib mserver headers FF_Factory.o LM//LM TranslationModel/CompactPT//CompactPT -TranslationModel/ProbingPT//ProbingPT ThreadPool ..//search ../util/double-conversion//double-conversion +../probingpt//probingpt ..//z ../OnDiskPt//OnDiskPt $(TOP)//boost_filesystem @@ -139,5 +139,5 @@ alias headers-to-install : [ glob-tree *.h ] ; import testing ; -unit-test moses_test : [ glob *Test.cpp Mock*.cpp FF/*Test.cpp ] ..//boost_filesystem moses headers ..//z ../OnDiskPt//OnDiskPt ..//boost_unit_test_framework ; +unit-test moses_test : [ glob *Test.cpp Mock*.cpp FF/*Test.cpp ] ..//boost_filesystem moses headers ..//z ../OnDiskPt//OnDiskPt ../probingpt//probingpt ..//boost_unit_test_framework ; diff --git a/moses/TranslationModel/ProbingPT/ProbingPT.cpp b/moses/TranslationModel/ProbingPT.cpp similarity index 96% rename from moses/TranslationModel/ProbingPT/ProbingPT.cpp rename to moses/TranslationModel/ProbingPT.cpp index 1ae0c67c3..2a7369622 100644 --- a/moses/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/moses/TranslationModel/ProbingPT.cpp @@ -5,7 +5,8 @@ #include "moses/TargetPhraseCollection.h" #include "moses/InputFileStream.h" #include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h" -#include "querying.hh" +#include "probingpt/querying.hh" +#include "probingpt/probing_hash_utils.hh" using namespace std; @@ -14,6 +15,7 @@ namespace Moses ProbingPT::ProbingPT(const std::string &line) : PhraseDictionary(line,true) ,m_engine(NULL) + 
,load_method(util::POPULATE_OR_READ) { ReadParameters(); @@ -31,7 +33,7 @@ void ProbingPT::Load(AllOptions::ptr const& opts) m_options = opts; SetFeaturesToApply(); - m_engine = new QueryEngine(m_filePath.c_str()); + m_engine = new probingpt::QueryEngine(m_filePath.c_str(), load_method); m_unkId = 456456546456; @@ -256,12 +258,12 @@ TargetPhraseCollection *ProbingPT::CreateTargetPhrases( TargetPhrase *ProbingPT::CreateTargetPhrase( const char *&offset) const { - TargetPhraseInfo *tpInfo = (TargetPhraseInfo*) offset; + probingpt::TargetPhraseInfo *tpInfo = (probingpt::TargetPhraseInfo*) offset; size_t numRealWords = tpInfo->numWords / m_output.size(); TargetPhrase *tp = new TargetPhrase(this); - offset += sizeof(TargetPhraseInfo); + offset += sizeof(probingpt::TargetPhraseInfo); // scores float *scores = (float*) offset; diff --git a/moses/TranslationModel/ProbingPT/ProbingPT.h b/moses/TranslationModel/ProbingPT.h similarity index 93% rename from moses/TranslationModel/ProbingPT/ProbingPT.h rename to moses/TranslationModel/ProbingPT.h index 953a2dc2f..bdf5a3bda 100644 --- a/moses/TranslationModel/ProbingPT/ProbingPT.h +++ b/moses/TranslationModel/ProbingPT.h @@ -3,16 +3,20 @@ #include #include #include -#include "../PhraseDictionary.h" +#include "PhraseDictionary.h" +#include "util/mmap.hh" +namespace probingpt +{ +class QueryEngine; +class target_text; +} namespace Moses { class ChartParser; class ChartCellCollectionBase; class ChartRuleLookupManager; -class QueryEngine; -class target_text; class ProbingPT : public PhraseDictionary { @@ -39,12 +43,13 @@ public: protected: - QueryEngine *m_engine; + probingpt::QueryEngine *m_engine; uint64_t m_unkId; std::vector m_sourceVocab; // factor id -> pt id std::vector m_targetVocab; // pt id -> factor* std::vector m_aligns; + util::LoadMethod load_method; boost::iostreams::mapped_file_source file; const char *data; diff --git a/moses/TranslationModel/ProbingPT/Jamfile b/moses/TranslationModel/ProbingPT/Jamfile deleted file 
mode 100644 index 29c6ec41d..000000000 --- a/moses/TranslationModel/ProbingPT/Jamfile +++ /dev/null @@ -1,8 +0,0 @@ -local current = "" ; -local includes = ; - -fakelib ProbingPT : [ glob *.cpp ] ../..//headers : $(includes) $(PT-LOG) : : $(includes) ; - -path-constant PT-LOG : bin/pt.log ; -update-if-changed $(PT-LOG) $(current) ; - diff --git a/moses/TranslationModel/ProbingPT/StoreTarget.cpp b/moses/TranslationModel/ProbingPT/StoreTarget.cpp deleted file mode 100644 index f586a26b9..000000000 --- a/moses/TranslationModel/ProbingPT/StoreTarget.cpp +++ /dev/null @@ -1,264 +0,0 @@ -/* - * StoreTarget.cpp - * - * Created on: 19 Jan 2016 - * Author: hieu - */ -#include -#include "StoreTarget.h" -#include "line_splitter.hh" -#include "probing_hash_utils.hh" -#include "moses/OutputFileStream.h" -#include "moses/Util.h" - -using namespace std; - -namespace Moses -{ - -StoreTarget::StoreTarget(const std::string &basepath) - :m_basePath(basepath) - ,m_vocab(basepath + "/TargetVocab.dat") -{ - std::string path = basepath + "/TargetColl.dat"; - m_fileTargetColl.open(path.c_str(), - std::ios::out | std::ios::binary | std::ios::ate | std::ios::trunc); - if (!m_fileTargetColl.is_open()) { - throw "can't create file "; - } - -} - -StoreTarget::~StoreTarget() -{ - assert(m_coll.empty()); - m_fileTargetColl.close(); - - // vocab - m_vocab.Save(); -} - -uint64_t StoreTarget::Save() -{ - uint64_t ret = m_fileTargetColl.tellp(); - - // save to disk - uint64_t numTP = m_coll.size(); - m_fileTargetColl.write((char*) &numTP, sizeof(uint64_t)); - - for (size_t i = 0; i < m_coll.size(); ++i) { - Save(*m_coll[i]); - } - - // clear coll - RemoveAllInColl(m_coll); - m_coll.clear(); - - // starting position of coll - return ret; -} - -void StoreTarget::Save(const target_text &rule) -{ - // metadata for each tp - TargetPhraseInfo tpInfo; - tpInfo.alignTerm = GetAlignId(rule.word_align_term); - tpInfo.alignNonTerm = GetAlignId(rule.word_align_non_term); - tpInfo.numWords = 
rule.target_phrase.size(); - tpInfo.propLength = rule.property.size(); - - //cerr << "TPInfo=" << sizeof(TPInfo); - m_fileTargetColl.write((char*) &tpInfo, sizeof(TargetPhraseInfo)); - - // scores - for (size_t i = 0; i < rule.prob.size(); ++i) { - float prob = rule.prob[i]; - m_fileTargetColl.write((char*) &prob, sizeof(prob)); - } - - // tp - for (size_t i = 0; i < rule.target_phrase.size(); ++i) { - uint32_t vocabId = rule.target_phrase[i]; - m_fileTargetColl.write((char*) &vocabId, sizeof(vocabId)); - } - - // prop TODO - -} - -void StoreTarget::SaveAlignment() -{ - std::string path = m_basePath + "/Alignments.dat"; - OutputFileStream file(path); - - BOOST_FOREACH(Alignments::value_type &valPair, m_aligns) { - file << valPair.second << "\t"; - - const std::vector &aligns = valPair.first; - BOOST_FOREACH(size_t align, aligns) { - file << align << " "; - } - file << endl; - } - -} - -void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg) -{ - target_text *rule = new target_text; - //cerr << "line.target_phrase=" << line.target_phrase << endl; - - // target_phrase - vector nonTerms; - util::TokenIter it; - it = util::TokenIter(line.target_phrase, - util::SingleCharacter(' ')); - while (it) { - StringPiece word = *it; - //cerr << "word=" << word << endl; - - bool nonTerm = false; - if (scfg) { - // not really sure how to handle factored SCFG and NT - if (scfg && word[0] == '[' && word[word.size() - 1] == ']') { - //cerr << "NON-TERM=" << tok << " " << nonTerms.size() << endl; - nonTerm = true; - } - nonTerms.push_back(nonTerm); - } - - util::TokenIter itFactor; - itFactor = util::TokenIter(word, - util::SingleCharacter('|')); - while (itFactor) { - StringPiece factor = *itFactor; - - string factorStr = factor.as_string(); - uint32_t vocabId = m_vocab.GetVocabId(factorStr); - - rule->target_phrase.push_back(vocabId); - - itFactor++; - } - - it++; - } - - // probs - it = util::TokenIter(line.prob, - util::SingleCharacter(' ')); - while (it) { - 
string tok = it->as_string(); - float prob = Scan(tok); - - if (log_prob) { - prob = FloorScore(log(prob)); - if (prob == 0.0f) prob = 0.0000000001; - } - - rule->prob.push_back(prob); - it++; - } - - /* - cerr << "nonTerms="; - for (size_t i = 0; i < nonTerms.size(); ++i) { - cerr << nonTerms[i] << " "; - } - cerr << endl; - */ - - // alignment - it = util::TokenIter(line.word_align, - util::SingleCharacter(' ')); - while (it) { - string tokPair = Trim(it->as_string()); - if (tokPair.empty()) { - break; - } - - vector alignPair = Tokenize(tokPair, "-"); - assert(alignPair.size() == 2); - - bool nonTerm = false; - size_t sourcePos = alignPair[0]; - size_t targetPos = alignPair[1]; - if (scfg) { - nonTerm = nonTerms[targetPos]; - } - - //cerr << targetPos << "=" << nonTerm << endl; - - if (nonTerm) { - rule->word_align_non_term.push_back(sourcePos); - rule->word_align_non_term.push_back(targetPos); - //cerr << (int) rule->word_all1.back() << " "; - } else { - rule->word_align_term.push_back(sourcePos); - rule->word_align_term.push_back(targetPos); - } - - it++; - } - - // extra scores - string prop = line.property.as_string(); - AppendLexRO(prop, rule->prob, log_prob); - - //cerr << "line.property=" << line.property << endl; - //cerr << "prop=" << prop << endl; - - // properties - /* - for (size_t i = 0; i < prop.size(); ++i) { - rule->property.push_back(prop[i]); - } - */ - m_coll.push_back(rule); -} - -uint32_t StoreTarget::GetAlignId(const std::vector &align) -{ - boost::unordered_map, uint32_t>::iterator iter = - m_aligns.find(align); - if (iter == m_aligns.end()) { - uint32_t ind = m_aligns.size(); - m_aligns[align] = ind; - return ind; - } else { - return iter->second; - } -} - -void StoreTarget::AppendLexRO(std::string &prop, std::vector &retvector, - bool log_prob) const -{ - size_t startPos = prop.find("{{LexRO "); - - if (startPos != string::npos) { - size_t endPos = prop.find("}}", startPos + 8); - string lexProb = prop.substr(startPos + 8, endPos - 
startPos - 8); - //cerr << "lexProb=" << lexProb << endl; - - // append lex probs to pt probs - vector scores = Tokenize(lexProb); - - if (log_prob) { - for (size_t i = 0; i < scores.size(); ++i) { - scores[i] = FloorScore(log(scores[i])); - if (scores[i] == 0.0f) scores[i] = 0.0000000001; - } - } - - for (size_t i = 0; i < scores.size(); ++i) { - retvector.push_back(scores[i]); - } - - // exclude LexRO property from property column - prop = prop.substr(0, startPos) - + prop.substr(endPos + 2, prop.size() - endPos - 2); - //cerr << "line.property_to_be_binarized=" << line.property_to_be_binarized << "AAAA" << endl; - } -} - -} /* namespace Moses2 */ diff --git a/moses/TranslationModel/ProbingPT/StoreTarget.h b/moses/TranslationModel/ProbingPT/StoreTarget.h deleted file mode 100644 index 331c197b3..000000000 --- a/moses/TranslationModel/ProbingPT/StoreTarget.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * StoreTarget.h - * - * Created on: 19 Jan 2016 - * Author: hieu - */ -#pragma once -#include -#include -#include -#include -#include -#include -#include "StoreVocab.h" - -namespace Moses -{ - -class line_text; -class target_text; - -class StoreTarget -{ -public: - StoreTarget(const std::string &basepath); - virtual ~StoreTarget(); - - uint64_t Save(); - void SaveAlignment(); - - void Append(const line_text &line, bool log_prob, bool scfg); -protected: - std::string m_basePath; - std::fstream m_fileTargetColl; - StoreVocab m_vocab; - - typedef boost::unordered_map, uint32_t> Alignments; - Alignments m_aligns; - - std::vector m_coll; - - uint32_t GetAlignId(const std::vector &align); - void Save(const target_text &rule); - - void AppendLexRO(std::string &prop, std::vector &retvector, - bool log_prob) const; - -}; - -} /* namespace Moses2 */ - diff --git a/moses/TranslationModel/ProbingPT/StoreVocab.cpp b/moses/TranslationModel/ProbingPT/StoreVocab.cpp deleted file mode 100644 index 6515bac63..000000000 --- a/moses/TranslationModel/ProbingPT/StoreVocab.cpp +++ /dev/null @@ 
-1,13 +0,0 @@ -/* - * StoreVocab.cpp - * - * Created on: 15 Jun 2016 - * Author: hieu - */ -#include -#include "StoreVocab.h" - -namespace Moses -{ - -} /* namespace Moses2 */ diff --git a/moses/TranslationModel/ProbingPT/StoreVocab.h b/moses/TranslationModel/ProbingPT/StoreVocab.h deleted file mode 100644 index 806dcebf4..000000000 --- a/moses/TranslationModel/ProbingPT/StoreVocab.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * StoreVocab.h - * - * Created on: 15 Jun 2016 - * Author: hieu - */ -#pragma once -#include -#include -#include "moses/OutputFileStream.h" -#include "moses/Util.h" - -namespace Moses -{ - -template -class StoreVocab -{ -protected: - std::string m_path; - - typedef boost::unordered_map Coll; - Coll m_vocab; - -public: - StoreVocab(const std::string &path) - :m_path(path) - {} - - virtual ~StoreVocab() {} - - VOCABID GetVocabId(const std::string &word) { - typename Coll::iterator iter = m_vocab.find(word); - if (iter == m_vocab.end()) { - VOCABID ind = m_vocab.size() + 1; - m_vocab[word] = ind; - return ind; - } else { - return iter->second; - } - } - - void Insert(VOCABID id, const std::string &word) { - m_vocab[word] = id; - } - - void Save() { - OutputFileStream strme(m_path); - - typename Coll::const_iterator iter; - for (iter = m_vocab.begin(); iter != m_vocab.end(); ++iter) { - strme << iter->first << "\t" << iter->second << std::endl; - } - - strme.Close(); - } -}; - -} /* namespace Moses2 */ - diff --git a/moses/TranslationModel/ProbingPT/hash.cpp b/moses/TranslationModel/ProbingPT/hash.cpp deleted file mode 100644 index 47242e25d..000000000 --- a/moses/TranslationModel/ProbingPT/hash.cpp +++ /dev/null @@ -1,44 +0,0 @@ -#include -#include "hash.hh" - -using namespace std; - -namespace Moses -{ - -uint64_t getHash(StringPiece text) -{ - std::size_t len = text.size(); - uint64_t key = util::MurmurHashNative(text.data(), len); - return key; -} - -std::vector getVocabIDs(const StringPiece &textin) -{ - //Tokenize - std::vector output; - - 
util::TokenIter itWord(textin, util::SingleCharacter(' ')); - - while (itWord) { - StringPiece word = *itWord; - uint64_t id = 0; - - util::TokenIter itFactor(word, util::SingleCharacter('|')); - while (itFactor) { - StringPiece factor = *itFactor; - //cerr << "factor=" << factor << endl; - - id += getHash(factor); - itFactor++; - } - - output.push_back(id); - itWord++; - } - - return output; -} - -} - diff --git a/moses/TranslationModel/ProbingPT/hash.hh b/moses/TranslationModel/ProbingPT/hash.hh deleted file mode 100644 index f218ad9da..000000000 --- a/moses/TranslationModel/ProbingPT/hash.hh +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include "util/string_piece.hh" -#include "util/murmur_hash.hh" -#include "util/string_piece.hh" //Tokenization and work with StringPiece -#include "util/tokenize_piece.hh" -#include - -namespace Moses -{ - -//Gets the MurmurmurHash for give string -uint64_t getHash(StringPiece text); - -std::vector getVocabIDs(const StringPiece &textin); - -} diff --git a/moses/TranslationModel/ProbingPT/line_splitter.cpp b/moses/TranslationModel/ProbingPT/line_splitter.cpp deleted file mode 100644 index cb9e47fec..000000000 --- a/moses/TranslationModel/ProbingPT/line_splitter.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include "line_splitter.hh" - -namespace Moses -{ - -line_text splitLine(const StringPiece &textin, bool scfg) -{ - const char delim[] = "|||"; - line_text output; - - //Tokenize - util::TokenIter it(textin, util::MultiCharacter(delim)); - //Get source phrase - output.source_phrase = Trim(*it); - //std::cerr << "output.source_phrase=" << output.source_phrase << "AAAA" << std::endl; - - //Get target_phrase - it++; - output.target_phrase = Trim(*it); - //std::cerr << "output.target_phrase=" << output.target_phrase << "AAAA" << std::endl; - - if (scfg) { - /* - std::cerr << "output.source_phrase=" << output.source_phrase << std::endl; - std::cerr << "output.target_phrase=" << output.target_phrase << std::endl; - reformatSCFG(output); - 
std::cerr << "output.source_phrase=" << output.source_phrase << std::endl; - std::cerr << "output.target_phrase=" << output.target_phrase << std::endl; - */ - } - - //Get probabilities - it++; - output.prob = Trim(*it); - //std::cerr << "output.prob=" << output.prob << "AAAA" << std::endl; - - //Get WordAllignment - it++; - if (it == util::TokenIter::end()) return output; - output.word_align = Trim(*it); - //std::cerr << "output.word_align=" << output.word_align << "AAAA" << std::endl; - - //Get count - it++; - if (it == util::TokenIter::end()) return output; - output.counts = Trim(*it); - //std::cerr << "output.counts=" << output.counts << "AAAA" << std::endl; - - //Get sparse_score - it++; - if (it == util::TokenIter::end()) return output; - output.sparse_score = Trim(*it); - //std::cerr << "output.sparse_score=" << output.sparse_score << "AAAA" << std::endl; - - //Get property - it++; - if (it == util::TokenIter::end()) return output; - output.property = Trim(*it); - //std::cerr << "output.property=" << output.property << "AAAA" << std::endl; - - return output; -} - -std::vector splitWordAll1(const StringPiece &textin) -{ - const char delim[] = " "; - const char delim2[] = "-"; - std::vector output; - - //Case with no word alignments. - if (textin.size() == 0) { - return output; - } - - //Split on space - util::TokenIter it(textin, util::MultiCharacter(delim)); - - //For each int - while (it) { - //Split on dash (-) - util::TokenIter itInner(*it, - util::MultiCharacter(delim2)); - - //Insert the two entries in the vector. User will read entry 0 and 1 to get the first, - //2 and 3 for second etc. 
Use unsigned char instead of int to save space, as - //word allignments are all very small numbers that fit in a single byte - output.push_back((unsigned char) (atoi(itInner->data()))); - itInner++; - output.push_back((unsigned char) (atoi(itInner->data()))); - it++; - } - - return output; - -} - -void reformatSCFG(line_text &output) -{ - -} - -} - diff --git a/moses/TranslationModel/ProbingPT/line_splitter.hh b/moses/TranslationModel/ProbingPT/line_splitter.hh deleted file mode 100644 index 01b86fc9b..000000000 --- a/moses/TranslationModel/ProbingPT/line_splitter.hh +++ /dev/null @@ -1,57 +0,0 @@ -#pragma once - -#include "util/string_piece.hh" -#include "util/tokenize_piece.hh" -#include "util/file_piece.hh" -#include -#include //atof -#include "util/string_piece.hh" //Tokenization and work with StringPiece -#include "util/tokenize_piece.hh" -#include - -namespace Moses -{ - -//Struct for holding processed line -struct line_text { - StringPiece source_phrase; - StringPiece target_phrase; - StringPiece prob; - StringPiece word_align; - StringPiece counts; - StringPiece sparse_score; - StringPiece property; - std::string property_to_be_binarized; -}; - -//Struct for holding processed line -struct target_text { - std::vector target_phrase; - std::vector prob; - std::vector word_align_term; - std::vector word_align_non_term; - std::vector counts; - std::vector sparse_score; - std::vector property; - - /* - void Reset() - { - target_phrase.clear(); - prob.clear(); - word_all1.clear(); - counts.clear(); - sparse_score.clear(); - property.clear(); - } - */ -}; - -//Ask if it's better to have it receive a pointer to a line_text struct -line_text splitLine(const StringPiece &textin, bool scfg); -void reformatSCFG(line_text &output); - -std::vector splitWordAll1(const StringPiece &textin); - -} - diff --git a/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp b/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp deleted file mode 100644 index f23f57d66..000000000 
--- a/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include "probing_hash_utils.hh" - -namespace Moses -{ - -//Read table from disk, return memory map location -char * readTable(const char * filename, size_t size) -{ - //Initial position of the file is the end of the file, thus we know the size - int fd; - char * map; - - fd = open(filename, O_RDONLY); - if (fd == -1) { - perror("Error opening file for reading"); - exit(EXIT_FAILURE); - } - - map = (char *) mmap(0, size, PROT_READ, MAP_SHARED, fd, 0); - - if (map == MAP_FAILED) { - close(fd); - perror("Error mmapping the file"); - exit(EXIT_FAILURE); - } - - return map; -} - -void serialize_table(char *mem, size_t size, const std::string &filename) -{ - std::ofstream os(filename.c_str(), std::ios::binary); - os.write((const char*) &mem[0], size); - os.close(); - -} - -uint64_t getKey(const uint64_t source_phrase[], size_t size) -{ - //TOO SLOW - //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size()); - uint64_t key = 0; - for (size_t i = 0; i < size; i++) { - key += (source_phrase[i] << i); - } - return key; -} - -} - diff --git a/moses/TranslationModel/ProbingPT/probing_hash_utils.hh b/moses/TranslationModel/ProbingPT/probing_hash_utils.hh deleted file mode 100644 index 998686b2e..000000000 --- a/moses/TranslationModel/ProbingPT/probing_hash_utils.hh +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include "util/probing_hash_table.hh" - -#include -#include -#include -#include - -namespace Moses -{ - -#define API_VERSION 15 - -//Hash table entry -struct Entry { - typedef uint64_t Key; - Key key; - - Key GetKey() const { - return key; - } - - void SetKey(Key to) { - key = to; - } - - uint64_t value; -}; - -#define NONE std::numeric_limits::max() - -//Define table -typedef util::ProbingHashTable > Table; - -void serialize_table(char *mem, size_t size, const std::string &filename); - -char * readTable(const char * filename, size_t size); - -uint64_t 
getKey(const uint64_t source_phrase[], size_t size); - -struct TargetPhraseInfo { - uint32_t alignTerm; - uint32_t alignNonTerm; - uint16_t numWords; - uint16_t propLength; - uint16_t filler; -}; - -} - diff --git a/moses/TranslationModel/ProbingPT/querying.cpp b/moses/TranslationModel/ProbingPT/querying.cpp deleted file mode 100644 index 10c35e361..000000000 --- a/moses/TranslationModel/ProbingPT/querying.cpp +++ /dev/null @@ -1,141 +0,0 @@ -#include "querying.hh" -#include "util/exception.hh" - -using namespace std; - -namespace Moses -{ - -QueryEngine::QueryEngine(const char * filepath) -{ - - //Create filepaths - std::string basepath(filepath); - std::string path_to_config = basepath + "/config"; - std::string path_to_hashtable = basepath + "/probing_hash.dat"; - std::string path_to_source_vocabid = basepath + "/source_vocabids"; - std::string alignPath = basepath + "/Alignments.dat"; - - if (!FileExists(path_to_config)) { - UTIL_THROW2("Binary table doesn't exist is didn't finish binarizing: " << path_to_config); - } - - ///Source phrase vocabids - read_map(source_vocabids, path_to_source_vocabid.c_str()); - - // alignments - read_alignments(alignPath); - - //Read config file - boost::unordered_map keyValue; - - std::ifstream config(path_to_config.c_str()); - std::string line; - while (getline(config, line)) { - std::vector toks = Tokenize(line, "\t"); - UTIL_THROW_IF2(toks.size() != 2, "Wrong config format:" << line); - keyValue[ toks[0] ] = toks[1]; - } - - bool found; - //Check API version: - int version; - found = Get(keyValue, "API_VERSION", version); - if (!found) { - std::cerr << "Old or corrupted version of ProbingPT. Please rebinarize your phrase tables." << std::endl; - } else if (version != API_VERSION) { - std::cerr << "The ProbingPT API has changed. " << version << "!=" - << API_VERSION << " Please rebinarize your phrase tables." << std::endl; - exit(EXIT_FAILURE); - } - - //Get tablesize. 
- int tablesize; - found = Get(keyValue, "uniq_entries", tablesize); - if (!found) { - std::cerr << "uniq_entries not found" << std::endl; - exit(EXIT_FAILURE); - } - - //Number of scores - found = Get(keyValue, "num_scores", num_scores); - if (!found) { - std::cerr << "num_scores not found" << std::endl; - exit(EXIT_FAILURE); - } - - //How may scores from lex reordering models - found = Get(keyValue, "num_lex_scores", num_lex_scores); - if (!found) { - std::cerr << "num_lex_scores not found" << std::endl; - exit(EXIT_FAILURE); - } - - // have the scores been log() and FloorScore()? - found = Get(keyValue, "log_prob", logProb); - if (!found) { - std::cerr << "logProb not found" << std::endl; - exit(EXIT_FAILURE); - } - - config.close(); - - //Read hashtable - table_filesize = Table::Size(tablesize, 1.2); - mem = readTable(path_to_hashtable.c_str(), table_filesize); - Table table_init(mem, table_filesize); - table = table_init; - - std::cerr << "Initialized successfully! " << std::endl; -} - -QueryEngine::~QueryEngine() -{ - //Clear mmap content from memory. 
- munmap(mem, table_filesize); - -} - -uint64_t QueryEngine::getKey(uint64_t source_phrase[], size_t size) const -{ - //TOO SLOW - //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size()); - return Moses::getKey(source_phrase, size); -} - -std::pair QueryEngine::query(uint64_t key) -{ - std::pair ret; - - const Entry * entry; - ret.first = table.Find(key, entry); - if (ret.first) { - ret.second = entry->value; - } - return ret; -} - -void QueryEngine::read_alignments(const std::string &alignPath) -{ - std::ifstream strm(alignPath.c_str()); - - string line; - while (getline(strm, line)) { - vector toks = Tokenize(line, "\t "); - UTIL_THROW_IF2(toks.size() == 0, "Corrupt alignment file"); - - uint32_t alignInd = Scan(toks[0]); - if (alignInd >= alignColl.size()) { - alignColl.resize(alignInd + 1); - } - - Alignments &aligns = alignColl[alignInd]; - for (size_t i = 1; i < toks.size(); ++i) { - size_t pos = Scan(toks[i]); - aligns.push_back(pos); - } - } -} - -} - diff --git a/moses/TranslationModel/ProbingPT/querying.hh b/moses/TranslationModel/ProbingPT/querying.hh deleted file mode 100644 index 915bc4806..000000000 --- a/moses/TranslationModel/ProbingPT/querying.hh +++ /dev/null @@ -1,66 +0,0 @@ -#pragma once - -#include -#include //For finding size of file -#include "vocabid.hh" -#include //toLower -#include -#include "probing_hash_utils.hh" -#include "hash.hh" //Includes line splitter -#include "line_splitter.hh" -#include "moses//Util.h" - -namespace Moses -{ - -class QueryEngine -{ - std::map source_vocabids; - - typedef std::vector Alignments; - std::vector alignColl; - - Table table; - char *mem; //Memory for the table, necessary so that we can correctly destroy the object - - size_t table_filesize; - bool is_reordering; - - void read_alignments(const std::string &alignPath); - -public: - int num_scores; - int num_lex_scores; - bool logProb; - - QueryEngine(const char *); - ~QueryEngine(); - - std::pair query(uint64_t key); - - const 
std::map &getSourceVocab() const { - return source_vocabids; - } - - const std::vector &getAlignments() const { - return alignColl; - } - - uint64_t getKey(uint64_t source_phrase[], size_t size) const; - - template - inline bool Get(const boost::unordered_map &keyValue, const std::string &sought, T &found) const { - boost::unordered_map::const_iterator iter = keyValue.find(sought); - if (iter == keyValue.end()) { - return false; - } - - const std::string &foundStr = iter->second; - found = Scan(foundStr); - return true; - } - -}; - -} - diff --git a/moses/TranslationModel/ProbingPT/storing.cpp b/moses/TranslationModel/ProbingPT/storing.cpp deleted file mode 100644 index baf6ae91e..000000000 --- a/moses/TranslationModel/ProbingPT/storing.cpp +++ /dev/null @@ -1,298 +0,0 @@ -#include -#include -#include "line_splitter.hh" -#include "storing.hh" -#include "StoreTarget.h" -#include "StoreVocab.h" -#include "moses/Util.h" -#include "moses/InputFileStream.h" - -using namespace std; - -namespace Moses -{ - -/////////////////////////////////////////////////////////////////////// -void Node::Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos) -{ - if (pos < sourcePhrase.size()) { - uint64_t vocabId = sourcePhrase[pos]; - - Node *child; - Children::iterator iter = m_children.find(vocabId); - if (iter == m_children.end()) { - // New node. 
Write other children then discard them - BOOST_FOREACH(Children::value_type &valPair, m_children) { - Node &otherChild = valPair.second; - otherChild.Write(table); - } - m_children.clear(); - - // create new node - child = &m_children[vocabId]; - assert(!child->done); - child->key = key + (vocabId << pos); - } else { - child = &iter->second; - } - - child->Add(table, sourcePhrase, pos + 1); - } else { - // this node was written previously 'cos it has rules - done = true; - } -} - -void Node::Write(Table &table) -{ - //cerr << "START write " << done << " " << key << endl; - BOOST_FOREACH(Children::value_type &valPair, m_children) { - Node &child = valPair.second; - child.Write(table); - } - - if (!done) { - // save - Entry sourceEntry; - sourceEntry.value = NONE; - sourceEntry.key = key; - - //Put into table - table.Insert(sourceEntry); - } -} - -/////////////////////////////////////////////////////////////////////// -void createProbingPT(const std::string &phrasetable_path, - const std::string &basepath, int num_scores, int num_lex_scores, - bool log_prob, int max_cache_size, bool scfg) -{ - std::cerr << "Starting..." 
<< std::endl; - - //Get basepath and create directory if missing - mkdir(basepath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); - - StoreTarget storeTarget(basepath); - - //Get uniq lines: - unsigned long uniq_entries = countUniqueSource(phrasetable_path); - - //Source phrase vocabids - StoreVocab sourceVocab(basepath + "/source_vocabids"); - - //Read the file - util::FilePiece filein(phrasetable_path.c_str()); - - //Init the probing hash table - size_t size = Table::Size(uniq_entries, 1.2); - char * mem = new char[size]; - memset(mem, 0, size); - Table sourceEntries(mem, size); - - std::priority_queue, CacheItemOrderer> cache; - float totalSourceCount = 0; - - //Keep track of the size of each group of target phrases - size_t line_num = 0; - - //Read everything and processs - std::string prevSource; - - Node sourcePhrases; - sourcePhrases.done = true; - sourcePhrases.key = 0; - - while (true) { - try { - //Process line read - line_text line; - line = splitLine(filein.ReadLine(), scfg); - //cerr << "line=" << line.source_phrase << endl; - - ++line_num; - if (line_num % 1000000 == 0) { - std::cerr << line_num << " " << std::flush; - } - - //Add source phrases to vocabularyIDs - add_to_map(sourceVocab, line.source_phrase); - - if (prevSource.empty()) { - // 1st line - prevSource = line.source_phrase.as_string(); - storeTarget.Append(line, log_prob, scfg); - } else if (prevSource == line.source_phrase) { - //If we still have the same line, just append to it: - storeTarget.Append(line, log_prob, scfg); - } else { - assert(prevSource != line.source_phrase); - - //Create a new entry even - - // save - uint64_t targetInd = storeTarget.Save(); - - // next line - storeTarget.Append(line, log_prob, scfg); - - //Create an entry for the previous source phrase: - Entry sourceEntry; - sourceEntry.value = targetInd; - //The key is the sum of hashes of individual words bitshifted by their position in the phrase. 
- //Probably not entirerly correct, but fast and seems to work fine in practise. - std::vector vocabid_source = getVocabIDs(prevSource); - if (scfg) { - // storing prefixes? - sourcePhrases.Add(sourceEntries, vocabid_source); - } - sourceEntry.key = getKey(vocabid_source); - - /* - cerr << "prevSource=" << prevSource << flush - << " vocabids=" << Debug(vocabid_source) << flush - << " key=" << sourceEntry.key << endl; - */ - //Put into table - sourceEntries.Insert(sourceEntry); - - // update cache - CURRENT source phrase, not prev - if (max_cache_size) { - std::string countStr = line.counts.as_string(); - countStr = Trim(countStr); - if (!countStr.empty()) { - std::vector toks = Tokenize(countStr); - //cerr << "CACHE:" << line.source_phrase << " " << countStr << " " << toks[1] << endl; - - if (toks.size() >= 2) { - totalSourceCount += toks[1]; - - // compute key for CURRENT source - std::vector currVocabidSource = getVocabIDs(line.source_phrase.as_string()); - uint64_t currKey = getKey(currVocabidSource); - - CacheItem *item = new CacheItem( - Trim(line.source_phrase.as_string()), - currKey, - toks[1]); - cache.push(item); - - if (max_cache_size > 0 && cache.size() > max_cache_size) { - cache.pop(); - } - } - } - } - - //Set prevLine - prevSource = line.source_phrase.as_string(); - } - - } catch (util::EndOfFileException e) { - std::cerr - << "Reading phrase table finished, writing remaining files to disk." - << std::endl; - - //After the final entry is constructed we need to add it to the phrase_table - //Create an entry for the previous source phrase: - uint64_t targetInd = storeTarget.Save(); - - Entry sourceEntry; - sourceEntry.value = targetInd; - - //The key is the sum of hashes of individual words. 
Probably not entirerly correct, but fast - std::vector vocabid_source = getVocabIDs(prevSource); - sourceEntry.key = getKey(vocabid_source); - - //Put into table - sourceEntries.Insert(sourceEntry); - - break; - } - } - - sourcePhrases.Write(sourceEntries); - - storeTarget.SaveAlignment(); - - serialize_table(mem, size, (basepath + "/probing_hash.dat")); - - sourceVocab.Save(); - - serialize_cache(cache, (basepath + "/cache"), totalSourceCount); - - delete[] mem; - - //Write configfile - std::ofstream configfile; - configfile.open((basepath + "/config").c_str()); - configfile << "API_VERSION\t" << API_VERSION << '\n'; - configfile << "uniq_entries\t" << uniq_entries << '\n'; - configfile << "num_scores\t" << num_scores << '\n'; - configfile << "num_lex_scores\t" << num_lex_scores << '\n'; - configfile << "log_prob\t" << log_prob << '\n'; - configfile.close(); -} - -size_t countUniqueSource(const std::string &path) -{ - size_t ret = 0; - InputFileStream strme(path); - - std::string line, prevSource; - while (std::getline(strme, line)) { - std::vector toks = TokenizeMultiCharSeparator(line, "|||"); - assert(toks.size() != 0); - - if (prevSource != toks[0]) { - prevSource = toks[0]; - ++ret; - } - } - - return ret; -} - -void serialize_cache( - std::priority_queue, CacheItemOrderer> &cache, - const std::string &path, float totalSourceCount) -{ - std::vector vec(cache.size()); - - size_t ind = cache.size() - 1; - while (!cache.empty()) { - const CacheItem *item = cache.top(); - vec[ind] = item; - cache.pop(); - --ind; - } - - std::ofstream os(path.c_str()); - - os << totalSourceCount << std::endl; - for (size_t i = 0; i < vec.size(); ++i) { - const CacheItem *item = vec[i]; - os << item->count << "\t" << item->sourceKey << "\t" << item->source << std::endl; - delete item; - } - - os.close(); -} - -uint64_t getKey(const std::vector &vocabid_source) -{ - return getKey(vocabid_source.data(), vocabid_source.size()); -} - -std::vector CreatePrefix(const std::vector 
&vocabid_source, size_t endPos) -{ - assert(endPos < vocabid_source.size()); - - std::vector ret(endPos + 1); - for (size_t i = 0; i <= endPos; ++i) { - ret[i] = vocabid_source[i]; - } - return ret; -} - -} - diff --git a/moses/TranslationModel/ProbingPT/storing.hh b/moses/TranslationModel/ProbingPT/storing.hh deleted file mode 100644 index 994067515..000000000 --- a/moses/TranslationModel/ProbingPT/storing.hh +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include //mkdir - -#include "hash.hh" //Includes line_splitter -#include "probing_hash_utils.hh" - -#include "util/file_piece.hh" -#include "util/file.hh" -#include "vocabid.hh" - -namespace Moses -{ -typedef std::vector SourcePhrase; - - -class Node -{ - typedef boost::unordered_map Children; - Children m_children; - -public: - uint64_t key; - bool done; - - Node() - :done(false) - {} - - void Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos = 0); - void Write(Table &table); -}; - - -void createProbingPT(const std::string &phrasetable_path, - const std::string &basepath, int num_scores, int num_lex_scores, - bool log_prob, int max_cache_size, bool scfg); -uint64_t getKey(const std::vector &source_phrase); - -std::vector CreatePrefix(const std::vector &vocabid_source, size_t endPos); - -template -std::string Debug(const std::vector &vec) -{ - std::stringstream strm; - for (size_t i = 0; i < vec.size(); ++i) { - strm << vec[i] << " "; - } - return strm.str(); -} - -size_t countUniqueSource(const std::string &path); - -class CacheItem -{ -public: - std::string source; - uint64_t sourceKey; - float count; - CacheItem(const std::string &vSource, uint64_t vSourceKey, float vCount) - :source(vSource) - ,sourceKey(vSourceKey) - ,count(vCount) { - } - - bool operator<(const CacheItem &other) const { - return count > other.count; - } -}; - -class CacheItemOrderer -{ -public: - bool operator()(const CacheItem* a, const 
CacheItem* b) const { - return (*a) < (*b); - } -}; - -void serialize_cache( - std::priority_queue, CacheItemOrderer> &cache, - const std::string &path, float totalSourceCount); - -} - diff --git a/moses/TranslationModel/ProbingPT/vocabid.cpp b/moses/TranslationModel/ProbingPT/vocabid.cpp deleted file mode 100644 index d6f442323..000000000 --- a/moses/TranslationModel/ProbingPT/vocabid.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include -#include "vocabid.hh" -#include "StoreVocab.h" -#include "moses/Util.h" - -namespace Moses -{ - -void add_to_map(StoreVocab &sourceVocab, - const StringPiece &textin) -{ - //Tokenize - util::TokenIter itWord(textin, util::SingleCharacter(' ')); - - while (itWord) { - StringPiece word = *itWord; - - util::TokenIter itFactor(word, util::SingleCharacter('|')); - while (itFactor) { - StringPiece factor = *itFactor; - - sourceVocab.Insert(getHash(factor), factor.as_string()); - itFactor++; - } - itWord++; - } -} - -void serialize_map(const std::map &karta, - const std::string &filename) -{ - std::ofstream os(filename.c_str()); - - std::map::const_iterator iter; - for (iter = karta.begin(); iter != karta.end(); ++iter) { - os << iter->first << '\t' << iter->second << std::endl; - } - - os.close(); -} - -void read_map(std::map &karta, const char* filename) -{ - std::ifstream is(filename); - - std::string line; - while (getline(is, line)) { - std::vector toks = Tokenize(line, "\t"); - assert(toks.size() == 2); - uint64_t ind = Scan(toks[1]); - karta[ind] = toks[0]; - } - - //Close the stream after we are done. 
- is.close(); -} - -} - diff --git a/moses/TranslationModel/ProbingPT/vocabid.hh b/moses/TranslationModel/ProbingPT/vocabid.hh deleted file mode 100644 index 7e1390874..000000000 --- a/moses/TranslationModel/ProbingPT/vocabid.hh +++ /dev/null @@ -1,29 +0,0 @@ -//Serialization -#include -#include -#include -#include -#include -#include -#include - -#include //Container -#include "hash.hh" //Hash of elements - -#include "util/string_piece.hh" //Tokenization and work with StringPiece -#include "util/tokenize_piece.hh" - -namespace Moses -{ -template -class StoreVocab; - -void add_to_map(StoreVocab &sourceVocab, - const StringPiece &textin); - -void serialize_map(const std::map &karta, - const std::string &filename); - -void read_map(std::map &karta, const char* filename); - -} diff --git a/probingpt/Jamfile b/probingpt/Jamfile index b8560c8c6..7ea4c9d7d 100644 --- a/probingpt/Jamfile +++ b/probingpt/Jamfile @@ -11,6 +11,7 @@ lib probingpt : vocabid.cpp OutputFileStream.cpp InputFileStream.cpp + util.cpp # ../util/string_piece.cc # ../util/exception.cc diff --git a/probingpt/querying.hh b/probingpt/querying.hh index 4bb9d0c96..74d7e3f6b 100644 --- a/probingpt/querying.hh +++ b/probingpt/querying.hh @@ -10,6 +10,7 @@ #include "hash.hh" //Includes line splitter #include "line_splitter.hh" #include "moses2/legacy/Util2.h" +#include "util.hh" namespace probingpt { @@ -68,7 +69,7 @@ public: } const std::string &foundStr = iter->second; - found = Moses2::Scan(foundStr); + found = Scan(foundStr); return true; } From 023a946a5be0c0259e884380725a7cf86af3e313 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 16 Feb 2017 11:33:24 +0000 Subject: [PATCH 134/176] implement SetParameter for load_method --- moses/TranslationModel/ProbingPT.cpp | 22 ++++++++++++++++++++++ moses/TranslationModel/ProbingPT.h | 2 ++ 2 files changed, 24 insertions(+) diff --git a/moses/TranslationModel/ProbingPT.cpp b/moses/TranslationModel/ProbingPT.cpp index 2a7369622..9a8d4c700 100644 --- 
a/moses/TranslationModel/ProbingPT.cpp +++ b/moses/TranslationModel/ProbingPT.cpp @@ -118,6 +118,28 @@ void ProbingPT::CreateAlignmentMap(const std::string path) } } +void ProbingPT::SetParameter(const std::string& key, const std::string& value) +{ + if (key == "load") { + if (value == "lazy") { + load_method = util::LAZY; + } else if (value == "populate_or_lazy") { + load_method = util::POPULATE_OR_LAZY; + } else if (value == "populate_or_read" || value == "populate") { + load_method = util::POPULATE_OR_READ; + } else if (value == "read") { + load_method = util::READ; + } else if (value == "parallel_read") { + load_method = util::PARALLEL_READ; + } else { + UTIL_THROW2("load method not supported" << value); + } + } else { + PhraseDictionary::SetParameter(key, value); + } + +} + void ProbingPT::InitializeForInput(ttasksptr const& ttask) { diff --git a/moses/TranslationModel/ProbingPT.h b/moses/TranslationModel/ProbingPT.h index bdf5a3bda..1c996f5fa 100644 --- a/moses/TranslationModel/ProbingPT.h +++ b/moses/TranslationModel/ProbingPT.h @@ -30,6 +30,8 @@ public: void InitializeForInput(ttasksptr const& ttask); + void SetParameter(const std::string& key, const std::string& value); + // for phrase-based model void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; From 2483c3595bb32ec616f7c3dcf8f6e4bb8a80fb84 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 16 Feb 2017 11:34:29 +0000 Subject: [PATCH 135/176] new files --- probingpt/util.cpp | 24 ++++++++++++++++++++++++ probingpt/util.hh | 24 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 probingpt/util.cpp create mode 100644 probingpt/util.hh diff --git a/probingpt/util.cpp b/probingpt/util.cpp new file mode 100644 index 000000000..004da3b35 --- /dev/null +++ b/probingpt/util.cpp @@ -0,0 +1,24 @@ +#include "util.hh" +#include "util/exception.hh" + +namespace probingpt +{ + +template<> +bool Scan(const std::string &input) +{ + std::string lc = ToLower(input); 
+ if (lc == "yes" || lc == "y" || lc == "true" || lc == "1") return true; + if (lc == "no" || lc == "n" || lc == "false" || lc == "0") return false; + UTIL_THROW2("Could not interpret " << input << " as a boolean. After lowercasing, valid values are yes, y, true, 1, no, n, false, and 0."); +} + +const std::string ToLower(const std::string& str) +{ + std::string lc(str); + std::transform(lc.begin(), lc.end(), lc.begin(), (int (*)(int))std::tolower); + return + lc ; +} + +} diff --git a/probingpt/util.hh b/probingpt/util.hh new file mode 100644 index 000000000..b1e2ad0b9 --- /dev/null +++ b/probingpt/util.hh @@ -0,0 +1,24 @@ +#pragma once +#include +#include + +namespace probingpt +{ + +//! convert string to variable of type T. Used to reading floats, int etc from files +template +inline T Scan(const std::string &input) +{ + std::stringstream stream(input); + T ret; + stream >> ret; + return ret; +} + +//! Specialisation to understand yes/no y/n true/false 0/1 +template<> +bool Scan(const std::string &input); + +const std::string ToLower(const std::string& str); + +} From b0199c05322bbf1113c37b3bae38efcbd2f1bfaa Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 16 Feb 2017 12:16:37 +0000 Subject: [PATCH 136/176] change include path in factory --- moses/FF/Factory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index 9ae145504..5d37c4fed 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -14,7 +14,7 @@ #include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h" #include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h" #include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h" -#include "moses/TranslationModel/ProbingPT/ProbingPT.h" +#include "moses/TranslationModel/ProbingPT.h" #include "moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h" #include "moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h" From 
f5c667ea1d68d649c122fcefdc52cf5123093ada Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 16 Feb 2017 12:35:43 +0000 Subject: [PATCH 137/176] eclipse --- contrib/other-builds/CreateOnDiskPt/.cproject | 2 ++ contrib/other-builds/moses-cmd/.cproject | 18 ++++++++++-------- contrib/other-builds/moses-cmd/.project | 1 + contrib/other-builds/probingpt/.project | 10 ++++++++++ contrib/other-builds/server/.cproject | 2 ++ 5 files changed, 25 insertions(+), 8 deletions(-) diff --git a/contrib/other-builds/CreateOnDiskPt/.cproject b/contrib/other-builds/CreateOnDiskPt/.cproject index 95c0a6a01..80f62e22a 100644 --- a/contrib/other-builds/CreateOnDiskPt/.cproject +++ b/contrib/other-builds/CreateOnDiskPt/.cproject @@ -65,12 +65,14 @@ + - + @@ -37,14 +37,14 @@ - - - @@ -82,7 +83,7 @@ - + @@ -99,13 +100,13 @@ - - diff --git a/contrib/other-builds/consolidate/.cproject b/contrib/other-builds/consolidate/.cproject index 410fbcb8c..d4b35e500 100644 --- a/contrib/other-builds/consolidate/.cproject +++ b/contrib/other-builds/consolidate/.cproject @@ -5,7 +5,7 @@ - + @@ -21,17 +21,18 @@ - - @@ -90,7 +91,7 @@ - + @@ -106,13 +107,13 @@ - - diff --git a/contrib/other-builds/extract-ghkm/.cproject b/contrib/other-builds/extract-ghkm/.cproject index 4a07699dc..d0ebe75f8 100644 --- a/contrib/other-builds/extract-ghkm/.cproject +++ b/contrib/other-builds/extract-ghkm/.cproject @@ -5,33 +5,34 @@ + - - + - - @@ -61,29 +62,29 @@ + - - + - - diff --git a/contrib/other-builds/extract-mixed-syntax/.cproject b/contrib/other-builds/extract-mixed-syntax/.cproject index f246b0c32..3507b8755 100644 --- a/contrib/other-builds/extract-mixed-syntax/.cproject +++ b/contrib/other-builds/extract-mixed-syntax/.cproject @@ -5,25 +5,25 @@ + - - + - - @@ -65,29 +66,29 @@ + - - + - - diff --git a/contrib/other-builds/extract-rules/.cproject b/contrib/other-builds/extract-rules/.cproject index afeef551b..6867c15f9 100644 --- a/contrib/other-builds/extract-rules/.cproject +++ 
b/contrib/other-builds/extract-rules/.cproject @@ -5,7 +5,7 @@ - + @@ -21,9 +21,9 @@ - - @@ -62,7 +63,7 @@ - + @@ -78,13 +79,13 @@ - - diff --git a/contrib/other-builds/extract/.cproject b/contrib/other-builds/extract/.cproject index 4c80306be..63e57b8b7 100644 --- a/contrib/other-builds/extract/.cproject +++ b/contrib/other-builds/extract/.cproject @@ -5,7 +5,7 @@ - + @@ -21,17 +21,18 @@ - - @@ -62,7 +63,7 @@ - + @@ -78,13 +79,13 @@ - - diff --git a/contrib/other-builds/extractor/.cproject b/contrib/other-builds/extractor/.cproject index 79805f176..728ed4410 100644 --- a/contrib/other-builds/extractor/.cproject +++ b/contrib/other-builds/extractor/.cproject @@ -5,7 +5,7 @@ - + @@ -21,17 +21,18 @@ - - @@ -69,7 +70,7 @@ - + @@ -85,13 +86,13 @@ - - diff --git a/contrib/other-builds/lm/.cproject b/contrib/other-builds/lm/.cproject index 3455890f7..4f428751d 100644 --- a/contrib/other-builds/lm/.cproject +++ b/contrib/other-builds/lm/.cproject @@ -11,16 +11,16 @@ + + - - - + @@ -37,14 +37,14 @@ - - - - - - @@ -73,17 +71,17 @@ + + - - - + @@ -100,13 +98,13 @@ - - diff --git a/contrib/other-builds/mert_lib/.cproject b/contrib/other-builds/mert_lib/.cproject index 908ecf784..a3deea5c0 100644 --- a/contrib/other-builds/mert_lib/.cproject +++ b/contrib/other-builds/mert_lib/.cproject @@ -11,7 +11,7 @@ - + @@ -26,20 +26,21 @@ - - - @@ -66,7 +67,7 @@ - + @@ -81,13 +82,13 @@ - - diff --git a/contrib/other-builds/moses-cmd/.cproject b/contrib/other-builds/moses-cmd/.cproject index f1a9f0575..2dde393bc 100644 --- a/contrib/other-builds/moses-cmd/.cproject +++ b/contrib/other-builds/moses-cmd/.cproject @@ -22,15 +22,15 @@ - - - @@ -128,13 +129,13 @@ - - diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject index 848b22951..5080c02ad 100644 --- a/contrib/other-builds/moses/.cproject +++ b/contrib/other-builds/moses/.cproject @@ -50,6 +50,7 @@ @@ -101,13 +102,13 @@ - - diff --git a/contrib/other-builds/probingpt/.cproject 
b/contrib/other-builds/probingpt/.cproject index 05cae6aa9..4605b3cf7 100644 --- a/contrib/other-builds/probingpt/.cproject +++ b/contrib/other-builds/probingpt/.cproject @@ -36,6 +36,7 @@ + diff --git a/contrib/other-builds/score/.cproject b/contrib/other-builds/score/.cproject index d904122eb..a6d4e1b88 100644 --- a/contrib/other-builds/score/.cproject +++ b/contrib/other-builds/score/.cproject @@ -5,7 +5,7 @@ - + @@ -21,17 +21,18 @@ - - @@ -80,7 +81,7 @@ - + @@ -96,13 +97,13 @@ - - diff --git a/contrib/other-builds/search/.cproject b/contrib/other-builds/search/.cproject index 44ae0e94e..ad505c569 100644 --- a/contrib/other-builds/search/.cproject +++ b/contrib/other-builds/search/.cproject @@ -20,29 +20,30 @@ - + - - - @@ -75,20 +76,20 @@ - + - - @@ -136,4 +137,3 @@ - diff --git a/contrib/other-builds/server/.cproject b/contrib/other-builds/server/.cproject index 18264ab0d..153b8c8e4 100644 --- a/contrib/other-builds/server/.cproject +++ b/contrib/other-builds/server/.cproject @@ -22,14 +22,14 @@ - - - @@ -121,13 +122,13 @@ - - diff --git a/contrib/other-builds/util/.cproject b/contrib/other-builds/util/.cproject index 8c7e4221b..ff268247c 100644 --- a/contrib/other-builds/util/.cproject +++ b/contrib/other-builds/util/.cproject @@ -37,14 +37,14 @@ - - - @@ -82,7 +83,7 @@ - + @@ -99,16 +100,16 @@ - - From 9f246cef899701a1fa0352f6aa4029258cf05292 Mon Sep 17 00:00:00 2001 From: alvations Date: Wed, 12 Apr 2017 09:52:31 +0800 Subject: [PATCH 160/176] added Dockerfiles for Moses --- scripts/docker/Dockerfile.ubuntu.basic | 26 ++++++++++ .../docker/Dockerfile.ubuntu.fastlightpbmt | 47 +++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 scripts/docker/Dockerfile.ubuntu.basic create mode 100644 scripts/docker/Dockerfile.ubuntu.fastlightpbmt diff --git a/scripts/docker/Dockerfile.ubuntu.basic b/scripts/docker/Dockerfile.ubuntu.basic new file mode 100644 index 000000000..ea1d9b9ba --- /dev/null +++ b/scripts/docker/Dockerfile.ubuntu.basic @@ -0,0 
+1,26 @@ +FROM ubuntu:latest + +MAINTAINER Momo +LABEL description="Basic Moses docker container for Ubuntu" + +# Update Ubuntu. +RUN apt-get update +RUN apt-get install -y apt-utils debconf-utils +RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections +RUN apt-get update && apt-get -y upgrade + +# Install some necessary tools. +RUN apt-get install -y nano perl + +# Install Moses dependencies. +RUN apt-get install -y libboost-all-dev +RUN apt-get install -y build-essential git-core pkg-config automake libtool wget zlib1g-dev python-dev libbz2-dev cmake + +# Clone the repos we need. +RUN git clone https://github.com/moses-smt/mosesdecoder.git + +# Install Moses. +WORKDIR /mosesdecoder +RUN make -f /mosesdecoder/mosesdecoder/contrib/Makefiles/install-dependencies.gmake +RUN ./compile.sh --max-kenlm-order=20 --max-factors=1000 +WORKDIR / diff --git a/scripts/docker/Dockerfile.ubuntu.fastlightpbmt b/scripts/docker/Dockerfile.ubuntu.fastlightpbmt new file mode 100644 index 000000000..5fdcb13b4 --- /dev/null +++ b/scripts/docker/Dockerfile.ubuntu.fastlightpbmt @@ -0,0 +1,47 @@ +FROM ubuntu:latest + +MAINTAINER Momo +LABEL description="Moses docker container for 'Faster and Lighter Phrase-based Machine Translation Baseline' (aka vanilla-moses)" + +# Update Ubuntu. +RUN apt-get update +RUN apt-get install -y apt-utils debconf-utils +RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections +RUN apt-get update && apt-get -y upgrade + +# Install some necessary tools. +RUN apt-get install -y sudo nano perl python-dev python3-dev python-pip python3-pip curl wget tar dtrx + +# Install Moses dependencies. +RUN apt-get install -y libboost-all-dev +RUN apt-get install -y build-essential git-core pkg-config automake libtool wget zlib1g-dev python-dev libbz2-dev cmake + +# Clone the repos we need. 
+RUN git clone https://github.com/moses-smt/mosesdecoder.git +RUN git clone https://github.com/moses-smt/mgiza.git +RUN git clone https://github.com/jonsafari/clustercat.git + +# Install Moses. +WORKDIR /mosesdecoder +RUN make -f /mosesdecoder/mosesdecoder/contrib/Makefiles/install-dependencies.gmake +RUN ./compile.sh --max-kenlm-order=20 --max-factors=1000 +WORKDIR / + +# Install MGIZA++. +WORKDIR /mgiza/mgizapp +RUN cmake . && make && make install +RUN cp /mgiza/mgizapp/scripts/merge_alignment.py /mgiza/mgizapp/bin/ +WORKDIR / + +# Install clustercat. +WORKDIR /clustercat +RUN make -j 4 +WORKDIR / + +# Clean up the container. +RUN mkdir moses-training-tools +RUN cp /mgiza/mgizapp/bin/* /moses-training-tools/ +RUN cp /clustercat/bin/clustercat /moses-training-tools/ +RUN cp /clustercat/bin/mkcls /moses-training-tools/mkcls-clustercat +RUN mv /moses-training-tools/mkcls /moses-training-tools/mkcls-original +RUN cp /moses-training-tools/mkcls-clustercat /moses-training-tools/mkcls From 66cbf46e275efbeb994a551e26284997b686794c Mon Sep 17 00:00:00 2001 From: alvations Date: Wed, 12 Apr 2017 10:03:25 +0800 Subject: [PATCH 161/176] use static path to compile.sh --- scripts/docker/Dockerfile.ubuntu.fastlightpbmt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/docker/Dockerfile.ubuntu.fastlightpbmt b/scripts/docker/Dockerfile.ubuntu.fastlightpbmt index 5fdcb13b4..be17b83ba 100644 --- a/scripts/docker/Dockerfile.ubuntu.fastlightpbmt +++ b/scripts/docker/Dockerfile.ubuntu.fastlightpbmt @@ -24,7 +24,7 @@ RUN git clone https://github.com/jonsafari/clustercat.git # Install Moses. WORKDIR /mosesdecoder RUN make -f /mosesdecoder/mosesdecoder/contrib/Makefiles/install-dependencies.gmake -RUN ./compile.sh --max-kenlm-order=20 --max-factors=1000 +RUN /mosesdecoder/compile.sh --max-kenlm-order=20 --max-factors=1000 WORKDIR / # Install MGIZA++. 
From 793e64b7d504f3c5663939342a41f8e829b4e7b2 Mon Sep 17 00:00:00 2001 From: alvations Date: Wed, 12 Apr 2017 10:15:18 +0800 Subject: [PATCH 162/176] removed redundant subdirectory in path --- scripts/docker/Dockerfile.ubuntu.basic | 4 ++-- scripts/docker/Dockerfile.ubuntu.fastlightpbmt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/docker/Dockerfile.ubuntu.basic b/scripts/docker/Dockerfile.ubuntu.basic index ea1d9b9ba..adf204a52 100644 --- a/scripts/docker/Dockerfile.ubuntu.basic +++ b/scripts/docker/Dockerfile.ubuntu.basic @@ -21,6 +21,6 @@ RUN git clone https://github.com/moses-smt/mosesdecoder.git # Install Moses. WORKDIR /mosesdecoder -RUN make -f /mosesdecoder/mosesdecoder/contrib/Makefiles/install-dependencies.gmake -RUN ./compile.sh --max-kenlm-order=20 --max-factors=1000 +RUN make -f /mosesdecoder/contrib/Makefiles/install-dependencies.gmake +RUN /mosesdecoder/compile.sh --max-kenlm-order=20 --max-factors=1000 WORKDIR / diff --git a/scripts/docker/Dockerfile.ubuntu.fastlightpbmt b/scripts/docker/Dockerfile.ubuntu.fastlightpbmt index be17b83ba..8a0479724 100644 --- a/scripts/docker/Dockerfile.ubuntu.fastlightpbmt +++ b/scripts/docker/Dockerfile.ubuntu.fastlightpbmt @@ -23,7 +23,7 @@ RUN git clone https://github.com/jonsafari/clustercat.git # Install Moses. 
WORKDIR /mosesdecoder -RUN make -f /mosesdecoder/mosesdecoder/contrib/Makefiles/install-dependencies.gmake +RUN make -f /mosesdecoder/contrib/Makefiles/install-dependencies.gmake RUN /mosesdecoder/compile.sh --max-kenlm-order=20 --max-factors=1000 WORKDIR / From b99af321138bb3661e063fb43113680f501ab97e Mon Sep 17 00:00:00 2001 From: Rico Sennrich Date: Mon, 24 Apr 2017 12:16:36 +0100 Subject: [PATCH 163/176] fix split-input if it is passed, but if output-splitter is defined --- scripts/ems/experiment.meta | 58 ++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta index d6e6dc133..4c0a9794e 100644 --- a/scripts/ems/experiment.meta +++ b/scripts/ems/experiment.meta @@ -969,21 +969,6 @@ parse-input-devtest pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval ignore-unless: use-mira template: $input-parser < IN > OUT -parse-relax-input - in: split-input - out: input - default-name: tuning/input.parse-relaxed - pass-unless: input-parse-relaxer - pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval - template: $input-parse-relaxer < IN > OUT -parse-relax-input-devtest - in: split-input-devtest - out: input-devtest - default-name: tuning/input.devtest.parse-relaxed - pass-unless: input-parse-relaxer - pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval - ignore-unless: use-mira - template: $input-parse-relaxer < IN > OUT factorize-input in: parsed-input out: factorized-input @@ -1059,6 +1044,21 @@ split-input-devtest pass-unless: input-splitter ignore-unless: use-mira template: $input-splitter -model IN1.$input-extension < IN > OUT +parse-relax-input + in: split-input + out: input + default-name: tuning/input.parse-relaxed + pass-unless: input-parse-relaxer + pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval + template: $input-parse-relaxer < IN > OUT +parse-relax-input-devtest + in: split-input-devtest + 
out: input-devtest + default-name: tuning/input.devtest.parse-relaxed + pass-unless: input-parse-relaxer + pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval + ignore-unless: use-mira + template: $input-parse-relaxer < IN > OUT reference-from-sgm in: reference-sgm input-sgm out: raw-reference @@ -1252,20 +1252,6 @@ mock-parse-input default-name: evaluation/input.mock-parsed pass-unless: mock-input-parser-devtesteval template: $mock-input-parser-devtesteval < IN > OUT -parse-input - in: mock-parsed-input - out: parsed-input - default-name: evaluation/input.parsed - pass-unless: input-parser - pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval - template: $input-parser < IN > OUT -parse-relax-input - in: split-input - out: input - default-name: evaluation/input.parse-relaxed - pass-unless: input-parse-relaxer - pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval - template: $input-parse-relaxer < IN > OUT factorize-input in: parsed-input out: factorized-input @@ -1303,6 +1289,20 @@ split-input default-name: evaluation/input.split pass-unless: input-splitter template: $input-splitter -model IN1.$input-extension < IN > OUT +parse-input + in: mock-parsed-input + out: parsed-input + default-name: evaluation/input.parsed + pass-unless: input-parser + pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval + template: $input-parser < IN > OUT +parse-relax-input + in: split-input + out: input + default-name: evaluation/input.parse-relaxed + pass-unless: input-parse-relaxer + pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval + template: $input-parse-relaxer < IN > OUT filter in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains TRAINING:transliteration-table out: filtered-dir From 61f5b49deed593504b0060ab09948fb3e5c0f252 Mon Sep 17 00:00:00 2001 From: Rico Sennrich 
Date: Mon, 24 Apr 2017 13:29:39 +0100 Subject: [PATCH 164/176] fix rdlm training - train_host option was missing --- scripts/training/rdlm/train_rdlm.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/training/rdlm/train_rdlm.py b/scripts/training/rdlm/train_rdlm.py index 289ab405c..51dec6dae 100755 --- a/scripts/training/rdlm/train_rdlm.py +++ b/scripts/training/rdlm/train_rdlm.py @@ -102,6 +102,9 @@ parser.add_argument( parser.add_argument( "--mmap", dest="mmap", action="store_true", help="Use memory-mapped file (for lower memory consumption).") +parser.add_argument( + "--train-host", dest="train_host", + help="Execute nplm training on this host, via ssh") parser.set_defaults( From ae476ae5311db5fa61f9cebf4db6b241459f4751 Mon Sep 17 00:00:00 2001 From: Rico Sennrich Date: Mon, 24 Apr 2017 15:30:17 +0100 Subject: [PATCH 165/176] fix rdlm training - extra-settings was missing --- scripts/training/rdlm/train_rdlm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/training/rdlm/train_rdlm.py b/scripts/training/rdlm/train_rdlm.py index 51dec6dae..7915e454c 100755 --- a/scripts/training/rdlm/train_rdlm.py +++ b/scripts/training/rdlm/train_rdlm.py @@ -105,6 +105,8 @@ parser.add_argument( parser.add_argument( "--train-host", dest="train_host", help="Execute nplm training on this host, via ssh") +parser.add_argument("--extra-settings", dest="extra_settings", + help="Extra settings to be passed to NPLM") parser.set_defaults( From 80791d1767db8560892b6414721b4ac1cbb1158b Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 26 Apr 2017 13:10:32 +0100 Subject: [PATCH 166/176] rename Skeleton... to Example... 
--- contrib/other-builds/moses/.project | 100 +++++++++--------- ...onStatefulFF.cpp => ExampleStatefulFF.cpp} | 20 ++-- ...eletonStatefulFF.h => ExampleStatefulFF.h} | 12 +-- ...StatelessFF.cpp => ExampleStatelessFF.cpp} | 16 +-- ...etonStatelessFF.h => ExampleStatelessFF.h} | 4 +- moses/FF/Factory.cpp | 16 +-- moses/LM/{SkeletonLM.cpp => ExampleLM.cpp} | 8 +- moses/LM/{SkeletonLM.h => ExampleLM.h} | 6 +- moses/LM/Jamfile | 2 +- ....cpp => ChartRuleLookupManagerExample.cpp} | 20 ++-- ...eton.h => ChartRuleLookupManagerExample.h} | 12 +-- .../{SkeletonPT.cpp => ExamplePT.cpp} | 26 ++--- .../{SkeletonPT.h => ExamplePT.h} | 6 +- .../PhraseDictionaryMemoryPerSentence.cpp | 2 +- ...aseDictionaryMemoryPerSentenceOnDemand.cpp | 1 - .../PhraseDictionaryTransliteration.cpp | 1 - moses/TranslationModel/ProbingPT.cpp | 1 - 17 files changed, 125 insertions(+), 128 deletions(-) rename moses/FF/{SkeletonStatefulFF.cpp => ExampleStatefulFF.cpp} (79%) rename moses/FF/{SkeletonStatefulFF.h => ExampleStatefulFF.h} (90%) rename moses/FF/{SkeletonStatelessFF.cpp => ExampleStatelessFF.cpp} (69%) rename moses/FF/{SkeletonStatelessFF.h => ExampleStatelessFF.h} (92%) rename moses/LM/{SkeletonLM.cpp => ExampleLM.cpp} (84%) rename moses/LM/{SkeletonLM.h => ExampleLM.h} (67%) rename moses/TranslationModel/CYKPlusParser/{ChartRuleLookupManagerSkeleton.cpp => ChartRuleLookupManagerExample.cpp} (86%) rename moses/TranslationModel/CYKPlusParser/{ChartRuleLookupManagerSkeleton.h => ChartRuleLookupManagerExample.h} (85%) rename moses/TranslationModel/{SkeletonPT.cpp => ExamplePT.cpp} (70%) rename moses/TranslationModel/{SkeletonPT.h => ExamplePT.h} (81%) diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index d7e99c253..68caf84b3 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -1200,6 +1200,26 @@ 1 PARENT-3-PROJECT_LOC/moses/FF/EditOps.h + + FF/ExampleStatefulFF.cpp + 1 + 
PARENT-3-PROJECT_LOC/moses/FF/ExampleStatefulFF.cpp + + + FF/ExampleStatefulFF.h + 1 + PARENT-3-PROJECT_LOC/moses/FF/ExampleStatefulFF.h + + + FF/ExampleStatelessFF.cpp + 1 + PARENT-3-PROJECT_LOC/moses/FF/ExampleStatelessFF.cpp + + + FF/ExampleStatelessFF.h + 1 + PARENT-3-PROJECT_LOC/moses/FF/ExampleStatelessFF.h + FF/FFState.cpp 1 @@ -1420,26 +1440,6 @@ 1 PARENT-3-PROJECT_LOC/moses/FF/SetSourcePhrase.h - - FF/SkeletonStatefulFF.cpp - 1 - PARENT-3-PROJECT_LOC/moses/FF/SkeletonStatefulFF.cpp - - - FF/SkeletonStatefulFF.h - 1 - PARENT-3-PROJECT_LOC/moses/FF/SkeletonStatefulFF.h - - - FF/SkeletonStatelessFF.cpp - 1 - PARENT-3-PROJECT_LOC/moses/FF/SkeletonStatelessFF.cpp - - - FF/SkeletonStatelessFF.h - 1 - PARENT-3-PROJECT_LOC/moses/FF/SkeletonStatelessFF.h - FF/SoftMatchingFeature.cpp 1 @@ -1695,6 +1695,16 @@ 1 PARENT-3-PROJECT_LOC/moses/LM/DALMWrapper.h + + LM/ExampleLM.cpp + 1 + PARENT-3-PROJECT_LOC/moses/LM/ExampleLM.cpp + + + LM/ExampleLM.h + 1 + PARENT-3-PROJECT_LOC/moses/LM/ExampleLM.h + LM/IRST.cpp 1 @@ -1845,16 +1855,6 @@ 1 PARENT-3-PROJECT_LOC/moses/LM/SingleFactor.h - - LM/SkeletonLM.cpp - 1 - PARENT-3-PROJECT_LOC/moses/LM/SkeletonLM.cpp - - - LM/SkeletonLM.h - 1 - PARENT-3-PROJECT_LOC/moses/LM/SkeletonLM.h - LM/backward.arpa 1 @@ -2140,6 +2140,16 @@ 2 virtual:/virtual + + TranslationModel/ExamplePT.cpp + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/ExamplePT.cpp + + + TranslationModel/ExamplePT.h + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/ExamplePT.h + TranslationModel/PhraseDictionary.cpp 1 @@ -2290,16 +2300,6 @@ 2 virtual:/virtual - - TranslationModel/SkeletonPT.cpp - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/SkeletonPT.cpp - - - TranslationModel/SkeletonPT.h - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/SkeletonPT.h - TranslationModel/UG 2 @@ -3135,6 +3135,16 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h + + TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.cpp + 1 + 
PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.cpp + + + TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h + TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp 1 @@ -3165,16 +3175,6 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h - - TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp - - - TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h - TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp 1 diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/ExampleStatefulFF.cpp similarity index 79% rename from moses/FF/SkeletonStatefulFF.cpp rename to moses/FF/ExampleStatefulFF.cpp index 2acaf2d2e..5a53c4f87 100644 --- a/moses/FF/SkeletonStatefulFF.cpp +++ b/moses/FF/ExampleStatefulFF.cpp @@ -1,5 +1,5 @@ #include -#include "SkeletonStatefulFF.h" +#include "ExampleStatefulFF.h" #include "moses/ScoreComponentCollection.h" #include "moses/Hypothesis.h" @@ -9,7 +9,7 @@ namespace Moses { //////////////////////////////////////////////////////////////// -SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line) +ExampleStatefulFF::ExampleStatefulFF(const std::string &line) :StatefulFeatureFunction(3, line) { ReadParameters(); @@ -19,7 +19,7 @@ SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line) // An empty implementation of this function is provided by StatefulFeatureFunction. // Unless you are actually implementing this, please remove it from your // implementation (and the declaration in the header file to reduce code clutter. 
-void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source +void ExampleStatefulFF::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedScores) const @@ -28,7 +28,7 @@ void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source // An empty implementation of this function is provided by StatefulFeatureFunction. // Unless you are actually implementing this, please remove it from your // implementation (and the declaration in the header file to reduce code clutter. -void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input +void ExampleStatefulFF::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec @@ -39,11 +39,11 @@ void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input // An empty implementation of this function is provided by StatefulFeatureFunction. // Unless you are actually implementing this, please remove it from your // implementation (and the declaration in the header file to reduce code clutter. -void SkeletonStatefulFF::EvaluateTranslationOptionListWithSourceContext +void ExampleStatefulFF::EvaluateTranslationOptionListWithSourceContext (const InputType &input, const TranslationOptionList &translationOptionList) const {} -FFState* SkeletonStatefulFF::EvaluateWhenApplied( +FFState* ExampleStatefulFF::EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const @@ -59,18 +59,18 @@ FFState* SkeletonStatefulFF::EvaluateWhenApplied( accumulator->PlusEquals(this, "sparse-name", 2.4); // int targetLen = cur_hypo.GetCurrTargetPhrase().GetSize(); // ??? 
[UG] - return new SkeletonState(0); + return new ExampleState(0); } -FFState* SkeletonStatefulFF::EvaluateWhenApplied( +FFState* ExampleStatefulFF::EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const { - return new SkeletonState(0); + return new ExampleState(0); } -void SkeletonStatefulFF::SetParameter(const std::string& key, const std::string& value) +void ExampleStatefulFF::SetParameter(const std::string& key, const std::string& value) { if (key == "arg") { // set value here diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/ExampleStatefulFF.h similarity index 90% rename from moses/FF/SkeletonStatefulFF.h rename to moses/FF/ExampleStatefulFF.h index 7544ddd30..d66274295 100644 --- a/moses/FF/SkeletonStatefulFF.h +++ b/moses/FF/ExampleStatefulFF.h @@ -7,11 +7,11 @@ namespace Moses { -class SkeletonState : public FFState +class ExampleState : public FFState { int m_targetLen; public: - SkeletonState(int targetLen) + ExampleState(int targetLen) :m_targetLen(targetLen) { } @@ -19,22 +19,22 @@ public: return (size_t) m_targetLen; } virtual bool operator==(const FFState& o) const { - const SkeletonState& other = static_cast(o); + const ExampleState& other = static_cast(o); return m_targetLen == other.m_targetLen; } }; -class SkeletonStatefulFF : public StatefulFeatureFunction +class ExampleStatefulFF : public StatefulFeatureFunction { public: - SkeletonStatefulFF(const std::string &line); + ExampleStatefulFF(const std::string &line); bool IsUseable(const FactorMask &mask) const { return true; } virtual const FFState* EmptyHypothesisState(const InputType &input) const { - return new SkeletonState(0); + return new ExampleState(0); } // An empty implementation of this function is provided by StatefulFeatureFunction. 
diff --git a/moses/FF/SkeletonStatelessFF.cpp b/moses/FF/ExampleStatelessFF.cpp similarity index 69% rename from moses/FF/SkeletonStatelessFF.cpp rename to moses/FF/ExampleStatelessFF.cpp index 8474efe76..0e62ad0ad 100644 --- a/moses/FF/SkeletonStatelessFF.cpp +++ b/moses/FF/ExampleStatelessFF.cpp @@ -1,5 +1,5 @@ #include -#include "SkeletonStatelessFF.h" +#include "ExampleStatelessFF.h" #include "moses/ScoreComponentCollection.h" #include "moses/TargetPhrase.h" @@ -7,13 +7,13 @@ using namespace std; namespace Moses { -SkeletonStatelessFF::SkeletonStatelessFF(const std::string &line) +ExampleStatelessFF::ExampleStatelessFF(const std::string &line) :StatelessFeatureFunction(2, line) { ReadParameters(); } -void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source +void ExampleStatelessFF::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedScores) const @@ -29,7 +29,7 @@ void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source } -void SkeletonStatelessFF::EvaluateWithSourceContext(const InputType &input +void ExampleStatelessFF::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec @@ -43,20 +43,20 @@ void SkeletonStatelessFF::EvaluateWithSourceContext(const InputType &input } } -void SkeletonStatelessFF::EvaluateTranslationOptionListWithSourceContext(const InputType &input +void ExampleStatelessFF::EvaluateTranslationOptionListWithSourceContext(const InputType &input , const TranslationOptionList &translationOptionList) const {} -void SkeletonStatelessFF::EvaluateWhenApplied(const Hypothesis& hypo, +void ExampleStatelessFF::EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} -void SkeletonStatelessFF::EvaluateWhenApplied(const ChartHypothesis &hypo, +void ExampleStatelessFF::EvaluateWhenApplied(const ChartHypothesis 
&hypo, ScoreComponentCollection* accumulator) const {} -void SkeletonStatelessFF::SetParameter(const std::string& key, const std::string& value) +void ExampleStatelessFF::SetParameter(const std::string& key, const std::string& value) { if (key == "arg") { // set value here diff --git a/moses/FF/SkeletonStatelessFF.h b/moses/FF/ExampleStatelessFF.h similarity index 92% rename from moses/FF/SkeletonStatelessFF.h rename to moses/FF/ExampleStatelessFF.h index 0dc46e214..e1f007d21 100644 --- a/moses/FF/SkeletonStatelessFF.h +++ b/moses/FF/ExampleStatelessFF.h @@ -6,10 +6,10 @@ namespace Moses { -class SkeletonStatelessFF : public StatelessFeatureFunction +class ExampleStatelessFF : public StatelessFeatureFunction { public: - SkeletonStatelessFF(const std::string &line); + ExampleStatelessFF(const std::string &line); bool IsUseable(const FactorMask &mask) const { return true; diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index 5d37c4fed..de184ee04 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -65,13 +65,13 @@ #include "SyntaxRHS.h" #include "DeleteRules.h" -#include "moses/FF/SkeletonStatelessFF.h" -#include "moses/FF/SkeletonStatefulFF.h" -#include "moses/LM/SkeletonLM.h" +#include "moses/FF/ExampleStatelessFF.h" +#include "moses/FF/ExampleStatefulFF.h" +#include "moses/LM/ExampleLM.h" #include "moses/LM/InMemoryPerSentenceOnDemandLM.h" #include "moses/FF/SkeletonTranslationOptionListFeature.h" #include "moses/LM/BilingualLM.h" -#include "moses/TranslationModel/SkeletonPT.h" +#include "moses/TranslationModel/ExamplePT.h" #include "moses/Syntax/InputWeightFF.h" #include "moses/Syntax/RuleTableFF.h" @@ -297,12 +297,12 @@ FeatureRegistry::FeatureRegistry() MOSES_FNAME(UnalignedWordCountFeature); MOSES_FNAME(DeleteRules); - MOSES_FNAME(SkeletonStatelessFF); - MOSES_FNAME(SkeletonStatefulFF); - MOSES_FNAME(SkeletonLM); + MOSES_FNAME(ExampleStatelessFF); + MOSES_FNAME(ExampleStatefulFF); + MOSES_FNAME(ExampleLM); 
MOSES_FNAME(InMemoryPerSentenceOnDemandLM); MOSES_FNAME(SkeletonTranslationOptionListFeature); - MOSES_FNAME(SkeletonPT); + MOSES_FNAME(ExamplePT); MOSES_FNAME(EditOps); MOSES_FNAME(CorrectionPattern); diff --git a/moses/LM/SkeletonLM.cpp b/moses/LM/ExampleLM.cpp similarity index 84% rename from moses/LM/SkeletonLM.cpp rename to moses/LM/ExampleLM.cpp index f944de23a..034afef2e 100644 --- a/moses/LM/SkeletonLM.cpp +++ b/moses/LM/ExampleLM.cpp @@ -1,12 +1,12 @@ -#include "SkeletonLM.h" +#include "ExampleLM.h" #include "moses/FactorCollection.h" using namespace std; namespace Moses { -SkeletonLM::SkeletonLM(const std::string &line) +ExampleLM::ExampleLM(const std::string &line) :LanguageModelSingleFactor(line) { ReadParameters(); @@ -24,11 +24,11 @@ SkeletonLM::SkeletonLM(const std::string &line) m_sentenceEndWord[m_factorType] = m_sentenceEnd; } -SkeletonLM::~SkeletonLM() +ExampleLM::~ExampleLM() { } -LMResult SkeletonLM::GetValue(const vector &contextFactor, State* finalState) const +LMResult ExampleLM::GetValue(const vector &contextFactor, State* finalState) const { LMResult ret; ret.score = contextFactor.size(); diff --git a/moses/LM/SkeletonLM.h b/moses/LM/ExampleLM.h similarity index 67% rename from moses/LM/SkeletonLM.h rename to moses/LM/ExampleLM.h index 988c9def9..292462917 100644 --- a/moses/LM/SkeletonLM.h +++ b/moses/LM/ExampleLM.h @@ -7,13 +7,13 @@ namespace Moses { -class SkeletonLM : public LanguageModelSingleFactor +class ExampleLM : public LanguageModelSingleFactor { protected: public: - SkeletonLM(const std::string &line); - ~SkeletonLM(); + ExampleLM(const std::string &line); + ~ExampleLM(); virtual LMResult GetValue(const std::vector &contextFactor, State* finalState = 0) const; }; diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile index 4eafbd632..0c152d555 100644 --- a/moses/LM/Jamfile +++ b/moses/LM/Jamfile @@ -138,7 +138,7 @@ if $(with-dalm) { #Top-level LM library. If you've added a file that doesn't depend on external #libraries, put it here. 
-alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp InMemoryPerSentenceOnDemandLM.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp +alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp InMemoryPerSentenceOnDemandLM.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp ExampleLM.cpp ../../lm//kenlm ..//headers $(dependencies) ; alias macros : : : : $(lmmacros) ; diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.cpp similarity index 86% rename from moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp rename to moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.cpp index ca219f249..6c80e30af 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.cpp @@ -18,7 +18,7 @@ ***********************************************************************/ #include -#include "ChartRuleLookupManagerSkeleton.h" +#include "ChartRuleLookupManagerExample.h" #include "DotChartInMemory.h" #include "moses/Util.h" @@ -29,29 +29,29 @@ #include "moses/NonTerminal.h" #include "moses/ChartCellCollection.h" #include "moses/TranslationModel/PhraseDictionaryMemory.h" -#include "moses/TranslationModel/SkeletonPT.h" +#include "moses/TranslationModel/ExamplePT.h" using namespace std; namespace Moses { -ChartRuleLookupManagerSkeleton::ChartRuleLookupManagerSkeleton( +ChartRuleLookupManagerExample::ChartRuleLookupManagerExample( const ChartParser &parser, const ChartCellCollectionBase &cellColl, - const SkeletonPT &skeletonPt) + const ExamplePT &skeletonPt) : ChartRuleLookupManager(parser, cellColl) , m_skeletonPT(skeletonPt) { - cerr << "starting ChartRuleLookupManagerSkeleton" << endl; + cerr << "starting ChartRuleLookupManagerExample" << endl; } 
-ChartRuleLookupManagerSkeleton::~ChartRuleLookupManagerSkeleton() +ChartRuleLookupManagerExample::~ChartRuleLookupManagerExample() { // RemoveAllInColl(m_tpColl); } -void ChartRuleLookupManagerSkeleton::GetChartRuleCollection( +void ChartRuleLookupManagerExample::GetChartRuleCollection( const InputPath &inputPath, size_t last, ChartParserCallback &outColl) @@ -74,12 +74,12 @@ void ChartRuleLookupManagerSkeleton::GetChartRuleCollection( } TargetPhrase * -ChartRuleLookupManagerSkeleton:: +ChartRuleLookupManagerExample:: CreateTargetPhrase(const Word &sourceWord) const { - // create a target phrase from the 1st word of the source, prefix with 'ChartManagerSkeleton:' + // create a target phrase from the 1st word of the source, prefix with 'ChartManagerExample:' string str = sourceWord.GetFactor(0)->GetString().as_string(); - str = "ChartManagerSkeleton:" + str; + str = "ChartManagerExample:" + str; TargetPhrase *tp = new TargetPhrase(&m_skeletonPT); Word &word = tp->AddWord(); diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h similarity index 85% rename from moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h rename to moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h index d01f3b9bd..7cc6337fa 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h @@ -29,16 +29,16 @@ class TargetPhraseCollection; class ChartParserCallback; class DottedRuleColl; class Range; -class SkeletonPT; +class ExamplePT; -class ChartRuleLookupManagerSkeleton : public ChartRuleLookupManager +class ChartRuleLookupManagerExample : public ChartRuleLookupManager { public: - ChartRuleLookupManagerSkeleton(const ChartParser &parser, + ChartRuleLookupManagerExample(const ChartParser &parser, const ChartCellCollectionBase &cellColl, - const SkeletonPT 
&skeletonPt); + const ExamplePT &skeletonPt); - ~ChartRuleLookupManagerSkeleton(); + ~ChartRuleLookupManagerExample(); virtual void GetChartRuleCollection( const InputPath &inputPath, @@ -50,7 +50,7 @@ private: StackVec m_stackVec; std::vector m_tpColl; - const SkeletonPT &m_skeletonPT; + const ExamplePT &m_skeletonPT; }; } // namespace Moses diff --git a/moses/TranslationModel/SkeletonPT.cpp b/moses/TranslationModel/ExamplePT.cpp similarity index 70% rename from moses/TranslationModel/SkeletonPT.cpp rename to moses/TranslationModel/ExamplePT.cpp index 6b42212f9..198ce2814 100644 --- a/moses/TranslationModel/SkeletonPT.cpp +++ b/moses/TranslationModel/ExamplePT.cpp @@ -1,29 +1,29 @@ // vim:tabstop=2 -#include "SkeletonPT.h" -#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h" +#include "ExamplePT.h" +#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h" using namespace std; namespace Moses { -SkeletonPT::SkeletonPT(const std::string &line) +ExamplePT::ExamplePT(const std::string &line) : PhraseDictionary(line, true) { ReadParameters(); } -void SkeletonPT::Load(AllOptions::ptr const& opts) +void ExamplePT::Load(AllOptions::ptr const& opts) { m_options = opts; SetFeaturesToApply(); } -void SkeletonPT::InitializeForInput(ttasksptr const& ttask) +void ExamplePT::InitializeForInput(ttasksptr const& ttask) { ReduceCache(); } -void SkeletonPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const +void ExamplePT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const { CacheColl &cache = GetCache(); @@ -46,14 +46,14 @@ void SkeletonPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQu } } -TargetPhrase *SkeletonPT::CreateTargetPhrase(const Phrase &sourcePhrase) const +TargetPhrase *ExamplePT::CreateTargetPhrase(const Phrase &sourcePhrase) const { - // create a target phrase from the 1st word of the source, prefix with 'SkeletonPT:' + // create a target phrase from 
the 1st word of the source, prefix with 'ExamplePT:' assert(sourcePhrase.GetSize()); assert(m_output.size() == 1); string str = sourcePhrase.GetWord(0).GetFactor(0)->GetString().as_string(); - str = "SkeletonPT:" + str; + str = "ExamplePT:" + str; TargetPhrase *tp = new TargetPhrase(this); Word &word = tp->AddWord(); @@ -69,17 +69,17 @@ TargetPhrase *SkeletonPT::CreateTargetPhrase(const Phrase &sourcePhrase) const return tp; } -ChartRuleLookupManager* SkeletonPT::CreateRuleLookupManager(const ChartParser &parser, +ChartRuleLookupManager* ExamplePT::CreateRuleLookupManager(const ChartParser &parser, const ChartCellCollectionBase &cellCollection, std::size_t /*maxChartSpan*/) { - return new ChartRuleLookupManagerSkeleton(parser, cellCollection, *this); + return new ChartRuleLookupManagerExample(parser, cellCollection, *this); } -TO_STRING_BODY(SkeletonPT); +TO_STRING_BODY(ExamplePT); // friend -ostream& operator<<(ostream& out, const SkeletonPT& phraseDict) +ostream& operator<<(ostream& out, const ExamplePT& phraseDict) { return out; } diff --git a/moses/TranslationModel/SkeletonPT.h b/moses/TranslationModel/ExamplePT.h similarity index 81% rename from moses/TranslationModel/SkeletonPT.h rename to moses/TranslationModel/ExamplePT.h index 443f1cc8e..6ec7764c9 100644 --- a/moses/TranslationModel/SkeletonPT.h +++ b/moses/TranslationModel/ExamplePT.h @@ -9,12 +9,12 @@ class ChartParser; class ChartCellCollectionBase; class ChartRuleLookupManager; -class SkeletonPT : public PhraseDictionary +class ExamplePT : public PhraseDictionary { - friend std::ostream& operator<<(std::ostream&, const SkeletonPT&); + friend std::ostream& operator<<(std::ostream&, const ExamplePT&); public: - SkeletonPT(const std::string &line); + ExamplePT(const std::string &line); void Load(AllOptions::ptr const& opts); diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp b/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp index 36a28089b..fc62f0679 100644 --- 
a/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp +++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp @@ -1,6 +1,6 @@ // vim:tabstop=2 #include "PhraseDictionaryMemoryPerSentence.h" -#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h" +#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h" using namespace std; diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp index 072e482de..acf834cbd 100644 --- a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp +++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp @@ -1,6 +1,5 @@ // vim:tabstop=2 #include "PhraseDictionaryMemoryPerSentenceOnDemand.h" -#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h" #include using namespace std; diff --git a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp index 3d1664822..2ffe880c7 100644 --- a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp +++ b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp @@ -2,7 +2,6 @@ #include #include "PhraseDictionaryTransliteration.h" -#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h" #include "moses/DecodeGraph.h" #include "moses/DecodeStep.h" #include "util/tempfile.hh" diff --git a/moses/TranslationModel/ProbingPT.cpp b/moses/TranslationModel/ProbingPT.cpp index c513b7808..dca7835f5 100644 --- a/moses/TranslationModel/ProbingPT.cpp +++ b/moses/TranslationModel/ProbingPT.cpp @@ -4,7 +4,6 @@ #include "moses/FactorCollection.h" #include "moses/TargetPhraseCollection.h" #include "moses/InputFileStream.h" -#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h" #include "probingpt/querying.h" #include "probingpt/probing_hash_utils.h" From 
d3106b9d5af610c9294aa63226c978974ee96c92 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 26 Apr 2017 13:16:58 +0100 Subject: [PATCH 167/176] rename Skeleton... to Example... --- contrib/other-builds/moses/.project | 5 +++++ ...istFeature.h => ExampleTranslationOptionListFeature.h} | 4 ++-- moses/FF/Factory.cpp | 8 ++++---- 3 files changed, 11 insertions(+), 6 deletions(-) rename moses/FF/{SkeletonTranslationOptionListFeature.h => ExampleTranslationOptionListFeature.h} (93%) diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index 68caf84b3..a57a9df90 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -1220,6 +1220,11 @@ 1 PARENT-3-PROJECT_LOC/moses/FF/ExampleStatelessFF.h + + FF/ExampleTranslationOptionListFeature.h + 1 + PARENT-3-PROJECT_LOC/moses/FF/ExampleTranslationOptionListFeature.h + FF/FFState.cpp 1 diff --git a/moses/FF/SkeletonTranslationOptionListFeature.h b/moses/FF/ExampleTranslationOptionListFeature.h similarity index 93% rename from moses/FF/SkeletonTranslationOptionListFeature.h rename to moses/FF/ExampleTranslationOptionListFeature.h index e47e691aa..7686eb3ff 100644 --- a/moses/FF/SkeletonTranslationOptionListFeature.h +++ b/moses/FF/ExampleTranslationOptionListFeature.h @@ -6,10 +6,10 @@ namespace Moses { -class SkeletonTranslationOptionListFeature : public StatelessFeatureFunction +class ExampleTranslationOptionListFeature : public StatelessFeatureFunction { public: - SkeletonTranslationOptionListFeature(const std::string &line) + ExampleTranslationOptionListFeature(const std::string &line) :StatelessFeatureFunction(1, line) { ReadParameters(); } diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index de184ee04..398d6593c 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -68,13 +68,13 @@ #include "moses/FF/ExampleStatelessFF.h" #include "moses/FF/ExampleStatefulFF.h" #include "moses/LM/ExampleLM.h" -#include 
"moses/LM/InMemoryPerSentenceOnDemandLM.h" -#include "moses/FF/SkeletonTranslationOptionListFeature.h" +#include "moses/FF/ExampleTranslationOptionListFeature.h" #include "moses/LM/BilingualLM.h" #include "moses/TranslationModel/ExamplePT.h" #include "moses/Syntax/InputWeightFF.h" #include "moses/Syntax/RuleTableFF.h" +#include "moses/LM/InMemoryPerSentenceOnDemandLM.h" #include "moses/FF/EditOps.h" #include "moses/FF/CorrectionPattern.h" @@ -300,10 +300,10 @@ FeatureRegistry::FeatureRegistry() MOSES_FNAME(ExampleStatelessFF); MOSES_FNAME(ExampleStatefulFF); MOSES_FNAME(ExampleLM); - MOSES_FNAME(InMemoryPerSentenceOnDemandLM); - MOSES_FNAME(SkeletonTranslationOptionListFeature); + MOSES_FNAME(ExampleTranslationOptionListFeature); MOSES_FNAME(ExamplePT); + MOSES_FNAME(InMemoryPerSentenceOnDemandLM); MOSES_FNAME(EditOps); MOSES_FNAME(CorrectionPattern); From 6ca7ba5de1ea93dd7ee04f3060614d5152a29c2d Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 26 Apr 2017 13:27:32 +0100 Subject: [PATCH 168/176] moses2 rename Skeleton... to Example... 
--- contrib/other-builds/moses2/.project | 40 +++++++++---------- ...onStatefulFF.cpp => ExampleStatefulFF.cpp} | 32 +++++++-------- ...eletonStatefulFF.h => ExampleStatefulFF.h} | 12 +++--- ...StatelessFF.cpp => ExampleStatelessFF.cpp} | 10 ++--- ...etonStatelessFF.h => ExampleStatelessFF.h} | 6 +-- moses2/FF/FeatureRegistry.cpp | 8 ++-- moses2/Jamfile | 4 +- 7 files changed, 55 insertions(+), 57 deletions(-) rename moses2/FF/{SkeletonStatefulFF.cpp => ExampleStatefulFF.cpp} (59%) rename moses2/FF/{SkeletonStatefulFF.h => ExampleStatefulFF.h} (82%) rename moses2/FF/{SkeletonStatelessFF.cpp => ExampleStatelessFF.cpp} (63%) rename moses2/FF/{SkeletonStatelessFF.h => ExampleStatelessFF.h} (80%) diff --git a/contrib/other-builds/moses2/.project b/contrib/other-builds/moses2/.project index 8677885e1..1588c88b6 100644 --- a/contrib/other-builds/moses2/.project +++ b/contrib/other-builds/moses2/.project @@ -357,6 +357,26 @@ 1 PARENT-3-PROJECT_LOC/moses2/FF/Distortion.h + + FF/ExampleStatefulFF.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/ExampleStatefulFF.cpp + + + FF/ExampleStatefulFF.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/ExampleStatefulFF.h + + + FF/ExampleStatelessFF.cpp + 1 + PARENT-3-PROJECT_LOC/moses2/FF/ExampleStatelessFF.cpp + + + FF/ExampleStatelessFF.h + 1 + PARENT-3-PROJECT_LOC/moses2/FF/ExampleStatelessFF.h + FF/FFState.cpp 1 @@ -427,26 +447,6 @@ 1 PARENT-3-PROJECT_LOC/moses2/FF/PointerState.h - - FF/SkeletonStatefulFF.cpp - 1 - PARENT-3-PROJECT_LOC/moses2/FF/SkeletonStatefulFF.cpp - - - FF/SkeletonStatefulFF.h - 1 - PARENT-3-PROJECT_LOC/moses2/FF/SkeletonStatefulFF.h - - - FF/SkeletonStatelessFF.cpp - 1 - PARENT-3-PROJECT_LOC/moses2/FF/SkeletonStatelessFF.cpp - - - FF/SkeletonStatelessFF.h - 1 - PARENT-3-PROJECT_LOC/moses2/FF/SkeletonStatelessFF.h - FF/StatefulFeatureFunction.cpp 1 diff --git a/moses2/FF/SkeletonStatefulFF.cpp b/moses2/FF/ExampleStatefulFF.cpp similarity index 59% rename from moses2/FF/SkeletonStatefulFF.cpp rename to 
moses2/FF/ExampleStatefulFF.cpp index c4c2b7329..86b364f53 100644 --- a/moses2/FF/SkeletonStatefulFF.cpp +++ b/moses2/FF/ExampleStatefulFF.cpp @@ -1,11 +1,11 @@ /* - * SkeletonStatefulFF.cpp + * ExampleStatefulFF.cpp * * Created on: 27 Oct 2015 * Author: hieu */ #include -#include "SkeletonStatefulFF.h" +#include "ExampleStatefulFF.h" #include "../PhraseBased/Manager.h" #include "../PhraseBased/Hypothesis.h" @@ -14,12 +14,12 @@ using namespace std; namespace Moses2 { -class SkeletonState: public FFState +class ExampleState: public FFState { public: int targetLen; - SkeletonState() { + ExampleState() { // uninitialised } @@ -27,7 +27,7 @@ public: return (size_t) targetLen; } virtual bool operator==(const FFState& o) const { - const SkeletonState& other = static_cast(o); + const ExampleState& other = static_cast(o); return targetLen == other.targetLen; } @@ -40,52 +40,52 @@ public: }; //////////////////////////////////////////////////////////////////////////////////////// -SkeletonStatefulFF::SkeletonStatefulFF(size_t startInd, const std::string &line) : +ExampleStatefulFF::ExampleStatefulFF(size_t startInd, const std::string &line) : StatefulFeatureFunction(startInd, line) { ReadParameters(); } -SkeletonStatefulFF::~SkeletonStatefulFF() +ExampleStatefulFF::~ExampleStatefulFF() { // TODO Auto-generated destructor stub } -FFState* SkeletonStatefulFF::BlankState(MemPool &pool, const System &sys) const +FFState* ExampleStatefulFF::BlankState(MemPool &pool, const System &sys) const { - return new (pool.Allocate()) SkeletonState(); + return new (pool.Allocate()) ExampleState(); } -void SkeletonStatefulFF::EmptyHypothesisState(FFState &state, +void ExampleStatefulFF::EmptyHypothesisState(FFState &state, const ManagerBase &mgr, const InputType &input, const Hypothesis &hypo) const { - SkeletonState &stateCast = static_cast(state); + ExampleState &stateCast = static_cast(state); stateCast.targetLen = 0; } -void SkeletonStatefulFF::EvaluateInIsolation(MemPool &pool, +void 
ExampleStatefulFF::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, SCORE &estimatedScore) const { } -void SkeletonStatefulFF::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, +void ExampleStatefulFF::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, const TargetPhrase &targetPhrase, Scores &scores, SCORE &estimatedScore) const { } -void SkeletonStatefulFF::EvaluateWhenApplied(const ManagerBase &mgr, +void ExampleStatefulFF::EvaluateWhenApplied(const ManagerBase &mgr, const Hypothesis &hypo, const FFState &prevState, Scores &scores, FFState &state) const { - SkeletonState &stateCast = static_cast(state); + ExampleState &stateCast = static_cast(state); stateCast.targetLen = hypo.GetTargetPhrase().GetSize(); } -void SkeletonStatefulFF::EvaluateWhenApplied(const SCFG::Manager &mgr, +void ExampleStatefulFF::EvaluateWhenApplied(const SCFG::Manager &mgr, const SCFG::Hypothesis &hypo, int featureID, Scores &scores, FFState &state) const { diff --git a/moses2/FF/SkeletonStatefulFF.h b/moses2/FF/ExampleStatefulFF.h similarity index 82% rename from moses2/FF/SkeletonStatefulFF.h rename to moses2/FF/ExampleStatefulFF.h index 79256f2b3..437f54515 100644 --- a/moses2/FF/SkeletonStatefulFF.h +++ b/moses2/FF/ExampleStatefulFF.h @@ -1,23 +1,22 @@ /* - * SkeletonStatefulFF.h + * ExampleStatefulFF.h * * Created on: 27 Oct 2015 * Author: hieu */ -#ifndef SKELETONSTATEFULFF_H_ -#define SKELETONSTATEFULFF_H_ +#pragma once #include "StatefulFeatureFunction.h" namespace Moses2 { -class SkeletonStatefulFF: public StatefulFeatureFunction +class ExampleStatefulFF: public StatefulFeatureFunction { public: - SkeletonStatefulFF(size_t startInd, const std::string &line); - virtual ~SkeletonStatefulFF(); + ExampleStatefulFF(size_t startInd, const std::string &line); + virtual ~ExampleStatefulFF(); virtual FFState* BlankState(MemPool &pool, const System 
&sys) const; virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr, @@ -45,4 +44,3 @@ public: } -#endif /* SKELETONSTATEFULFF_H_ */ diff --git a/moses2/FF/SkeletonStatelessFF.cpp b/moses2/FF/ExampleStatelessFF.cpp similarity index 63% rename from moses2/FF/SkeletonStatelessFF.cpp rename to moses2/FF/ExampleStatelessFF.cpp index 4875f155d..ab6260034 100644 --- a/moses2/FF/SkeletonStatelessFF.cpp +++ b/moses2/FF/ExampleStatelessFF.cpp @@ -6,31 +6,31 @@ */ #include "../Scores.h" -#include "SkeletonStatelessFF.h" +#include "ExampleStatelessFF.h" namespace Moses2 { -SkeletonStatelessFF::SkeletonStatelessFF(size_t startInd, +ExampleStatelessFF::ExampleStatelessFF(size_t startInd, const std::string &line) : StatelessFeatureFunction(startInd, line) { ReadParameters(); } -SkeletonStatelessFF::~SkeletonStatelessFF() +ExampleStatelessFF::~ExampleStatelessFF() { // TODO Auto-generated destructor stub } -void SkeletonStatelessFF::EvaluateInIsolation(MemPool &pool, +void ExampleStatelessFF::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, SCORE &estimatedScore) const { } -void SkeletonStatelessFF::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, +void ExampleStatelessFF::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, const TargetPhrase &targetPhrase, Scores &scores, SCORE &estimatedScore) const { diff --git a/moses2/FF/SkeletonStatelessFF.h b/moses2/FF/ExampleStatelessFF.h similarity index 80% rename from moses2/FF/SkeletonStatelessFF.h rename to moses2/FF/ExampleStatelessFF.h index f7e95005f..20b1acaaf 100644 --- a/moses2/FF/SkeletonStatelessFF.h +++ b/moses2/FF/ExampleStatelessFF.h @@ -12,11 +12,11 @@ namespace Moses2 { -class SkeletonStatelessFF: public StatelessFeatureFunction +class ExampleStatelessFF: public StatelessFeatureFunction { public: - SkeletonStatelessFF(size_t startInd, const std::string &line); - 
virtual ~SkeletonStatelessFF(); + ExampleStatelessFF(size_t startInd, const std::string &line); + virtual ~ExampleStatelessFF(); virtual void EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, diff --git a/moses2/FF/FeatureRegistry.cpp b/moses2/FF/FeatureRegistry.cpp index f7eae4205..3947d58c0 100644 --- a/moses2/FF/FeatureRegistry.cpp +++ b/moses2/FF/FeatureRegistry.cpp @@ -16,8 +16,8 @@ #include "WordPenalty.h" #include "OSM/OpSequenceModel.h" -#include "SkeletonStatefulFF.h" -#include "SkeletonStatelessFF.h" +#include "ExampleStatefulFF.h" +#include "ExampleStatelessFF.h" using namespace std; @@ -70,8 +70,8 @@ FeatureRegistry::FeatureRegistry() MOSES_FNAME(WordPenalty); MOSES_FNAME(OpSequenceModel); - MOSES_FNAME(SkeletonStatefulFF); - MOSES_FNAME(SkeletonStatelessFF); + MOSES_FNAME(ExampleStatefulFF); + MOSES_FNAME(ExampleStatelessFF); } FeatureRegistry::~FeatureRegistry() diff --git a/moses2/Jamfile b/moses2/Jamfile index 886e81d8c..42676c065 100644 --- a/moses2/Jamfile +++ b/moses2/Jamfile @@ -47,8 +47,8 @@ alias deps : ..//z ..//boost_iostreams ..//boost_filesystem : : : $(max-factors FF/FeatureFunctions.cpp FF/FeatureRegistry.cpp FF/PhrasePenalty.cpp - FF/SkeletonStatefulFF.cpp - FF/SkeletonStatelessFF.cpp + FF/ExampleStatefulFF.cpp + FF/ExampleStatelessFF.cpp FF/StatefulFeatureFunction.cpp FF/StatelessFeatureFunction.cpp FF/WordPenalty.cpp From ac66370b80d99df64f9b1e8e806192a17eb6e584 Mon Sep 17 00:00:00 2001 From: MosesAdmin Date: Thu, 27 Apr 2017 00:00:40 +0100 Subject: [PATCH 169/176] daily automatic beautifier --- .../CYKPlusParser/ChartRuleLookupManagerExample.h | 4 ++-- moses2/FF/ExampleStatelessFF.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h index 7cc6337fa..3b3f59ace 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h +++ 
b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h @@ -35,8 +35,8 @@ class ChartRuleLookupManagerExample : public ChartRuleLookupManager { public: ChartRuleLookupManagerExample(const ChartParser &parser, - const ChartCellCollectionBase &cellColl, - const ExamplePT &skeletonPt); + const ChartCellCollectionBase &cellColl, + const ExamplePT &skeletonPt); ~ChartRuleLookupManagerExample(); diff --git a/moses2/FF/ExampleStatelessFF.cpp b/moses2/FF/ExampleStatelessFF.cpp index ab6260034..29716aaf8 100644 --- a/moses2/FF/ExampleStatelessFF.cpp +++ b/moses2/FF/ExampleStatelessFF.cpp @@ -12,7 +12,7 @@ namespace Moses2 { ExampleStatelessFF::ExampleStatelessFF(size_t startInd, - const std::string &line) : + const std::string &line) : StatelessFeatureFunction(startInd, line) { ReadParameters(); From 2ea75d91dcd0455c9e44e6eaa836a668d577adc8 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 27 Apr 2017 13:48:18 +0100 Subject: [PATCH 170/176] add new mteval script --- scripts/generic/mteval-v12.pl | 3 - scripts/generic/mteval-v13a.pl | 3 - scripts/generic/mteval-v14.pl | 1179 ++++++++++++++++++++++++++++++++ 3 files changed, 1179 insertions(+), 6 deletions(-) create mode 100644 scripts/generic/mteval-v14.pl diff --git a/scripts/generic/mteval-v12.pl b/scripts/generic/mteval-v12.pl index b4dfbf83a..2666c8012 100755 --- a/scripts/generic/mteval-v12.pl +++ b/scripts/generic/mteval-v12.pl @@ -1,7 +1,4 @@ #!/usr/bin/env perl -# -# This file is part of moses. Its use is licensed under the GNU Lesser General -# Public License version 2.1 or, at your option, any later version. use warnings; use strict; diff --git a/scripts/generic/mteval-v13a.pl b/scripts/generic/mteval-v13a.pl index 2e5d29ad5..92afcbd71 100755 --- a/scripts/generic/mteval-v13a.pl +++ b/scripts/generic/mteval-v13a.pl @@ -1,7 +1,4 @@ #!/usr/bin/env perl -# -# This file is part of moses. 
Its use is licensed under the GNU Lesser General -# Public License version 2.1 or, at your option, any later version. use warnings; use strict; diff --git a/scripts/generic/mteval-v14.pl b/scripts/generic/mteval-v14.pl new file mode 100644 index 000000000..84a7549ac --- /dev/null +++ b/scripts/generic/mteval-v14.pl @@ -0,0 +1,1179 @@ +#!/usr/bin/env perl + +use warnings; +use strict; +use utf8; +use Encode; +use XML::Twig; +use Sort::Naturally; + +binmode STDOUT, ":utf8"; +binmode STDERR, ":utf8"; + + +################################# +# History: +# +# version 14 +# (2016-03-29 lukas.diduch@nist.gov) +# * Fixed warning message in case seg-id is a string, by sorting in correct order using Sort::Naturally. +# +# version 13b +# * Fixed die 'bug' in case seg->id = 0 +# +# version 13a +# * modified the scoring functions to prevent division-by-zero errors when a system segment is empty +# * affected methods: 'bleu_score' and 'bleu_score_smoothing' +# +# version 13 +# * Uses a XML parser to read data (only when extension is .xml) +# * Smoothing of the segment-level BLEU scores, done by default +# * smoothing method similar to that of bleu-1.04.pl (IBM) +# * see comments above the 'bleu_score' method for more details on how the smoothing is computed +# * added a '--no-smoothing' option to simulate old scripts behavior +# * Introduction of the 'brevity-penalty' option, taking one of two values: +# * 'closest' (default) : act as IBM BLEU (taking the closest reference translation length) +# * in case two reference translations are at the same distance, will take the shortest one +# * for more details regarding how the BP is computed, see comments of the 'brevity_penalty_closest' function +# * 'shortest' : act as previous versions of the script (taking shortest reference translation length) +# * Introduction of the 'international-tokenization' option, boolean, disabled by default +# by default (when the option is not provided), uses 11b's tokenization function +# when option 
specified, uses v12's tokenization function +# * Introduction of a 'Metrics MATR output' flag (option '--metricsMATR') +# when used, creates three files for both BLEU score and NIST score: +# * BLEU-seg.scr and NIST-seg.scr: contain segment-level scores +# * BLEU-doc.scr and NIST-doc.scr: contain document-level scores +# * BLEU-sys.scr and NIST-sys.scr: contain system-level scores +# * SGML parsing +# * script will halt if source, reference and test files don't share the same setid attribute value (used for metricsMATR output) +# * correct segment IDs extracted from the files (was previously using an array, and using the index as a segID for output) +# * detailed output flag (-d) can now be used when running both BLEU and NIST +# +# version 12 +# * Text normalization changes: +# * convert entity references (only the entities declared in the DTD) +# * now uses unicode categories +# * tokenize punctuation unless followed AND preceded by digits +# * tokenize symbols +# * UTF-8 handling: +# * files are now read using utf8 mode +# * Added the '-e' command-line option to enclose non-ASCII characters between spaces +# +# version 11b -- text normalization modified: +# * take out the join digit line because it joins digits +# when it shouldn't have +# $norm_text =~ s/(\d)\s+(?=\d)/$1/g; #join digits +# +# version 11a -- corrected output of individual n-gram precision values +# +# version 11 -- bug fixes: +# * make filehandle operate in binary mode to prevent Perl from operating +# (by default in Red Hat 9) in UTF-8 +# * fix failure on joining digits +# version 10 -- updated output to include more details of n-gram scoring. +# Defaults to generate both NIST and BLEU scores. Use -b for BLEU +# only, use -n for NIST only +# +# version 09d -- bug fix (for BLEU scoring, ngrams were fixed at 4 +# being the max, regardless what was entered on the command line.) 
+# +# version 09c -- bug fix (During the calculation of ngram information, +# each ngram was being counted only once for each segment. This has +# been fixed so that each ngram is counted correctly in each segment.) +# +# version 09b -- text normalization modified: +# * option flag added to preserve upper case +# * non-ASCII characters left in place. +# +# version 09a -- text normalization modified: +# * " and & converted to "" and &, respectively +# * non-ASCII characters kept together (bug fix) +# +# version 09 -- modified to accommodate sgml tag and attribute +# names revised to conform to default SGML conventions. +# +# version 08 -- modifies the NIST metric in accordance with the +# findings on the 2001 Chinese-English dry run corpus. Also +# incorporates the BLEU metric as an option and supports the +# output of ngram detail. +# +# version 07 -- in response to the MT meeting on 28 Jan 2002 at ISI +# Keep strings of non-ASCII characters together as one word +# (rather than splitting them into one-character words). +# Change length penalty so that translations that are longer than +# the average reference translation are not penalized. +# +# version 06 +# Prevent divide-by-zero when a segment has no evaluation N-grams. +# Correct segment index for level 3 debug output. +# +# version 05 +# improve diagnostic error messages +# +# version 04 +# tag segments +# +# version 03 +# add detailed output option (intermediate document and segment scores) +# +# version 02 +# accommodation of modified sgml tags and attributes +# +# version 01 +# same as bleu version 15, but modified to provide formal score output. +# +# original IBM version +# Author: Kishore Papineni +# Date: 06/10/2001 +################################# + +###### +# Intro +my ($date, $time) = date_time_stamp(); +print "MT evaluation scorer began on $date at $time\n"; +print "\ncommand line: ", $0, " ", join(" ", @ARGV), "\n"; +my $usage = "\n\nUsage: $0 -r -s -t \n\n". 
+ "Description: This Perl script evaluates MT system performance.\n". + "\n". + "Required arguments:\n". + " -r is a file containing the reference translations for\n". + " the documents to be evaluated.\n". + " -s is a file containing the source documents for which\n". + " translations are to be evaluated\n". + " -t is a file containing the translations to be evaluated\n". + "\n". + "Optional arguments:\n". + " -h prints this help message to STDOUT\n". + " -c preserves upper-case alphabetic characters\n". + " -b generate BLEU scores only\n". + " -n generate NIST scores only\n". + " -d detailed output flag:\n". + " 0 (default) for system-level score only\n". + " 1 to include document-level scores\n". + " 2 to include segment-level scores\n". + " 3 to include ngram-level scores\n". + " -e enclose non-ASCII characters between spaces\n". + " --brevity-penalty ( closest | shortest )\n" . + " closest (default) : acts as IBM BLEU (takes the closest reference translation length)\n" . + " shortest : acts as previous versions of the script (takes the shortest reference translation length)\n" . + " --international-tokenization\n" . + " when specified, uses Unicode-based (only) tokenization rules\n" . + " when not specified (default), uses default tokenization (some language-dependant rules)\n" . + " --metricsMATR : create three files for both BLEU scores and NIST scores:\n" . + " BLEU-seg.scr and NIST-seg.scr : segment-level scores\n" . + " BLEU-doc.scr and NIST-doc.scr : document-level scores\n" . + " BLEU-sys.scr and NIST-sys.scr : system-level scores\n" . + " --no-smoothing : disable smoothing on BLEU scores\n" . 
+ "\n"; + +use vars qw ($opt_r $opt_s $opt_t $opt_d $opt_h $opt_b $opt_n $opt_c $opt_x $opt_e); +use Getopt::Long; +my $ref_file = ''; +my $src_file = ''; +my $tst_file = ''; +my $detail = 0; +my $help = ''; +my $preserve_case = ''; +my $split_non_ASCII = ''; +my $brevity_penalty = 'closest'; +my $international_tokenization; +my $metricsMATR_output = ''; +my $no_smoothing = ''; +our $opt_x = ''; +our $opt_b = ''; +our $opt_n = ''; +GetOptions( + 'r=s' => \$ref_file, + 's=s' => \$src_file, + 't=s' => \$tst_file, + 'd:i' => \$detail, + 'h|help' => \$help, + 'b', + 'n', + 'c' => \$preserve_case, + 'x:s', + 'e' => \$split_non_ASCII, + 'brevity-penalty:s' => \$brevity_penalty, + 'international-tokenization' => \$international_tokenization, + 'metricsMATR-output' => \$metricsMATR_output, + 'no-smoothing' => \$no_smoothing +); +die $usage if $help; + +die "Error in command line: ref_file not defined$usage" unless ( $ref_file ); +die "Error in command line: src_file not defined$usage" unless ( $src_file ); +die "Error in command line: tst_file not defined$usage" unless ( $tst_file ); +my $BLEU_BP; +if ( !( $brevity_penalty cmp 'closest' ) ) +{ + $BLEU_BP = \&brevity_penalty_closest; +} +elsif ( !( $brevity_penalty cmp 'shortest' ) ) +{ + $BLEU_BP = \&brevity_penalty_shortest; +} +else +{ + die "Incorrect value supplied for 'brevity_penalty'$usage"; +} +my $TOKENIZATION = \&tokenization; +$TOKENIZATION = \&tokenization_international if ( $international_tokenization ); + +my $BLEU_SCORE = \&bleu_score; +$BLEU_SCORE = \&bleu_score_nosmoothing if ( $no_smoothing ); + +my $max_Ngram = 9; + +my $METHOD = "BOTH"; +if ( $opt_b ) { $METHOD = "BLEU"; } +if ( $opt_n ) { $METHOD = "NIST"; } +my $method; + +###### +# Global variables +my ($src_lang, $tgt_lang, @tst_sys, @ref_sys); # evaluation parameters +my (%tst_data, %ref_data); # the data -- with structure: {system}{document}{segments} +my ($src_id, $ref_id, $tst_id); # unique identifiers for ref and tst translation sets +my 
%eval_docs; # document information for the evaluation data set +my %ngram_info; # the information obtained from (the last word in) the ngram + +###### +# Get source document ID's +($src_id) = get_source_info ($src_file); + +###### +# Get reference translations +($ref_id) = get_MT_data (\%ref_data, "RefSet", $ref_file); + +compute_ngram_info (); + +###### +# Get translations to evaluate +($tst_id) = get_MT_data (\%tst_data, "TstSet", $tst_file); + +###### +# Check data for completeness and correctness +check_MT_data (); + +###### +# +my %NISTmt; +my %NISTOverall; +my %BLEUmt; +my %BLEUOverall; + +###### +# Evaluate +print "\nEvaluation of $src_lang-to-$tgt_lang translation using:\n"; +my $cum_seg = 0; +foreach my $doc (sort keys %eval_docs) +{ + $cum_seg += scalar( keys( %{$eval_docs{$doc}{SEGS}} ) ); +} +print " src set \"$src_id\" (", scalar keys %eval_docs, " docs, $cum_seg segs)\n"; +print " ref set \"$ref_id\" (", scalar keys %ref_data, " refs)\n"; +print " tst set \"$tst_id\" (", scalar keys %tst_data, " systems)\n\n"; + +foreach my $sys (sort @tst_sys) +{ + for (my $n=1; $n<=$max_Ngram; $n++) + { + $NISTmt{$n}{$sys}{cum} = 0; + $NISTmt{$n}{$sys}{ind} = 0; + $BLEUmt{$n}{$sys}{cum} = 0; + $BLEUmt{$n}{$sys}{ind} = 0; + } + if ( ($METHOD eq "BOTH") || ($METHOD eq "NIST") ) + { + $method="NIST"; + score_system ($sys, \%NISTmt, \%NISTOverall); + } + if ( ($METHOD eq "BOTH") || ($METHOD eq "BLEU") ) + { + $method="BLEU"; + score_system ($sys, \%BLEUmt, \%BLEUOverall); + } +} + +###### +printout_report (); +if ( $metricsMATR_output ) +{ + outputMetricsMATR( 'NIST', %NISTOverall ) if ( ( $METHOD eq 'BOTH' ) || ( $METHOD eq 'NIST' ) ); + outputMetricsMATR( 'BLEU', %BLEUOverall ) if ( ( $METHOD eq 'BOTH' ) || ( $METHOD eq 'BLEU' ) ); +} + +($date, $time) = date_time_stamp(); +print "\nMT evaluation scorer ended on $date at $time\n"; + +exit 0; + +################################# + +sub get_source_info +{ + my ($file) = @_; + my ($name, $id, $src, $doc, $seg); + my 
($data, $tag, $span); + + # Extension of the file determines the parser used: + # .xml : XML::Twig + # otherwise : simple SGML parsing functions + if ( $file =~ /\.xml$/i ) + { + my $twig = XML::Twig->new(); + $twig->parsefile( $file ); + my $root = $twig->root; + my $currentSet = $root->first_child( 'srcset' ); + die "Source XML file '$file' does not contain the 'srcset' element" if ( not $currentSet ); + $id = $currentSet->{ 'att' }->{ 'setid' } or die "No 'setid' attribute value in '$file'"; + $src = $currentSet->{ 'att' }->{ 'srclang' } or die "No srcset 'srclang' attribute value in '$file'"; + die "Not the same srclang attribute values across sets" unless ( not defined $src_lang or $src eq $src_lang ); + $src_lang = $src; + foreach my $currentDoc ( $currentSet->get_xpath( './/doc' ) ) + { + my $docID = $currentDoc->{ 'att' }->{ 'docid' } or die "No document 'docid' attribute value in '$file'"; + foreach my $currentSeg ( $currentDoc->get_xpath( './/seg' ) ) + { + + my $segID = $currentSeg->{ 'att' }->{ 'id' }; + die "No segment 'id' attribute value in '$file'" if (! defined $segID); + my $segData = $currentSeg->text; + ($eval_docs{$docID}{SEGS}{$segID}) = &{ $TOKENIZATION }( $segData ); + } + } + } + else + { + #read data from file + open (FILE, $file) or die "\nUnable to open translation data file '$file'", $usage; + binmode FILE, ":utf8"; + $data .= $_ while ; + close (FILE); + + #get source set info + die "\n\nFATAL INPUT ERROR: no 'src_set' tag in src_file '$file'\n\n" + unless ($tag, $span, $data) = extract_sgml_tag_and_span ("SrcSet", $data); + die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n" + unless ($id) = extract_sgml_tag_attribute ($name="SetID", $tag); + die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n" + unless ($src) = extract_sgml_tag_attribute ($name="SrcLang", $tag); + die "\n\nFATAL INPUT ERROR: $name ('$src') in file '$file' inconsistent\n" + ." 
with $name in previous input data ('$src_lang')\n\n" + unless (not defined $src_lang or $src eq $src_lang); + $src_lang = $src; + + #get doc info -- ID and # of segs + $data = $span; + while (($tag, $span, $data) = extract_sgml_tag_and_span ("Doc", $data)) + { + die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n" + unless ($doc) = extract_sgml_tag_attribute ($name="DocID", $tag); + die "\n\nFATAL INPUT ERROR: duplicate '$name' in file '$file'\n\n" + if defined $eval_docs{$doc}; + $span =~ s/[\s\n\r]+/ /g; # concatenate records + my $nseg=0, my $seg_data = $span; + while (($tag, $span, $seg_data) = extract_sgml_tag_and_span ("Seg", $seg_data)) + { + die "\n\nFATAL INPUT ERROR: no attribute '$name' in file '$file'\n\n" + unless ($seg) = extract_sgml_tag_attribute( $name='id', $tag ); + ($eval_docs{$doc}{SEGS}{$seg}) = &{ $TOKENIZATION }( $span ); + $nseg++; + } + die "\n\nFATAL INPUT ERROR: no segments in document '$doc' in file '$file'\n\n" + if $nseg == 0; + } + die "\n\nFATAL INPUT ERROR: no documents in file '$file'\n\n" + unless keys %eval_docs > 0; + } + return $id; +} + +################################# + +# get_MT_data( \%docs, $set_tag, $file ): load a reference or test translation file. +# Files whose name ends in .xml are parsed with XML::Twig (refset and tstset elements); any other file is read whole and scanned with extract_sgml_tag_and_span for $set_tag. +# Every segment is passed through $TOKENIZATION and stored in $docs->{sys}{doc}{SEGS}{seg}; the originating file is recorded in $docs->{sys}{doc}{FILE}. +# Dies when srclang differs from $src_lang or when trglang differs from an already-set $tgt_lang; sets $tgt_lang. +# Returns the setid of the last set read. +sub get_MT_data +{ + my ($docs, $set_tag, $file) = @_; + my ($name, $id, $src, $tgt, $sys, $doc, $seg); + my ($tag, $span, $data); + + # Extension of the file determines the parser used: + # .xml : XML::Twig + # otherwise : simple SGML parsing functions + if ( $file =~ /\.xml$/i ) + { + my $twig = XML::Twig->new(); + $twig->parsefile( $file ); + my $root = $twig->root; + foreach my $currentSet ( $root->get_xpath( 'refset' ), $root->get_xpath( 'tstset' ) ) + { + $id = $currentSet->{ 'att' }->{ 'setid' } or die "No 'setid' attribute value in '$file'"; + $src = $currentSet->{ 'att' }->{ 'srclang' } or die "No 'srclang' attribute value in '$file'"; + $tgt = $currentSet->{ 'att' }->{ 'trglang' } or die "No 'trglang' attribute value in '$file'"; + die "Not the same 'srclang' attribute value across sets" unless ( $src eq $src_lang ); + # compare with 'eq': a plain '=' here would overwrite $tgt and let the check pass for any true $tgt_lang value + die
"Not the same 'trglang' attribute value across sets" unless ( ( not defined $tgt_lang ) or ( $tgt eq $tgt_lang ) ); + $tgt_lang = $tgt; + my $sys; + if ( $currentSet->name eq 'tstset' ) + { + $sys = $currentSet->{ 'att' }->{ 'sysid' } or die "No 'sysid' attribute value in '$file'"; + } + else + { + $sys = $currentSet->{ 'att' }->{ 'refid' } or die "No 'refid' attribute value in '$file'"; + } + foreach my $currentDoc ( $currentSet->get_xpath( './/doc' ) ) + { + my $docID = $currentDoc->{ 'att' }->{ 'docid' } or die "No document 'docid' attribute value in '$file'"; + $docs->{ $sys }{ $docID }{ FILE } = $file; + foreach my $currentSeg ( $currentDoc->get_xpath( './/seg' ) ) + { + my $segID = $currentSeg->{ 'att' }->{ 'id' }; + die "No segment 'id' attribute value in '$file'" if (! defined $segID); + my $segData = $currentSeg->text; + ($docs->{$sys}{$docID}{SEGS}{$segID}) = &{ $TOKENIZATION }( $segData ); + } + } + } + } + else + { + #read data from file + open (FILE, $file) or die "\nUnable to open translation data file '$file'", $usage; + binmode FILE, ":utf8"; + $data .= $_ while <FILE>; + close (FILE); + + #get tag info + while (($tag, $span, $data) = extract_sgml_tag_and_span ($set_tag, $data)) + { + die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n" + unless ($id) = extract_sgml_tag_attribute ($name="SetID", $tag); + die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n" + unless ($src) = extract_sgml_tag_attribute ($name="SrcLang", $tag); + die "\n\nFATAL INPUT ERROR: $name ('$src') in file '$file' inconsistent\n" + ." with $name of source ('$src_lang')\n\n" + unless $src eq $src_lang; + die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n" + unless ($tgt) = extract_sgml_tag_attribute ($name="TrgLang", $tag); + die "\n\nFATAL INPUT ERROR: $name ('$tgt') in file '$file' inconsistent\n" + ."
with $name of the evaluation ('$tgt_lang')\n\n" + unless (not defined $tgt_lang or $tgt eq $tgt_lang); + $tgt_lang = $tgt; + + my $mtdata = $span; + while (($tag, $span, $mtdata) = extract_sgml_tag_and_span ("Doc", $mtdata)) + { + die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n" + unless (my $sys) = extract_sgml_tag_attribute ($name="SysID", $tag); + die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n" + unless $doc = extract_sgml_tag_attribute ($name="DocID", $tag); + die "\n\nFATAL INPUT ERROR: document '$doc' for system '$sys' in file '$file'\n" + ." previously loaded from file '$docs->{$sys}{$doc}{FILE}'\n\n" + unless (not defined $docs->{$sys}{$doc}); + + $span =~ s/[\s\n\r]+/ /g; # concatenate records + my $nseg=0, my $seg_data = $span; + while (($tag, $span, $seg_data) = extract_sgml_tag_and_span ("Seg", $seg_data)) + { + die "\n\nFATAL INPUT ERROR: no tag attribute '$name' in file '$file'\n\n" + unless $seg = extract_sgml_tag_attribute( $name="id", $tag ); + ($docs->{$sys}{$doc}{SEGS}{$seg}) = &{ $TOKENIZATION }( $span ); + $nseg++; + } + die "\n\nFATAL INPUT ERROR: no segments in document '$doc' in file '$file'\n\n" if $nseg == 0; + $docs->{$sys}{$doc}{FILE} = $file; + } + } + } + return $id; +} + +################################# + +sub check_MT_data +{ + @tst_sys = sort keys %tst_data; + @ref_sys = sort keys %ref_data; + + die "Not the same 'setid' attribute values across files" unless ( ( $src_id eq $tst_id ) && ( $src_id eq $ref_id ) ); + +#every evaluation document must be represented for every system and every reference + foreach my $doc (sort keys %eval_docs) + { + my $nseg_source = scalar( keys( %{$eval_docs{$doc}{SEGS}} ) ); + foreach my $sys (@tst_sys) + { + die "\n\nFATAL ERROR: no document '$doc' for system '$sys'\n\n" unless defined $tst_data{$sys}{$doc}; + my $nseg = scalar( keys( %{$tst_data{$sys}{$doc}{SEGS}} ) ); + die "\n\nFATAL ERROR: translated documents must contain the same # of segments
as the source, but\n" + ." document '$doc' for system '$sys' contains $nseg segments, while\n" + ." the source document contains $nseg_source segments.\n\n" + unless $nseg == $nseg_source; + } + foreach my $sys (@ref_sys) + { + die "\n\nFATAL ERROR: no document '$doc' for reference '$sys'\n\n" unless defined $ref_data{$sys}{$doc}; + my $nseg = scalar( keys( %{$ref_data{$sys}{$doc}{SEGS}} ) ); + die "\n\nFATAL ERROR: translated documents must contain the same # of segments as the source, but\n" + ." document '$doc' for system '$sys' contains $nseg segments, while\n" + ." the source document contains $nseg_source segments.\n\n" + unless $nseg == $nseg_source; + } + } +} + +################################# + +sub compute_ngram_info +{ + my ($ref, $doc, $seg); + my (@wrds, $tot_wrds, %ngrams, $ngram, $mgram); + my (%ngram_count, @tot_ngrams); + + foreach $ref (keys %ref_data) + { + foreach $doc (keys %{$ref_data{$ref}}) + { + foreach $seg ( keys %{$ref_data{$ref}{$doc}{SEGS}}) + { + @wrds = split /\s+/, $ref_data{ $ref }{ $doc }{ SEGS }{ $seg }; + $tot_wrds += @wrds; + %ngrams = %{Words2Ngrams (@wrds)}; + foreach $ngram (keys %ngrams) + { + $ngram_count{$ngram} += $ngrams{$ngram}; + } + } + } + } + + foreach $ngram (keys %ngram_count) + { + @wrds = split / /, $ngram; + pop @wrds, $mgram = join " ", @wrds; + $ngram_info{$ngram} = - log ($mgram ? $ngram_count{$ngram}/$ngram_count{$mgram} : $ngram_count{$ngram}/$tot_wrds) / log 2; + if (defined $opt_x and $opt_x eq "ngram info") + { + @wrds = split / /, $ngram; + printf "ngram info:%9.4f%6d%6d%8d%3d %s\n", $ngram_info{$ngram}, $ngram_count{$ngram}, + $mgram ? 
$ngram_count{$mgram} : $tot_wrds, $tot_wrds, scalar @wrds, $ngram; + } + } +} + +################################# + +sub score_system +{ + my ($sys, $ref, $doc, $SCOREmt, $overallScore); + ($sys, $SCOREmt, $overallScore) = @_; + my ($ref_length, $match_cnt, $tst_cnt, $ref_cnt, $tst_info, $ref_info); + my ($cum_ref_length, @cum_match, @cum_tst_cnt, @cum_ref_cnt, @cum_tst_info, @cum_ref_info); + + $cum_ref_length = 0; + for (my $j=1; $j<=$max_Ngram; $j++) + { + $cum_match[$j] = $cum_tst_cnt[$j] = $cum_ref_cnt[$j] = $cum_tst_info[$j] = $cum_ref_info[$j] = 0; + } + foreach $doc (sort keys %eval_docs) + { + ($ref_length, $match_cnt, $tst_cnt, $ref_cnt, $tst_info, $ref_info) = score_document ($sys, $doc, $overallScore); + if ( $method eq "NIST" ) + { + my %DOCmt = (); + my $docScore = nist_score( scalar( @ref_sys ), $match_cnt, $tst_cnt, $ref_cnt, $tst_info, $ref_info, $sys, \%DOCmt ); + $overallScore->{ $sys }{ 'documents' }{ $doc }{ 'score' } = $docScore; + if ( $detail >= 1 ) + { + printf "$method score using 5-grams = %.4f for system \"$sys\" on document \"$doc\" (%d segments, %d words)\n", + $docScore, scalar keys %{$tst_data{$sys}{$doc}{SEGS}}, $tst_cnt->[1]; + } + } + + if ( $method eq "BLEU" ) + { + my %DOCmt = (); + my $docScore = &{$BLEU_SCORE}( $ref_length, $match_cnt, $tst_cnt, $sys, \%DOCmt ); + $overallScore->{ $sys }{ 'documents' }{ $doc }{ 'score' } = $docScore; + if ( $detail >= 1 ) + { + printf "$method score using 4-grams = %.4f for system \"$sys\" on document \"$doc\" (%d segments, %d words)\n", + $docScore, scalar keys %{$tst_data{$sys}{$doc}{SEGS}}, $tst_cnt->[1]; + } + } + + $cum_ref_length += $ref_length; + for (my $j=1; $j<=$max_Ngram; $j++) + { + $cum_match[$j] += $match_cnt->[$j]; + $cum_tst_cnt[$j] += $tst_cnt->[$j]; + $cum_ref_cnt[$j] += $ref_cnt->[$j]; + $cum_tst_info[$j] += $tst_info->[$j]; + $cum_ref_info[$j] += $ref_info->[$j]; + printf "document info: $sys $doc %d-gram %d %d %d %9.4f %9.4f\n", $j, $match_cnt->[$j], + $tst_cnt->[$j], 
$ref_cnt->[$j], $tst_info->[$j], $ref_info->[$j] + if (defined $opt_x and $opt_x eq "document info"); + } + } + + if ($method eq "BLEU") + { + $overallScore->{ $sys }{ 'score' } = &{$BLEU_SCORE}($cum_ref_length, \@cum_match, \@cum_tst_cnt, $sys, $SCOREmt); + } + if ($method eq "NIST") + { + $overallScore->{ $sys }{ 'score' } = nist_score (scalar @ref_sys, \@cum_match, \@cum_tst_cnt, \@cum_ref_cnt, \@cum_tst_info, \@cum_ref_info, $sys, $SCOREmt); + } +} + +################################# + +sub score_document +{ + my ($sys, $ref, $doc, $overallScore); + ($sys, $doc, $overallScore) = @_; + my ($ref_length, $match_cnt, $tst_cnt, $ref_cnt, $tst_info, $ref_info); + my ($cum_ref_length, @cum_match, @cum_tst_cnt, @cum_ref_cnt, @cum_tst_info, @cum_ref_info); + + $cum_ref_length = 0; + for (my $j=1; $j<=$max_Ngram; $j++) + { + $cum_match[$j] = $cum_tst_cnt[$j] = $cum_ref_cnt[$j] = $cum_tst_info[$j] = $cum_ref_info[$j] = 0; + } + + # score each segment + foreach my $seg ( nsort keys( %{$tst_data{$sys}{$doc}{SEGS}} ) ) + { + + my @ref_segments = (); + foreach $ref (@ref_sys) + { + push @ref_segments, $ref_data{$ref}{$doc}{SEGS}{$seg}; + if ( $detail >= 3 ) + { + printf "ref '$ref', seg $seg: %s\n", $ref_data{$ref}{$doc}{SEGS}{$seg} + } + + } + + printf "sys '$sys', seg $seg: %s\n", $tst_data{$sys}{$doc}{SEGS}{$seg} if ( $detail >= 3 ); + ($ref_length, $match_cnt, $tst_cnt, $ref_cnt, $tst_info, $ref_info) = score_segment ($tst_data{$sys}{$doc}{SEGS}{$seg}, @ref_segments); + + if ( $method eq "BLEU" ) + { + my %DOCmt = (); + my $segScore = &{$BLEU_SCORE}($ref_length, $match_cnt, $tst_cnt, $sys, %DOCmt); + $overallScore->{ $sys }{ 'documents' }{ $doc }{ 'segments' }{ $seg }{ 'score' } = $segScore; + if ( $detail >= 2 ) + { + printf " $method score using 4-grams = %.4f for system \"$sys\" on segment $seg of document \"$doc\" (%d words)\n", $segScore, $tst_cnt->[1] + } + } + if ( $method eq "NIST" ) + { + my %DOCmt = (); + my $segScore = nist_score (scalar @ref_sys, $match_cnt, 
$tst_cnt, $ref_cnt, $tst_info, $ref_info, $sys, %DOCmt); + $overallScore->{ $sys }{ 'documents' }{ $doc }{ 'segments' }{ $seg }{ 'score' } = $segScore; + if ( $detail >= 2 ) + { + printf " $method score using 5-grams = %.4f for system \"$sys\" on segment $seg of document \"$doc\" (%d words)\n", $segScore, $tst_cnt->[1]; + } + } + $cum_ref_length += $ref_length; + for (my $j=1; $j<=$max_Ngram; $j++) + { + $cum_match[$j] += $match_cnt->[$j]; + $cum_tst_cnt[$j] += $tst_cnt->[$j]; + $cum_ref_cnt[$j] += $ref_cnt->[$j]; + $cum_tst_info[$j] += $tst_info->[$j]; + $cum_ref_info[$j] += $ref_info->[$j]; + } + } + return ($cum_ref_length, [@cum_match], [@cum_tst_cnt], [@cum_ref_cnt], [@cum_tst_info], [@cum_ref_info]); +} + +############################################################################################################################### +# function returning the shortest reference length +# takes as input: +# - currentLength : the current (shortest) reference length +# - referenceSentenceLength : the current reference sentence length +# - candidateSentenceLength : the current candidate sentence length (unused) +############################################################################################################################### +sub brevity_penalty_shortest +{ + my ( $currentLength, $referenceSentenceLength, $candidateSentenceLength ) = @_; + return ( $referenceSentenceLength < $currentLength ? $referenceSentenceLength : $currentLength ); +} + +############################################################################################################################### +# function returning the closest reference length (to the candidate sentence length) +# takes as input: +# - currentLength: the current (closest) reference length. 
+# - referenceSentenceLength : the current reference sentence length +# - candidateSentenceLength : the current candidate sentence length +# when two reference sentences are at the same distance, it will return the shortest reference sentence length +# example of 4 iterations, given: +# - one candidate sentence containing 7 tokens +# - one reference translation containing 11 tokens +# - one reference translation containing 8 tokens +# - one reference translation containing 6 tokens +# - one reference translation containing 7 tokens +# the multiple invocations will return: +# - currentLength is set to 11 (outside of this function) +# - brevity_penalty_closest( 11, 8, 7 ) returns 8, since abs( 8 - 7 ) < abs( 11 - 7 ) +# - brevity_penalty_closest( 8, 6, 7 ) returns 6, since abs( 8 - 7 ) == abs( 6 - 7 ) AND 6 < 8 +# - brevity_penalty_closest( 6, 7, 7 ) returns 7, since abs( 7 - 7 ) < abs( 6 - 7 ) +############################################################################################################################### +sub brevity_penalty_closest +{ + my ( $currentLength, $referenceSentenceLength, $candidateSentenceLength ) = @_; + my $result = $currentLength; + if ( abs( $candidateSentenceLength - $referenceSentenceLength ) <= abs( $candidateSentenceLength - $currentLength ) ) + { + if ( abs( $candidateSentenceLength - $referenceSentenceLength ) == abs( $candidateSentenceLength - $currentLength ) ) + { + if ( $currentLength > $referenceSentenceLength ) + { + $result = $referenceSentenceLength; + } + } + else + { + $result = $referenceSentenceLength; + } + } + return $result; +} + +################################# + +sub score_segment +{ + my ($tst_seg, @ref_segs) = @_; + my (@tst_wrds, %tst_ngrams, @match_count, @tst_count, @tst_info); + my (@ref_wrds, $ref_seg, %ref_ngrams, %ref_ngrams_max, @ref_count, @ref_info); + my ($ngram); + my (@nwrds_ref); + my $ref_length; + + for (my $j=1; $j<= $max_Ngram; $j++) + { + $match_count[$j] = $tst_count[$j] = $ref_count[$j]
= $tst_info[$j] = $ref_info[$j] = 0; + } + +# get the ngram counts for the test segment + @tst_wrds = split /\s+/, $tst_seg; + %tst_ngrams = %{Words2Ngrams (@tst_wrds)}; + for (my $j=1; $j<=$max_Ngram; $j++) + { + # compute ngram counts + $tst_count[$j] = $j<=@tst_wrds ? (@tst_wrds - $j + 1) : 0; + } + +# get the ngram counts for the reference segments + foreach $ref_seg (@ref_segs) + { + @ref_wrds = split /\s+/, $ref_seg; + %ref_ngrams = %{Words2Ngrams (@ref_wrds)}; + foreach $ngram (keys %ref_ngrams) + { + # find the maximum # of occurrences + my @wrds = split / /, $ngram; + $ref_info[@wrds] += $ngram_info{$ngram}; + $ref_ngrams_max{$ngram} = defined $ref_ngrams_max{$ngram} ? max ($ref_ngrams_max{$ngram}, $ref_ngrams{$ngram}) : $ref_ngrams{$ngram}; + } + for (my $j=1; $j<=$max_Ngram; $j++) + { + # update ngram counts + $ref_count[$j] += $j<=@ref_wrds ? (@ref_wrds - $j + 1) : 0; + } + if ( not defined( $ref_length ) ) + { + $ref_length = scalar( @ref_wrds ); + } + else + { + $ref_length = &{$BLEU_BP}( $ref_length, scalar( @ref_wrds ), scalar( @tst_wrds ) ); + } + } + +# accumulate scoring stats for tst_seg ngrams that match ref_seg ngrams + foreach $ngram (keys %tst_ngrams) + { + next unless defined $ref_ngrams_max{$ngram}; + my @wrds = split / /, $ngram; + $tst_info[@wrds] += $ngram_info{$ngram} * min($tst_ngrams{$ngram},$ref_ngrams_max{$ngram}); + $match_count[@wrds] += my $count = min($tst_ngrams{$ngram},$ref_ngrams_max{$ngram}); + printf "%.2f info for each of $count %d-grams = '%s'\n", $ngram_info{$ngram}, scalar @wrds, $ngram + if $detail >= 3; + } + + return ($ref_length, [@match_count], [@tst_count], [@ref_count], [@tst_info], [@ref_info]); +} + +################################# + +sub bleu_score_nosmoothing +{ + my ($ref_length, $matching_ngrams, $tst_ngrams, $sys, $SCOREmt) = @_; + my $score = 0; + my $iscore = 0; + + for ( my $j = 1; $j <= $max_Ngram; ++$j ) + { + if ($matching_ngrams->[ $j ] == 0) + { + $SCOREmt->{ $j }{ $sys }{ cum }=0; + } + else + 
{ + my $len_score = min (0, 1-$ref_length/$tst_ngrams->[1]); + # Cumulative N-Gram score + $score += log( $matching_ngrams->[ $j ] / $tst_ngrams->[ $j ] ); + $SCOREmt->{ $j }{ $sys }{ cum } = exp( $score / $j + $len_score ); + # Individual N-Gram score + $iscore = log( $matching_ngrams->[ $j ] / $tst_ngrams->[ $j ] ); + $SCOREmt->{ $j }{ $sys }{ ind } = exp( $iscore ); + } + } + return $SCOREmt->{ 4 }{ $sys }{ cum }; +} + +############################################################################################################################### +# Default method used to compute the BLEU score, using smoothing. +# Note that the method used can be overridden using the '--no-smoothing' command-line argument +# The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each precision score whose matching n-gram count is null +# k is 1 for the first 'n' value for which the n-gram match count is null +# For example, if the text contains: +# - one 2-gram match +# - and (consequently) two 1-gram matches +# the n-gram count for each individual precision score would be: +# - n=1 => prec_count = 2 (two unigrams) +# - n=2 => prec_count = 1 (one bigram) +# - n=3 => prec_count = 1/2 (no trigram, taking 'smoothed' value of 1 / ( 2^k ), with k=1) +# - n=4 => prec_count = 1/4 (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2) +############################################################################################################################### +sub bleu_score +{ + my ($ref_length, $matching_ngrams, $tst_ngrams, $sys, $SCOREmt) = @_; + my $score = 0; + my $iscore = 0; + my $exp_len_score = 0; + $exp_len_score = exp( min (0, 1 - $ref_length / $tst_ngrams->[ 1 ] ) ) if ( $tst_ngrams->[ 1 ] > 0 ); + my $smooth = 1; + for ( my $j = 1; $j <= $max_Ngram; ++$j ) + { + if ( $tst_ngrams->[ $j ] == 0 ) + { + $iscore = 0; + } + elsif ( $matching_ngrams->[ $j ] == 0 ) + { + $smooth *= 2; + $iscore = log( 1 / ( $smooth * $tst_ngrams->[ $j ] ) ); + } + else + { + 
$iscore = log( $matching_ngrams->[ $j ] / $tst_ngrams->[ $j ] ); + } + $SCOREmt->{ $j }{ $sys }{ ind } = exp( $iscore ); + $score += $iscore; + $SCOREmt->{ $j }{ $sys }{ cum } = exp( $score / $j ) * $exp_len_score; + } + return $SCOREmt->{ 4 }{ $sys }{ cum }; +} + +################################# + +sub nist_score +{ + my ($nsys, $matching_ngrams, $tst_ngrams, $ref_ngrams, $tst_info, $ref_info, $sys, $SCOREmt) = @_; + my $score = 0; + my $iscore = 0; + + for (my $n=1; $n<=$max_Ngram; $n++) + { + $score += $tst_info->[$n]/max($tst_ngrams->[$n],1); + $SCOREmt->{$n}{$sys}{cum} = $score * nist_length_penalty($tst_ngrams->[1]/($ref_ngrams->[1]/$nsys)); + $iscore = $tst_info->[$n]/max($tst_ngrams->[$n],1); + $SCOREmt->{$n}{$sys}{ind} = $iscore * nist_length_penalty($tst_ngrams->[1]/($ref_ngrams->[1]/$nsys)); + } + return $SCOREmt->{5}{$sys}{cum}; +} + +################################# + +sub Words2Ngrams +{ + #convert a string of words to an Ngram count hash + my %count = (); + + for (; @_; shift) + { + my ($j, $ngram, $word); + for ($j=0; $j<$max_Ngram and defined($word=$_[$j]); $j++) + { + $ngram .= defined $ngram ? 
" $word" : $word; + $count{$ngram}++; + } + } + return {%count}; +} + +################################# + +sub tokenization +{ + my ($norm_text) = @_; + +# language-independent part: + $norm_text =~ s///g; # strip "skipped" tags + $norm_text =~ s/-\n//g; # strip end-of-line hyphenation and join lines + $norm_text =~ s/\n/ /g; # join lines + $norm_text =~ s/"/"/g; # convert SGML tag for quote to " + $norm_text =~ s/&/&/g; # convert SGML tag for ampersand to & + $norm_text =~ s/</ + $norm_text =~ s/>/>/g; # convert SGML tag for greater-than to < + +# language-dependent part (assuming Western languages): + $norm_text = " $norm_text "; + $norm_text =~ tr/[A-Z]/[a-z]/ unless $preserve_case; + $norm_text =~ s/([\{-\~\[-\` -\&\(-\+\:-\@\/])/ $1 /g; # tokenize punctuation + $norm_text =~ s/([^0-9])([\.,])/$1 $2 /g; # tokenize period and comma unless preceded by a digit + $norm_text =~ s/([\.,])([^0-9])/ $1 $2/g; # tokenize period and comma unless followed by a digit + $norm_text =~ s/([0-9])(-)/$1 $2 /g; # tokenize dash when preceded by a digit + $norm_text =~ s/\s+/ /g; # one space only between words + $norm_text =~ s/^\s+//; # no leading space + $norm_text =~ s/\s+$//; # no trailing space + + return $norm_text; +} + + +sub tokenization_international +{ + my ($norm_text) = @_; + + $norm_text =~ s///g; # strip "skipped" tags + #$norm_text =~ s/\p{Hyphen}\p{Zl}//g; # strip end-of-line hyphenation and join lines + $norm_text =~ s/\p{Zl}/ /g; # join lines + + # replace entities + $norm_text =~ s/"/\"/g; # quote to " + $norm_text =~ s/&/&/g; # ampersand to & + $norm_text =~ s/<//g; # greater-than to > + $norm_text =~ s/'/\'/g; # apostrophe to ' + + $norm_text = lc( $norm_text ) unless $preserve_case; # lowercasing if needed + $norm_text =~ s/([^[:ascii:]])/ $1 /g if ( $split_non_ASCII ); + + # punctuation: tokenize any punctuation unless followed AND preceded by a digit + $norm_text =~ s/(\P{N})(\p{P})/$1 $2 /g; + $norm_text =~ s/(\p{P})(\P{N})/ $1 $2/g; + + $norm_text =~ 
s/(\p{S})/ $1 /g; # tokenize symbols + + $norm_text =~ s/\p{Z}+/ /g; # one space only between words + $norm_text =~ s/^\p{Z}+//; # no leading space + $norm_text =~ s/\p{Z}+$//; # no trailing space + + return $norm_text; +} + +################################# + +sub nist_length_penalty +{ + my ($ratio) = @_; + return 1 if $ratio >= 1; + return 0 if $ratio <= 0; + my $ratio_x = 1.5; + my $score_x = 0.5; + my $beta = -log($score_x)/log($ratio_x)/log($ratio_x); + return exp (-$beta*log($ratio)*log($ratio)); +} + +################################# + +sub date_time_stamp +{ + my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime(); + my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); + my ($date, $time); + $time = sprintf "%2.2d:%2.2d:%2.2d", $hour, $min, $sec; + $date = sprintf "%4.4s %3.3s %s", 1900+$year, $months[$mon], $mday; + return ($date, $time); +} + +################################# + +sub extract_sgml_tag_and_span +{ + my ($name, $data) = @_; + ($data =~ m|<$name\s*([^>]*)>(.*?)(.*)|si) ? ($1, $2, $3) : (); +} + +################################# + +sub extract_sgml_tag_attribute +{ + my ($name, $data) = @_; + ($data =~ m|$name\s*=\s*\"([^\"]*)\"|si) ? 
($1) : (); +} + +################################# + +sub max +{ + my ($max, $next); + + return unless defined ($max=pop); + while (defined ($next=pop)) + { + $max = $next if $next > $max; + } + return $max; +} + +################################# + +sub min +{ + my ($min, $next); + + return unless defined ($min=pop); + while (defined ($next=pop)) + { + $min = $next if $next < $min; + } + return $min; +} + +################################# + +sub printout_report +{ + if ( $METHOD eq "BOTH" ) + { + foreach my $sys (sort @tst_sys) + { + printf "NIST score = %2.4f BLEU score = %.4f for system \"$sys\"\n",$NISTmt{5}{$sys}{cum},$BLEUmt{4}{$sys}{cum}; + } + } + elsif ($METHOD eq "NIST" ) + { + foreach my $sys (sort @tst_sys) + { + printf "NIST score = %2.4f for system \"$sys\"\n",$NISTmt{5}{$sys}{cum}; + } + } + elsif ($METHOD eq "BLEU" ) + { + foreach my $sys (sort @tst_sys) + { + printf "\nBLEU score = %.4f for system \"$sys\"\n",$BLEUmt{4}{$sys}{cum}; + } + } + printf "\n# ------------------------------------------------------------------------\n\n"; + printf "Individual N-gram scoring\n"; + printf " 1-gram 2-gram 3-gram 4-gram 5-gram 6-gram 7-gram 8-gram 9-gram\n"; + printf " ------ ------ ------ ------ ------ ------ ------ ------ ------\n"; + + if ( ( $METHOD eq "BOTH" ) || ($METHOD eq "NIST") ) + { + foreach my $sys (sort @tst_sys) + { + printf " NIST:"; + for (my $i=1; $i<=$max_Ngram; $i++) + { + printf " %2.4f ",$NISTmt{$i}{$sys}{ind} + } + printf " \"$sys\"\n"; + } + printf "\n"; + } + + if ( ( $METHOD eq "BOTH" ) || ($METHOD eq "BLEU") ) + { + foreach my $sys (sort @tst_sys) + { + printf " BLEU:"; + for (my $i=1; $i<=$max_Ngram; $i++) + { + printf " %2.4f ",$BLEUmt{$i}{$sys}{ind} + } + printf " \"$sys\"\n"; + } + } + + printf "\n# ------------------------------------------------------------------------\n"; + printf "\nCumulative N-gram scoring\n"; + printf " 1-gram 2-gram 3-gram 4-gram 5-gram 6-gram 7-gram 8-gram 9-gram\n"; + printf " ------ ------ ------ 
------ ------ ------ ------ ------ ------\n"; + + if (( $METHOD eq "BOTH" ) || ($METHOD eq "NIST")) + { + foreach my $sys (sort @tst_sys) + { + printf " NIST:"; + for (my $i=1; $i<=$max_Ngram; $i++) + { + printf " %2.4f ",$NISTmt{$i}{$sys}{cum} + } + printf " \"$sys\"\n"; + } + } + printf "\n"; + if ( ( $METHOD eq "BOTH" ) || ($METHOD eq "BLEU") ) + { + foreach my $sys (sort @tst_sys) + { + printf " BLEU:"; + for (my $i=1; $i<=$max_Ngram; $i++) + { + printf " %2.4f ",$BLEUmt{$i}{$sys}{cum} + } + printf " \"$sys\"\n"; + } + } +} + +############################################################################################################################### +# Create three files, by using: +# - $prefix : the prefix used for the output file names +# - %overall : a hash containing seg/doc/sys-level scores: +# - $overall{ $SYSTEM_ID }{ 'score' } => system-level score +# - $overall{ $SYSTEM_ID }{ 'documents' }{ $DOCUMENT_ID }{ 'score' } => document-level score +# - $overall{ $SYSTEM_ID }{ 'documents' }{ $DOCUMENT_ID }{ 'segments' }{ $SEGMENT_ID } => segment-level score +############################################################################################################################### +sub outputMetricsMATR +{ + my ( $prefix, %overall ) = @_; + my $fileNameSys = $prefix . '-sys.scr'; + my $fileNameDoc = $prefix . '-doc.scr'; + my $fileNameSeg = $prefix . 
'-seg.scr'; + open FILEOUT_SYS, '>', $fileNameSys or die "Could not open file: ${fileNameSys}"; + open FILEOUT_DOC, '>', $fileNameDoc or die "Could not open file: ${fileNameDoc}"; + open FILEOUT_SEG, '>', $fileNameSeg or die "Could not open file: ${fileNameSeg}"; + foreach my $sys ( sort( keys( %overall ) ) ) + { + my $scoreSys = $overall{ $sys }{ 'score' }; + print FILEOUT_SYS "${tst_id}\t${sys}\t${scoreSys}\n"; + foreach my $doc ( sort( keys( %{$overall{ $sys }{ 'documents' }} ) ) ) + { + my $scoreDoc = $overall{ $sys }{ 'documents' }{ $doc }{ 'score' }; + print FILEOUT_DOC "${tst_id}\t${sys}\t${doc}\t${scoreDoc}\n"; + foreach my $seg ( nsort keys( %{$overall{ $sys }{ 'documents' }{ $doc }{ 'segments' }} ) ) + { + my $scoreSeg = $overall{ $sys }{ 'documents' }{ $doc }{ 'segments' }{ $seg }{ 'score' }; + print FILEOUT_SEG "${tst_id}\t${sys}\t${doc}\t${seg}\t${scoreSeg}\n"; + } + } + } + close FILEOUT_SEG; + close FILEOUT_DOC; + close FILEOUT_SYS; +} + From 101e52da60a653b3d62dfff75187a657fcf60a94 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 2 May 2017 10:57:00 +0100 Subject: [PATCH 171/176] check for executables before running --- scripts/generic/binarize4moses2.perl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/generic/binarize4moses2.perl b/scripts/generic/binarize4moses2.perl index a703cc241..46e30f43d 100755 --- a/scripts/generic/binarize4moses2.perl +++ b/scripts/generic/binarize4moses2.perl @@ -31,6 +31,9 @@ die("ERROR: please set --phrase-table") unless defined($ptPath); #die("ERROR: please set --lex-ro") unless defined($lexRoPath); die("ERROR: please set --output-dir") unless defined($outPath); #die("ERROR: please set --num-lex-scores") unless defined($numLexScores); +die("ERROR: compile contrib/sigtest-filter") if (!-X "$mosesDir/contrib/sigtest-filter/filter-pt"); +die("ERROR: compile with bjam --with-cmph") if (!-X "$mosesDir/bin/processLexicalTableMin"); +die("ERROR: compile with bjam --with-xmlrpc-c") if (!-X 
"$mosesDir/bin/CreateProbingPT2"); my $cmd; From 66bd0452308f9a99c13652f6d5b400b908ee714b Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Mon, 15 May 2017 21:41:33 +0000 Subject: [PATCH 172/176] Added verbosity option to check-coverage5.cc. --- moses/TranslationModel/UG/check-coverage5.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/moses/TranslationModel/UG/check-coverage5.cc b/moses/TranslationModel/UG/check-coverage5.cc index 549eb7b21..fe9479136 100644 --- a/moses/TranslationModel/UG/check-coverage5.cc +++ b/moses/TranslationModel/UG/check-coverage5.cc @@ -29,6 +29,7 @@ typedef sapt::L2R_Token Token; typedef mmTtrack ttrack_t; size_t ngram_size; +size_t verbosity; string bname; vector ifiles; @@ -77,7 +78,7 @@ main(int argc, char* argv[]) size_t stop = min(snt.size(), i+ngram_size); size_t k = i; while (k < stop && m.extend(snt[k])) ++k; - // cout << i << " " << k-i << " " << m.str(&V) << endl; + if (verbosity) cout << i << " " << k-i << " " << m.str(&V) << endl; if (k - i == ngram_size) ++matched_ngrams; } @@ -98,6 +99,8 @@ interpret_args(int ac, char* av[]) ("help,h", "print this message") ("ngram-size,n", po::value(&ngram_size)->default_value(5), "sample size") + ("verbose,v", po::value(&verbosity)->default_value(0), + "verbosity") ; po::options_description h("Hidden Options"); From 59bad94fc3c75c20a9f95f5c0d69c8f55eaf49e0 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 29 May 2017 07:50:59 -0400 Subject: [PATCH 173/176] force softlink --- Jamroot | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jamroot b/Jamroot index 6cc7c9427..bbf718663 100644 --- a/Jamroot +++ b/Jamroot @@ -340,6 +340,6 @@ if [ path.exists $(TOP)/dist ] && $(prefix) != dist { #local temp = [ _shell "bash source ./s.sh" ] ; local temp = [ _shell "mkdir -p $(TOP)/bin" ] ; local temp = [ _shell "rm -f $(TOP)/bin/moses_chart" ] ; -local temp = [ _shell "cd $(TOP)/bin && ln -s moses moses_chart" ] ; -local temp = [ _shell "cd $(TOP)/bin 
&& ln -s CreateProbingPT CreateProbingPT2" ] ; +local temp = [ _shell "cd $(TOP)/bin && ln -sf moses moses_chart" ] ; +local temp = [ _shell "cd $(TOP)/bin && ln -sf CreateProbingPT CreateProbingPT2" ] ; From 4b0560b5c9bd95d7c55cb0451e8947de0eee1d6d Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 15 Jun 2017 14:26:51 +0100 Subject: [PATCH 174/176] stop printing out UNK when factor == NULL. /Etienne Monneret --- moses/Manager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moses/Manager.cpp b/moses/Manager.cpp index 3650baabb..0cd540991 100644 --- a/moses/Manager.cpp +++ b/moses/Manager.cpp @@ -1724,7 +1724,7 @@ OutputSurface(std::ostream &out, Hypothesis const& edge, bool const recursive) c for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); if (factor) out << fd << *factor; - else out << fd << UNKNOWN_FACTOR; + //else out << fd << UNKNOWN_FACTOR; } if(markUnknown && word.IsOOV()) { From 54873e49dea377df6387ba90ac435fd44f31a2d6 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Sat, 17 Jun 2017 05:02:41 +0100 Subject: [PATCH 175/176] bump --- BUILD-INSTRUCTIONS.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/BUILD-INSTRUCTIONS.txt b/BUILD-INSTRUCTIONS.txt index 7b9bc3a8a..a41582bfa 100644 --- a/BUILD-INSTRUCTIONS.txt +++ b/BUILD-INSTRUCTIONS.txt @@ -7,3 +7,4 @@ into the source tree from elsewhere: * "bjam-files" is taken from Boost. 
* "util" and "lm" are taken from KenLM: https://github.com/kpu/kenlm + From e32b8f580533e2b1cdb5b8496a8658f277409a69 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 19 Jul 2017 22:44:32 +0100 Subject: [PATCH 176/176] windows build --- contrib/other-builds/moses2/moses2.sln | 28 + contrib/other-builds/moses2/moses2.vcxproj | 327 ++++++++++ .../moses2/moses2.vcxproj.filters | 574 ++++++++++++++++++ probingpt/util.cpp | 1 + 4 files changed, 930 insertions(+) create mode 100644 contrib/other-builds/moses2/moses2.sln create mode 100644 contrib/other-builds/moses2/moses2.vcxproj create mode 100644 contrib/other-builds/moses2/moses2.vcxproj.filters diff --git a/contrib/other-builds/moses2/moses2.sln b/contrib/other-builds/moses2/moses2.sln new file mode 100644 index 000000000..27454d6a7 --- /dev/null +++ b/contrib/other-builds/moses2/moses2.sln @@ -0,0 +1,28 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Express 2013 for Windows Desktop +VisualStudioVersion = 12.0.40629.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "moses2", "moses2\moses2.vcxproj", "{B4304E97-D37F-4022-BD03-841A4FAEE398}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {B4304E97-D37F-4022-BD03-841A4FAEE398}.Debug|Win32.ActiveCfg = Debug|Win32 + {B4304E97-D37F-4022-BD03-841A4FAEE398}.Debug|Win32.Build.0 = Debug|Win32 + {B4304E97-D37F-4022-BD03-841A4FAEE398}.Debug|x64.ActiveCfg = Debug|x64 + {B4304E97-D37F-4022-BD03-841A4FAEE398}.Debug|x64.Build.0 = Debug|x64 + {B4304E97-D37F-4022-BD03-841A4FAEE398}.Release|Win32.ActiveCfg = Release|Win32 + {B4304E97-D37F-4022-BD03-841A4FAEE398}.Release|Win32.Build.0 = Release|Win32 + {B4304E97-D37F-4022-BD03-841A4FAEE398}.Release|x64.ActiveCfg = 
Release|x64 + {B4304E97-D37F-4022-BD03-841A4FAEE398}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/contrib/other-builds/moses2/moses2.vcxproj b/contrib/other-builds/moses2/moses2.vcxproj new file mode 100644 index 000000000..cad2aa33e --- /dev/null +++ b/contrib/other-builds/moses2/moses2.vcxproj @@ -0,0 +1,327 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {B4304E97-D37F-4022-BD03-841A4FAEE398} + Win32Proj + moses2 + + + + Application + true + v140 + Unicode + + + Application + true + v120 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v120 + true + Unicode + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(SolutionDir)\..\..\..\;$(SolutionDir)\..\..\..\..\xmlrpc-c\include\;$(SolutionDir)\..\..\..\..\boost_1_55_0\;%(AdditionalIncludeDirectories) + + + Console + true + + + + + + + Level3 + Disabled + NO_COMPACT_TABLES;HAVE_CMPHXXX;MAX_NUM_FACTORS=4;KENLM_MAX_ORDER=6;_USE_MATH_DEFINES;NOMINMAX;WITH_THREADS;NO_PIPES;_WIN32;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(SolutionDir)\..\..\..\..\cmph-2.0\src;$(SolutionDir)\..\..\..\..\zlib-1.2.8;$(SolutionDir)\..\..\..\..\xmlrpc-c\include\;$(SolutionDir)\..\..\..\..\mman-win32\;$(SolutionDir)\..\..\..\..\boost_1_56_0\;$(SolutionDir)\..\..\..\moses2\;$(SolutionDir)\..\..\..\;%(AdditionalIncludeDirectories) + MultiThreadedDLL + $(IntDir)\%(RelativeDir)\%(Filename).obj + false + false + + + Console + true + 
$(SolutionDir)\..\..\..\..\xmlrpc-c\bin\Release-Static-x64;$(SolutionDir)\..\..\..\..\zlib-1.2.8\contrib\vstudio\vc11\x64\ZlibDllRelease;$(SolutionDir)\..\..\..\..\mman-win32\x64\Release;$(SolutionDir)\..\..\..\..\cmph-2.0\visualstudio2013\x64\Release;$(SolutionDir)\..\..\..\..\boost_1_56_0\lib64-msvc-12.0\ + libxmlrpc.lib;libxmlrpc_server_abyss.lib;libxmlrpc_server.lib;libxmlrpc_abyss.lib;libxmlrpc_util.lib;libxmlrpc_xmlparse.lib;libxmlrpc_xmltok.lib;libxmlrpc++.lib;zlibwapi.lib;mman.lib;cmph.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + NO_COMPACT_TABLES;HAVE_CMPHXXX;MAX_NUM_FACTORS=4;KENLM_MAX_ORDER=6;_USE_MATH_DEFINES;NOMINMAX;WITH_THREADS;NO_PIPES;_WIN32;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(SolutionDir)\..\..\..\..\cmph-2.0\src;$(SolutionDir)\..\..\..\..\zlib-1.2.8;$(SolutionDir)\..\..\..\..\xmlrpc-c\include\;$(SolutionDir)\..\..\..\..\mman-win32\;$(SolutionDir)\..\..\..\..\boost_1_56_0\;$(SolutionDir)\..\..\..\moses2\;$(SolutionDir)\..\..\..\;%(AdditionalIncludeDirectories) + MultiThreadedDLL + $(IntDir)\%(RelativeDir)\%(Filename).obj + false + + + Console + true + true + true + $(SolutionDir)\..\..\..\..\xmlrpc-c\bin\Release-Static-x64;$(SolutionDir)\..\..\..\..\zlib-1.2.8\contrib\vstudio\vc11\x64\ZlibDllRelease;$(SolutionDir)\..\..\..\..\mman-win32\x64\Release;$(SolutionDir)\..\..\..\..\cmph-2.0\visualstudio2013\x64\Release;$(SolutionDir)\..\..\..\..\boost_1_56_0\lib64-msvc-12.0\ + libxmlrpc.lib;libxmlrpc_server_abyss.lib;libxmlrpc_server.lib;libxmlrpc_abyss.lib;libxmlrpc_util.lib;libxmlrpc_xmlparse.lib;libxmlrpc_xmltok.lib;libxmlrpc++.lib;zlibwapi.lib;mman.lib;cmph.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/contrib/other-builds/moses2/moses2.vcxproj.filters b/contrib/other-builds/moses2/moses2.vcxproj.filters new file mode 100644 index 000000000..70a76ce23 --- /dev/null +++ b/contrib/other-builds/moses2/moses2.vcxproj.filters @@ -0,0 +1,574 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + {214e915b-eafb-4e76-b9a2-1fbfe99424b7} + + + {d6922dd8-d86c-4c79-8587-a3a412a2e9f6} + + + {4e9f8bc8-ef01-463e-a309-df80bb1e63b5} + + + {58e84bec-0301-41b2-a4c2-dc00162c550a} + + + {ccdac19b-8883-4c3d-8006-365d4846688f} + + + {929b6fab-e56b-4218-bfb6-da1c50a6c48e} + + + {797f4634-c680-45fc-b30e-b5e90ee5d224} + + + {5a603a90-587d-4fef-ab01-ba6e5173869e} + + + {85931b40-138d-48c4-a288-c4ee96039879} + + + {a044c0cd-45cc-4a91-bd83-0e04d11f2afa} + + + {fe76b14f-0997-4f14-b3d6-c6f1b725bf72} + + + {dea1c12e-1a75-4313-9c03-28689a06a1ee} + + + {b895cea0-249f-4b4a-9b47-872d810fd4f2} + + + {12cccf90-d56a-4aca-8780-41777bb2f291} + + + {e8564ac2-1055-4b10-9da0-4ae82e713881} + + + {cad8906a-d5b9-447d-952c-68bd6fa93bef} + + + {806d58ef-f545-4428-a8f0-f870e211d15f} + + + {8f10d9c1-66d4-4490-b310-d3f4973bad29} + + + + + Source Files\util + + + Source Files\util + + + Source Files\util + + + Source Files\util\double-conversion + + + Source Files\util\double-conversion + + + Source Files\util\double-conversion + + + Source Files\util\double-conversion + + + Source Files\util\double-conversion + + + Source Files\util\double-conversion + + + Source Files\util\double-conversion + + + Source Files\util\double-conversion + + + Source 
Files\util + + + Source Files\util + + + Source Files\util + + + Source Files\util + + + Source Files\FF + + + Source Files\FF + + + Source Files\FF + + + Source Files\FF + + + Source Files\FF + + + Source Files\FF + + + Source Files\FF + + + Source Files\FF + + + Source Files\FF + + + Source Files\FF + + + Source Files\legacy + + + Source Files\legacy + + + Source Files\legacy + + + Source Files\legacy + + + Source Files\legacy + + + Source Files\legacy + + + Source Files\legacy + + + Source Files\legacy + + + Source Files\legacy + + + Source Files\legacy + + + Source Files\legacy + + + Source Files\legacy + + + Source Files\parameters + + + Source Files\parameters + + + Source Files\parameters + + + Source Files\parameters + + + Source Files\parameters + + + Source Files\parameters + + + Source Files\parameters + + + Source Files\parameters + + + Source Files\parameters + + + Source Files\parameters + + + Source Files\parameters + + + Source Files\parameters + + + Source Files\parameters + + + Source Files\parameters + + + Source Files\parameters + + + Source Files\SCFG + + + Source Files\SCFG + + + Source Files\SCFG + + + Source Files\SCFG + + + Source Files\SCFG + + + Source Files\SCFG + + + Source Files\SCFG + + + Source Files\SCFG + + + Source Files\SCFG + + + Source Files\SCFG + + + Source Files\SCFG + + + Source Files\SCFG + + + Source Files\SCFG + + + Source Files\server + + + Source Files\server + + + Source Files\server + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files\util + + + Source Files\TranslationModel\Memory + 
+ + Source Files\TranslationModel + + + Source Files\TranslationModel + + + Source Files\TranslationModel + + + Source Files\FF\LexicalReordering + + + Source Files\FF\LexicalReordering + + + Source Files\FF\LexicalReordering + + + Source Files\FF\LexicalReordering + + + Source Files\FF\LexicalReordering + + + Source Files\FF\LexicalReordering + + + Source Files\FF\LexicalReordering + + + Source Files\FF\LexicalReordering + + + Source Files\FF\OSM + + + Source Files\FF\OSM + + + Source Files\FF\OSM + + + Source Files\util + + + Source Files\util + + + Source Files\util + + + Source Files\util + + + Source Files\util + + + Source Files\util + + + Source Files\PhraseBased + + + Source Files\PhraseBased + + + Source Files\PhraseBased + + + Source Files\PhraseBased + + + Source Files\PhraseBased + + + Source Files\PhraseBased + + + Source Files\PhraseBased + + + Source Files\PhraseBased + + + Source Files\PhraseBased + + + Source Files\PhraseBased + + + Source Files\PhraseBased + + + Source Files\lm + + + Source Files\lm + + + Source Files\lm + + + Source Files\lm + + + Source Files\lm + + + Source Files\lm + + + Source Files\lm + + + Source Files\lm + + + Source Files\lm + + + Source Files\lm + + + Source Files\lm + + + Source Files\lm + + + Source Files\lm + + + Source Files\lm + + + Source Files\lm + + + Source Files\util + + + Source Files\util + + + Source Files\PhraseBased\Normal + + + Source Files\PhraseBased\Normal + + + Source Files\PhraseBased\Normal + + + Source Files\PhraseBased\CubePruningMiniStack + + + Source Files\PhraseBased\CubePruningMiniStack + + + Source Files\PhraseBased\CubePruningMiniStack + + + Source Files\Moses2LM + + + Source Files\Moses2LM + + + Source Files\Moses2LM + + + Source Files\Moses2LM + + + Source Files\SCFG\nbest + + + Source Files\SCFG\nbest + + + Source Files\SCFG\nbest + + + Source Files\SCFG\nbest + + + Source Files\TranslationModel + + + Source Files\ProbingPT + + + Source Files\ProbingPT + + + Source Files\ProbingPT + + + 
Source Files\ProbingPT + + + Source Files\ProbingPT + + + Source Files\ProbingPT + + + Source Files\ProbingPT + + + Source Files\FF + + + Source Files\FF + + + \ No newline at end of file diff --git a/probingpt/util.cpp b/probingpt/util.cpp index cf4edb81a..a98ee41ea 100644 --- a/probingpt/util.cpp +++ b/probingpt/util.cpp @@ -1,3 +1,4 @@ +#include #include "util.h" #include "util/exception.hh"