// Review notes for this patch. Everything ahead of the first "diff --git" header is commentary;
// the patch text itself is unchanged below.
//
// Purpose: adds debugging instrumentation around left-state scoring in kenlm/lm/left.{cc,hh}
// and prints the collected numbers from moses-chart-cmd/src/Main.cpp.
//
// kenlm/lm/left.cc
//   - Defines four namespace-scope vectors, each constructed as (5,0): left_revisit_count,
//     left_revisit_change, left_revisit_count_partial, left_revisit_change_partial.
//   - Defines the global object global_left_counters.
//
// kenlm/lm/left.hh
//   - Declares the four vectors and global_left_counters as extern.
//   - Adds class Counters: two kMaxOrder x kMaxOrder uint64_t tables (full, partial) that the
//     constructor zeroes with memset. Add() increments the cell [left.length][right.length] of
//     the table selected by the state's full flag. The destructor writes both tables to std::cerr.
//   - RuleScore::NonTerminal increments the count vector selected by in.full at index
//     in.left.length and binds revisit_change to the matching slot of the change vector.
//     In the hunks shown, the backoff terms summed in the in.full branches and the ret.prob
//     values handled by ProcessRet are added to revisit_change as well as to prob_; the incoming
//     prob argument is added to prob_ only.
//   - RuleScore::ProcessRet takes an extra double& parameter and adds ret.prob to it.
//   - RuleScore::Finish calls global_left_counters.Add(out_) after computing out_.full.
//
// moses-chart-cmd/src/Main.cpp
//   - First hunk: the recombCount loop index becomes unsigned, and four rows are printed for
//     indices 0..4: change/count ("Left adjustments"), count ("Left revisits"), and the same two
//     rows for the *_partial vectors.
//   - Second hunk: reads /proc/self/status line by line and echoes each line to std::cerr.
//
// NOTE(review): the vectors hold 5 elements but are indexed by in.left.length, whose upper bound
//   is not visible in this patch (Counters sizes its tables with kMaxOrder instead) -- confirm
//   left.length is always < 5, otherwise operator[] is out of range.
// NOTE(review): the "adjustments" rows divide by a count that stays 0 for any index never
//   visited -- confirm a non-finite value in the log is acceptable.
// NOTE(review): the counters are plain globals updated with no synchronisation visible here, and
//   Main.cpp has a WITH_THREADS section -- confirm scoring never runs concurrently, or treat the
//   numbers as approximate.
// NOTE(review): Counters reports from its destructor; the HACK_EXIT comment in Main.cpp says that
//   configuration avoids destructors, so the table presumably is not printed there -- confirm.
// NOTE(review): /proc/self/status is a Linux procfs path; elsewhere the ifstream presumably fails
//   to open and the loop prints nothing -- confirm that is the intended fallback.
// NOTE(review): the patch adds no #include lines for memset, std::cerr or std::ifstream -- confirm
//   <cstring>, <iostream> and <fstream> are already included by the touched files.
// NOTE(review): template argument lists (std::vector, static_cast, "template class RuleScore")
//   look stripped in this copy -- compare against the original commit before applying.
diff --git a/kenlm/lm/left.cc b/kenlm/lm/left.cc index 74081bdcd..082984f81 100644 --- a/kenlm/lm/left.cc +++ b/kenlm/lm/left.cc @@ -12,6 +12,12 @@ namespace lm { namespace ngram { std::vector ChartState::recombCount(8,0); +std::vector left_revisit_count(5,0); +std::vector left_revisit_change(5,0); +std::vector left_revisit_count_partial(5,0); +std::vector left_revisit_change_partial(5,0); + +Counters global_left_counters = Counters(); ChartState::~ChartState() { diff --git a/kenlm/lm/left.hh b/kenlm/lm/left.hh index 1a839e1d9..f136ac6f3 100644 --- a/kenlm/lm/left.hh +++ b/kenlm/lm/left.hh @@ -120,12 +120,38 @@ public: void CreatePreAndSuffices(const Moses::ChartHypothesis &hyp); static std::vector recombCount; - + protected: const Moses::ChartHypothesis *hypo; const Moses::Phrase *prefix, *suffix; }; + +extern std::vector left_revisit_count, left_revisit_count_partial; +extern std::vector left_revisit_change, left_revisit_change_partial; + +class Counters { + public: + Counters() { memset(full, 0, sizeof(full)); memset(partial, 0, sizeof(partial)); } + + ~Counters() { + for (unsigned char i = 0; i < kMaxOrder; ++i) { + std::cerr << "Left: " << (unsigned)i << std::endl; + for (unsigned char j = 0; j < kMaxOrder; ++j) { + std::cerr << (unsigned)j << ' ' << full[i][j] << ' ' << partial[i][j] << std::endl; + } + } + } + + void Add(const ChartState &final) { + ++(final.full ? full : partial)[final.left.length][final.right.length]; + } + + uint64_t full[kMaxOrder][kMaxOrder], partial[kMaxOrder][kMaxOrder]; +}; + +extern Counters global_left_counters; + inline size_t hash_value(const ChartState &state) { size_t hashes[2]; hashes[0] = hash_value(state.left); @@ -169,11 +195,13 @@ template class RuleScore { } void NonTerminal(const ChartState &in, float prob) { + ++(in.full ? left_revisit_count : left_revisit_count_partial)[in.left.length]; + double &revisit_change = (in.full ? 
left_revisit_change : left_revisit_change_partial)[in.left.length]; prob_ += prob; if (!in.left.length) { if (in.full) { - for (const float *i = out_.right.backoff; i < out_.right.backoff + out_.right.length; ++i) prob_ += *i; + for (const float *i = out_.right.backoff; i < out_.right.backoff + out_.right.length; ++i) { prob_ += *i; revisit_change += *i; } left_done_ = true; out_.right = in.right; } @@ -195,7 +223,7 @@ template class RuleScore { float backoffs[kMaxOrder - 1], backoffs2[kMaxOrder - 1]; float *back = backoffs, *back2 = backoffs2; unsigned char next_use; - ProcessRet(model_.ExtendLeft(out_.right.words, out_.right.words + out_.right.length, out_.right.backoff, in.left.pointers[0], 1, back, next_use)); + ProcessRet(model_.ExtendLeft(out_.right.words, out_.right.words + out_.right.length, out_.right.backoff, in.left.pointers[0], 1, back, next_use), revisit_change); if (next_use != out_.right.length) { left_done_ = true; if (!next_use) { @@ -205,7 +233,7 @@ template class RuleScore { } unsigned char extend_length = 2; for (const uint64_t *i = in.left.pointers + 1; i < in.left.pointers + in.left.length; ++i, ++extend_length) { - ProcessRet(model_.ExtendLeft(out_.right.words, out_.right.words + next_use, back, *i, extend_length, back2, next_use)); + ProcessRet(model_.ExtendLeft(out_.right.words, out_.right.words + next_use, back, *i, extend_length, back2, next_use), revisit_change); if (next_use != out_.right.length) { left_done_ = true; if (!next_use) { @@ -217,7 +245,7 @@ template class RuleScore { } if (in.full) { - for (const float *i = back; i != back + next_use; ++i) prob_ += *i; + for (const float *i = back; i != back + next_use; ++i) { prob_ += *i; revisit_change += *i; } left_done_ = true; out_.right = in.right; return; @@ -245,12 +273,14 @@ template class RuleScore { float Finish() { // A N-1-gram might extend left and right but we should still set full to true because it's an N-1-gram. 
out_.full = left_done_ || (out_.left.length == model_.Order() - 1); + global_left_counters.Add(out_); return prob_; } private: - void ProcessRet(const FullScoreReturn &ret) { + void ProcessRet(const FullScoreReturn &ret, double &revisit_change) { prob_ += ret.prob; + revisit_change += ret.prob; if (left_done_) return; if (ret.independent_left) { left_done_ = true; diff --git a/moses-chart-cmd/src/Main.cpp b/moses-chart-cmd/src/Main.cpp index c5c128421..ff1485343 100644 --- a/moses-chart-cmd/src/Main.cpp +++ b/moses-chart-cmd/src/Main.cpp @@ -272,13 +272,25 @@ int main(int argc, char* argv[]) cerr << endl << "recomb_stats\t"; vector &recombCount = lm::ngram::ChartState::recombCount; - for (int i = 0; i < recombCount.size(); ++i) + for (unsigned i = 0; i < recombCount.size(); ++i) { int count = recombCount[i]; cerr << count << "\t"; } - cerr << endl; + cerr << "\nLeft adjustments"; + for (unsigned i = 0; i < 5; ++i) + cerr << ' ' << (lm::ngram::left_revisit_change[i] / static_cast(lm::ngram::left_revisit_count[i])); + cerr << "\nLeft revisits"; + for (unsigned i = 0; i < 5; ++i) + cerr << ' ' << lm::ngram::left_revisit_count[i]; + cerr << "\nPartial left adjustments"; + for (unsigned i = 0; i < 5; ++i) + cerr << ' ' << (lm::ngram::left_revisit_change_partial[i] / static_cast(lm::ngram::left_revisit_count_partial[i])); + cerr << "\nPartial left revisits"; + for (unsigned i = 0; i < 5; ++i) + cerr << ' ' << lm::ngram::left_revisit_count_partial[i]; + cerr << endl; } #ifdef WITH_THREADS @@ -299,6 +311,12 @@ int main(int argc, char* argv[]) cerr << count << "\t"; } cerr << endl; + + std::ifstream proc_stat("/proc/self/status"); + std::string line; + while (getline(proc_stat, line)) { + std::cerr << line << '\n'; + } #ifdef HACK_EXIT //This avoids that detructors are called (it can take a long time)