2012-10-15 16:58:33 +04:00
|
|
|
#include "search/vertex.hh"
|
|
|
|
|
|
|
|
#include "search/context.hh"
|
|
|
|
|
2013-02-14 17:11:53 +04:00
|
|
|
#include <boost/unordered_map.hpp>
|
|
|
|
|
2012-10-15 16:58:33 +04:00
|
|
|
#include <algorithm>
|
|
|
|
#include <functional>
|
2015-03-28 15:48:20 +03:00
|
|
|
#include <cassert>
|
2012-10-15 16:58:33 +04:00
|
|
|
|
|
|
|
namespace search {
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
2013-02-14 17:11:53 +04:00
|
|
|
const uint64_t kCompleteAdd = static_cast<uint64_t>(-1);
|
|
|
|
|
|
|
|
class DivideLeft {
|
|
|
|
public:
|
|
|
|
explicit DivideLeft(unsigned char index)
|
|
|
|
: index_(index) {}
|
|
|
|
|
|
|
|
uint64_t operator()(const lm::ngram::ChartState &state) const {
|
2015-04-30 08:05:11 +03:00
|
|
|
return (index_ < state.left.length) ?
|
2013-02-14 17:11:53 +04:00
|
|
|
state.left.pointers[index_] :
|
|
|
|
(kCompleteAdd - state.left.full);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
unsigned char index_;
|
|
|
|
};
|
|
|
|
|
|
|
|
class DivideRight {
|
|
|
|
public:
|
|
|
|
explicit DivideRight(unsigned char index)
|
|
|
|
: index_(index) {}
|
|
|
|
|
|
|
|
uint64_t operator()(const lm::ngram::ChartState &state) const {
|
|
|
|
return (index_ < state.right.length) ?
|
|
|
|
static_cast<uint64_t>(state.right.words[index_]) :
|
|
|
|
(kCompleteAdd - state.left.full);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
unsigned char index_;
|
|
|
|
};
|
|
|
|
|
|
|
|
template <class Divider> void Split(const Divider ÷r, const std::vector<HypoState> &hypos, std::vector<VertexNode> &extend) {
|
|
|
|
// Map from divider to index in extend.
|
|
|
|
typedef boost::unordered_map<uint64_t, std::size_t> Lookup;
|
|
|
|
Lookup lookup;
|
|
|
|
for (std::vector<HypoState>::const_iterator i = hypos.begin(); i != hypos.end(); ++i) {
|
|
|
|
uint64_t key = divider(i->state);
|
|
|
|
std::pair<Lookup::iterator, bool> res(lookup.insert(std::make_pair(key, extend.size())));
|
|
|
|
if (res.second) {
|
|
|
|
extend.resize(extend.size() + 1);
|
|
|
|
extend.back().AppendHypothesis(*i);
|
|
|
|
} else {
|
|
|
|
extend[res.first->second].AppendHypothesis(*i);
|
|
|
|
}
|
2012-10-15 16:58:33 +04:00
|
|
|
}
|
2013-02-14 17:11:53 +04:00
|
|
|
//assert((extend.size() != 1) || (hypos.size() == 1));
|
|
|
|
}
|
|
|
|
|
|
|
|
lm::WordIndex Identify(const lm::ngram::Right &right, unsigned char index) {
|
|
|
|
return right.words[index];
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t Identify(const lm::ngram::Left &left, unsigned char index) {
|
|
|
|
return left.pointers[index];
|
|
|
|
}
|
|
|
|
|
|
|
|
template <class Side> class DetermineSame {
|
|
|
|
public:
|
2015-04-30 08:05:11 +03:00
|
|
|
DetermineSame(const Side &side, unsigned char guaranteed)
|
2013-02-14 17:11:53 +04:00
|
|
|
: side_(side), guaranteed_(guaranteed), shared_(side.length), complete_(true) {}
|
|
|
|
|
|
|
|
void Consider(const Side &other) {
|
|
|
|
if (shared_ != other.length) {
|
|
|
|
complete_ = false;
|
|
|
|
if (shared_ > other.length)
|
|
|
|
shared_ = other.length;
|
|
|
|
}
|
|
|
|
for (unsigned char i = guaranteed_; i < shared_; ++i) {
|
|
|
|
if (Identify(side_, i) != Identify(other, i)) {
|
|
|
|
shared_ = i;
|
|
|
|
complete_ = false;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned char Shared() const { return shared_; }
|
|
|
|
|
|
|
|
bool Complete() const { return complete_; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
const Side &side_;
|
|
|
|
unsigned char guaranteed_, shared_;
|
|
|
|
bool complete_;
|
2012-10-15 16:58:33 +04:00
|
|
|
};
|
|
|
|
|
2013-02-14 17:11:53 +04:00
|
|
|
// Custom enum to save memory: valid values of policy_.
|
|
|
|
// Alternate and there is still alternation to do.
|
|
|
|
const unsigned char kPolicyAlternate = 0;
|
|
|
|
// Branch based on left state only, because right ran out or this is a left tree.
|
|
|
|
const unsigned char kPolicyOneLeft = 1;
|
|
|
|
// Branch based on right state only.
|
|
|
|
const unsigned char kPolicyOneRight = 2;
|
|
|
|
// Reveal everything in the next branch. Used to terminate the left/right policies.
|
|
|
|
// static const unsigned char kPolicyEverything = 3;
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
struct GreaterByScore : public std::binary_function<const HypoState &, const HypoState &, bool> {
|
|
|
|
bool operator()(const HypoState &first, const HypoState &second) const {
|
|
|
|
return first.score > second.score;
|
|
|
|
}
|
|
|
|
};
|
2012-10-15 16:58:33 +04:00
|
|
|
} // namespace
|
|
|
|
|
2013-02-14 17:11:53 +04:00
|
|
|
void VertexNode::FinishRoot() {
|
|
|
|
std::sort(hypos_.begin(), hypos_.end(), GreaterByScore());
|
|
|
|
extend_.clear();
|
|
|
|
// HACK: extend to one hypo so that root can be blank.
|
|
|
|
state_.left.full = false;
|
|
|
|
state_.left.length = 0;
|
|
|
|
state_.right.length = 0;
|
|
|
|
right_full_ = false;
|
|
|
|
niceness_ = 0;
|
|
|
|
policy_ = kPolicyAlternate;
|
|
|
|
if (hypos_.size() == 1) {
|
|
|
|
extend_.resize(1);
|
|
|
|
extend_.front().AppendHypothesis(hypos_.front());
|
|
|
|
extend_.front().FinishedAppending(0, 0);
|
|
|
|
}
|
|
|
|
if (hypos_.empty()) {
|
|
|
|
bound_ = -INFINITY;
|
|
|
|
} else {
|
|
|
|
bound_ = hypos_.front().score;
|
2012-10-15 16:58:33 +04:00
|
|
|
}
|
2013-02-14 17:11:53 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
void VertexNode::FinishedAppending(const unsigned char common_left, const unsigned char common_right) {
|
|
|
|
assert(!hypos_.empty());
|
|
|
|
assert(extend_.empty());
|
|
|
|
bound_ = hypos_.front().score;
|
|
|
|
state_ = hypos_.front().state;
|
|
|
|
bool all_full = state_.left.full;
|
|
|
|
bool all_non_full = !state_.left.full;
|
|
|
|
DetermineSame<lm::ngram::Left> left(state_.left, common_left);
|
|
|
|
DetermineSame<lm::ngram::Right> right(state_.right, common_right);
|
|
|
|
for (std::vector<HypoState>::const_iterator i = hypos_.begin() + 1; i != hypos_.end(); ++i) {
|
|
|
|
all_full &= i->state.left.full;
|
|
|
|
all_non_full &= !i->state.left.full;
|
|
|
|
left.Consider(i->state.left);
|
|
|
|
right.Consider(i->state.right);
|
2012-10-15 16:58:33 +04:00
|
|
|
}
|
2013-02-14 17:11:53 +04:00
|
|
|
state_.left.full = all_full && left.Complete();
|
|
|
|
right_full_ = all_full && right.Complete();
|
|
|
|
state_.left.length = left.Shared();
|
|
|
|
state_.right.length = right.Shared();
|
|
|
|
|
|
|
|
if (!all_full && !all_non_full) {
|
|
|
|
policy_ = kPolicyAlternate;
|
|
|
|
} else if (left.Complete()) {
|
|
|
|
policy_ = kPolicyOneRight;
|
|
|
|
} else if (right.Complete()) {
|
|
|
|
policy_ = kPolicyOneLeft;
|
|
|
|
} else {
|
|
|
|
policy_ = kPolicyAlternate;
|
2012-11-15 22:04:07 +04:00
|
|
|
}
|
2013-02-14 17:11:53 +04:00
|
|
|
niceness_ = state_.left.length + state_.right.length;
|
2012-11-15 22:04:07 +04:00
|
|
|
}
|
|
|
|
|
2013-02-14 17:11:53 +04:00
|
|
|
void VertexNode::BuildExtend() {
|
|
|
|
// Already built.
|
|
|
|
if (!extend_.empty()) return;
|
|
|
|
// Nothing to build since this is a leaf.
|
|
|
|
if (hypos_.size() <= 1) return;
|
|
|
|
bool left_branch = true;
|
|
|
|
switch (policy_) {
|
|
|
|
case kPolicyAlternate:
|
|
|
|
left_branch = (state_.left.length <= state_.right.length);
|
|
|
|
break;
|
|
|
|
case kPolicyOneLeft:
|
|
|
|
left_branch = true;
|
|
|
|
break;
|
|
|
|
case kPolicyOneRight:
|
|
|
|
left_branch = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (left_branch) {
|
|
|
|
Split(DivideLeft(state_.left.length), hypos_, extend_);
|
|
|
|
} else {
|
|
|
|
Split(DivideRight(state_.right.length), hypos_, extend_);
|
2012-11-15 22:04:07 +04:00
|
|
|
}
|
2013-02-14 17:11:53 +04:00
|
|
|
for (std::vector<VertexNode>::iterator i = extend_.begin(); i != extend_.end(); ++i) {
|
|
|
|
// TODO: provide more here for branching?
|
|
|
|
i->FinishedAppending(state_.left.length, state_.right.length);
|
2012-10-15 16:58:33 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace search
|