Reintroduce deterministic cube pruning as an option

Use --cube-pruning-deterministic-search or --cbds
This commit is contained in:
Michael Denkowski 2015-09-25 11:03:23 +02:00
parent d2a6aa752e
commit 56e3bc1ea2
8 changed files with 79 additions and 39 deletions

View File

@ -273,9 +273,11 @@ BackwardsEdge::PushSuccessors(const size_t x, const size_t y)
////////////////////////////////////////////////////////////////////////////////
BitmapContainer::BitmapContainer(const WordsBitmap &bitmap
, HypothesisStackCubePruning &stack)
, HypothesisStackCubePruning &stack
, bool deterministic)
: m_bitmap(bitmap)
, m_stack(stack)
, m_deterministic(deterministic)
, m_numStackInsertions(0)
{
m_hypotheses = HypothesisSet();
@ -309,10 +311,13 @@ BitmapContainer::Enqueue(int hypothesis_pos
, Hypothesis *hypothesis
, BackwardsEdge *edge)
{
// Only supply target phrase if running deterministic search mode
const TargetPhrase *target_phrase = m_deterministic ? &(hypothesis->GetCurrTargetPhrase()) : NULL;
HypothesisQueueItem *item = new HypothesisQueueItem(hypothesis_pos
, translation_pos
, hypothesis
, edge);
, edge
, target_phrase);
IFVERBOSE(2) {
item->GetHypothesis()->GetManager().GetSentenceStats().StartTimeManageCubes();
}

View File

@ -61,6 +61,7 @@ private:
size_t m_hypothesis_pos, m_translation_pos;
Hypothesis *m_hypothesis;
BackwardsEdge *m_edge;
boost::shared_ptr<TargetPhrase> m_target_phrase;
HypothesisQueueItem();
@ -68,11 +69,16 @@ public:
HypothesisQueueItem(const size_t hypothesis_pos
, const size_t translation_pos
, Hypothesis *hypothesis
, BackwardsEdge *edge)
, BackwardsEdge *edge
, const TargetPhrase *target_phrase = NULL)
: m_hypothesis_pos(hypothesis_pos)
, m_translation_pos(translation_pos)
, m_hypothesis(hypothesis)
, m_edge(edge) {
if (target_phrase != NULL)
{
m_target_phrase.reset(new TargetPhrase(*target_phrase));
}
}
~HypothesisQueueItem() {
@ -93,6 +99,10 @@ public:
BackwardsEdge *GetBackwardsEdge() {
return m_edge;
}
boost::shared_ptr<TargetPhrase> GetTargetPhrase() {
return m_target_phrase;
}
};
//! Allows comparison of two HypothesisQueueItem objects by the corresponding scores.
@ -103,20 +113,26 @@ public:
float scoreA = itemA->GetHypothesis()->GetTotalScore();
float scoreB = itemB->GetHypothesis()->GetTotalScore();
return (scoreA < scoreB);
/*
if (scoreA < scoreB)
{
return true;
return true;
}
else if (scoreA < scoreB)
else if (scoreA > scoreB)
{
return false;
return false;
}
else
{
return itemA < itemB;
}*/
// Equal scores: break ties by comparing target phrases (if they exist)
boost::shared_ptr<TargetPhrase> phrA = itemA->GetTargetPhrase();
boost::shared_ptr<TargetPhrase> phrB = itemB->GetTargetPhrase();
if (!phrA || !phrB)
{
// Fallback: scoreA < scoreB == false, non-deterministic sort
return false;
}
return (phrA->Compare(*phrB) < 0);
}
}
};
@ -134,18 +150,6 @@ public:
float scoreB = hypoB->GetTotalScore();
return (scoreA > scoreB);
/*
{
return true;
}
else if (scoreA < scoreB)
{
return false;
}
else
{
return hypoA < hypoB;
}*/
}
};
@ -210,13 +214,15 @@ private:
BackwardsEdgeSet m_edges;
HypothesisQueue m_queue;
size_t m_numStackInsertions;
bool m_deterministic;
// We always require a corresponding bitmap to be supplied.
BitmapContainer();
BitmapContainer(const BitmapContainer &);
public:
BitmapContainer(const WordsBitmap &bitmap
, HypothesisStackCubePruning &stack);
, HypothesisStackCubePruning &stack
, bool deterministic_sort = false);
// The destructor will also delete all the edges that are
// connected to this BitmapContainer.

View File

@ -39,6 +39,7 @@ HypothesisStackCubePruning::HypothesisStackCubePruning(Manager& manager) :
m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = -std::numeric_limits<float>::infinity();
m_deterministic = manager.options().cube.deterministic_search;
}
/** remove all hypotheses from the collection */
@ -148,7 +149,7 @@ void HypothesisStackCubePruning::AddInitial(Hypothesis *hypo)
"Should have added hypothesis " << *hypo);
const WordsBitmap &bitmap = hypo->GetWordsBitmap();
m_bitmapAccessor[bitmap] = new BitmapContainer(bitmap, *this);
m_bitmapAccessor[bitmap] = new BitmapContainer(bitmap, *this, m_deterministic);
}
void HypothesisStackCubePruning::PruneToSize(size_t newSize)
@ -258,7 +259,7 @@ void HypothesisStackCubePruning::SetBitmapAccessor(const WordsBitmap &newBitmap
BitmapContainer *bmContainer;
if (bcExists == m_bitmapAccessor.end()) {
bmContainer = new BitmapContainer(newBitmap, stack);
bmContainer = new BitmapContainer(newBitmap, stack, m_deterministic);
m_bitmapAccessor[newBitmap] = bmContainer;
} else {
bmContainer = bcExists->second;

View File

@ -52,6 +52,7 @@ protected:
float m_beamWidth; /**< minimum score due to threshold pruning */
size_t m_maxHypoStackSize; /**< maximum number of hypotheses allowed in this stack */
bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */
bool m_deterministic; /**< flag to determine whether to sort hypotheses deterministically */
/** add hypothesis to stack. Prune if necessary.
* Returns false if equiv hypo exists in collection, otherwise returns true

View File

@ -116,6 +116,7 @@ Parameter::Parameter()
AddParam(cube_opts,"cube-pruning-pop-limit", "cbp", "How many hypotheses should be popped for each stack. (default = 1000)");
AddParam(cube_opts,"cube-pruning-diversity", "cbd", "How many hypotheses should be created for each coverage. (default = 0)");
AddParam(cube_opts,"cube-pruning-lazy-scoring", "cbls", "Don't fully score a hypothesis until it is popped");
AddParam(cube_opts,"cube-pruning-deterministic-search", "cbds", "Break ties deterministically during search");
///////////////////////////////////////////////////////////////////////////////////////
// minimum bayes risk decoding

View File

@ -32,7 +32,14 @@ public:
} else if (scoreA > scoreB) {
return false;
} else {
return A < B;
// Equal scores: break ties by comparing target phrases (if they exist)
boost::shared_ptr<TargetPhrase> phrA = A->Top()->GetTargetPhrase();
boost::shared_ptr<TargetPhrase> phrB = B->Top()->GetTargetPhrase();
if (!phrA || !phrB) {
// Fallback: compare pointers, non-deterministic sort
return A < B;
}
return (phrA->Compare(*phrB) < 0);
}
}
};

View File

@ -13,6 +13,7 @@ namespace Moses
param.SetParameter(diversity, "cube-pruning-diversity",
DEFAULT_CUBE_PRUNING_DIVERSITY);
param.SetParameter(lazy_scoring, "cube-pruning-lazy-scoring", false);
param.SetParameter(deterministic_search, "cube-pruning-deterministic-search", false);
return true;
}
@ -30,20 +31,37 @@ namespace Moses
if (si != params.end()) diversity = xmlrpc_c::value_int(si->second);
si = params.find("cube-pruning-lazy-scoring");
if (si != params.end())
{
std::string spec = xmlrpc_c::value_string(si->second);
if (spec == "true" or spec == "on" or spec == "1")
lazy_scoring = true;
else if (spec == "false" or spec == "off" or spec == "0")
lazy_scoring = false;
else
if (si != params.end())
{
char const* msg
= "Error parsing specification for cube-pruning-lazy-scoring";
xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
std::string spec = xmlrpc_c::value_string(si->second);
if (spec == "true" or spec == "on" or spec == "1")
lazy_scoring = true;
else if (spec == "false" or spec == "off" or spec == "0")
lazy_scoring = false;
else
{
char const* msg
= "Error parsing specification for cube-pruning-lazy-scoring";
xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
}
}
}
// Optional per-request override of the deterministic tie-breaking switch.
// Accepted spellings: "true"/"on"/"1" enable it, "false"/"off"/"0" disable it.
si = params.find("cube-pruning-deterministic-search");
if (si != params.end())
{
  std::string spec = xmlrpc_c::value_string(si->second);
  if (spec == "true" or spec == "on" or spec == "1")
    deterministic_search = true;
  else if (spec == "false" or spec == "off" or spec == "0")
    deterministic_search = false;
  else
  {
    char const* msg
      = "Error parsing specification for cube-pruning-deterministic-search";
    // The fault has to be thrown: a bare xmlrpc_c::fault(...) expression only
    // builds a temporary that is destroyed immediately, so the unparsable
    // value would be ignored without any error reaching the client.
    throw xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
  }
}
return true;
}
#endif

View File

@ -12,6 +12,7 @@ namespace Moses
size_t pop_limit;
size_t diversity;
bool lazy_scoring;
bool deterministic_search;
bool init(Parameter const& param);
CubePruningOptions(Parameter const& param);