on-the-fly unbinarization of internal tree structure (for translation models extracted from binarized treebanks)

This commit is contained in:
Rico Sennrich 2015-03-18 17:36:32 +00:00
parent fc15e03ebe
commit 1568afb737
5 changed files with 127 additions and 66 deletions

View File

@ -168,9 +168,10 @@ TreePointer ChartKBestExtractor::GetOutputTree(const Derivation &d)
}
mytree->Combine(previous_trees);
mytree->Unbinarize();
return mytree;
} else {
UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
UTIL_THROW2("Error: k-best tree output active, but no internal tree structure found");
}
}

View File

@ -115,6 +115,44 @@ void InternalTree::Combine(const std::vector<TreePointer> &previous)
}
}
//take tree with virtual nodes (created with relax-parse --RightBinarize or --LeftBinarize) and reconstruct original tree.
void InternalTree::Unbinarize()
{
// nodes with virtual label cannot be unbinarized
if (m_value.empty() || m_value[0] == '^') {
return;
}
//if node has child that is virtual node, get unbinarized list of children
for (std::vector<TreePointer>::iterator it = m_children.begin(); it != m_children.end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLabel()[0] == '^') {
std::vector<TreePointer> new_children;
GetUnbinarizedChildren(new_children);
m_children = new_children;
break;
}
}
//recursion
for (std::vector<TreePointer>::iterator it = m_children.begin(); it != m_children.end(); ++it) {
(*it)->Unbinarize();
}
}
//get the children of a node in a binarized tree; if a child is virtual, (transitively) replace it with its children
void InternalTree::GetUnbinarizedChildren(std::vector<TreePointer> &ret) const
{
for (std::vector<TreePointer>::const_iterator itx = m_children.begin(); itx != m_children.end(); ++itx) {
const std::string &label = (*itx)->GetLabel();
if (!label.empty() && label[0] == '^') {
(*itx)->GetUnbinarizedChildren(ret);
}
else {
ret.push_back(*itx);
}
}
}
bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
{

View File

@ -38,6 +38,8 @@ public:
std::string GetString(bool start = true) const;
void Combine(const std::vector<TreePointer> &previous);
void Unbinarize();
void GetUnbinarizedChildren(std::vector<TreePointer> &children) const;
const std::string & GetLabel() const {
return m_value;
}
@ -93,6 +95,68 @@ public:
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// Python-like generator that yields next nonterminal leaf on every call
$generator(leafNT)
{
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNT(InternalTree* root = 0): tree(root) {}
$emit(std::vector<TreePointer>::iterator)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(it);
} else if ((*it)->GetLength() > 0) {
if ((*it).get()) { // normal pointer to same object that TreePointer points to
$restart(tree = (*it).get());
}
}
}
$stop;
};
// Python-like generator that yields the parent of the next nonterminal leaf on every call
$generator(leafNTParent)
{
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNTParent(InternalTree* root = 0): tree(root) {}
$emit(InternalTree*)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(tree);
} else if ((*it)->GetLength() > 0) {
if ((*it).get()) {
$restart(tree = (*it).get());
}
}
}
$stop;
};
// Python-like generator that yields the next nonterminal leaf on every call, and also stores the path from the root of the tree to the nonterminal
$generator(leafNTPath)
{
std::vector<TreePointer>::iterator it;
InternalTree* tree;
std::vector<InternalTree*> * path;
leafNTPath(InternalTree* root = NULL, std::vector<InternalTree*> * orig = NULL): tree(root), path(orig) {}
$emit(std::vector<TreePointer>::iterator)
path->push_back(tree);
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
path->push_back((*it).get());
$yield(it);
path->pop_back();
} else if ((*it)->GetLength() > 0) {
if ((*it).get()) {
$restart(tree = (*it).get());
}
}
}
path->pop_back();
$stop;
};
};
@ -113,68 +177,4 @@ public:
};
};
// Python-like generator that yields next nonterminal leaf on every call
$generator(leafNT)
{
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNT(InternalTree* root = 0): tree(root) {}
$emit(std::vector<TreePointer>::iterator)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(it);
} else if ((*it)->GetLength() > 0) {
if ((*it).get()) { // normal pointer to same object that TreePointer points to
$restart(tree = (*it).get());
}
}
}
$stop;
};
// Python-like generator that yields the parent of the next nonterminal leaf on every call
$generator(leafNTParent)
{
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNTParent(InternalTree* root = 0): tree(root) {}
$emit(InternalTree*)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(tree);
} else if ((*it)->GetLength() > 0) {
if ((*it).get()) {
$restart(tree = (*it).get());
}
}
}
$stop;
};
// Python-like generator that yields the next nonterminal leaf on every call, and also stores the path from the root of the tree to the nonterminal
$generator(leafNTPath)
{
std::vector<TreePointer>::iterator it;
InternalTree* tree;
std::vector<InternalTree*> * path;
leafNTPath(InternalTree* root = NULL, std::vector<InternalTree*> * orig = NULL): tree(root), path(orig) {}
$emit(std::vector<TreePointer>::iterator)
path->push_back(tree);
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
path->push_back((*it).get());
$yield(it);
path->pop_back();
} else if ((*it)->GetLength() > 0) {
if ((*it).get()) {
$restart(tree = (*it).get());
}
}
}
path->pop_back();
$stop;
};
}

View File

@ -70,6 +70,11 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy
}
mytree->Combine(previous_trees);
bool full_sentence = (mytree->GetChildren().back()->GetLabel() == "</s>" || (mytree->GetChildren().back()->GetLabel() == "SEND" && mytree->GetChildren().back()->GetChildren().back()->GetLabel() == "</s>"));
if (m_binarized && full_sentence) {
mytree->Unbinarize();
}
return new TreeState(mytree);
} else {
UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
@ -77,4 +82,17 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy
}
void TreeStructureFeature::SetParameter(const std::string& key, const std::string& value)
{
std::cerr << "setting: " << this->GetScoreProducerDescription() << " - " << key << "\n";
if (key == "tuneable") {
m_tuneable = Scan<bool>(value);
} else if (key == "filterable") { //ignore
} else if (key == "binarized") { // if trees have been binarized before learning translation model; output unbinarized trees
m_binarized = true;
} else {
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
}
}
}

View File

@ -34,9 +34,11 @@ class TreeStructureFeature : public StatefulFeatureFunction
{
SyntaxConstraints* m_constraints;
LabelSet* m_labelset;
bool m_binarized;
public:
TreeStructureFeature(const std::string &line)
:StatefulFeatureFunction(0, line) {
:StatefulFeatureFunction(0, line)
, m_binarized(false) {
ReadParameters();
}
~TreeStructureFeature() {
@ -53,6 +55,8 @@ public:
return true;
}
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown