SyntaxConstraintFeature (without any actual constraints; useful to build/output syntax tree from GHKM tree fragments)

This commit is contained in:
Rico Sennrich 2014-01-16 18:38:23 +00:00
parent ed25bb2b99
commit 9e177cb472
5 changed files with 385 additions and 0 deletions

View File

@ -50,6 +50,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "moses/FeatureVector.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include "moses/FF/SyntaxConstraintFeature.h"
#include "util/exception.hh"
using namespace std;
@ -393,7 +394,20 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId);
UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL,
"No output file for tree fragments specified");
//Tree of full sentence (appended to the tree fragments report, not stderr)
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ ) {
const StatefulFeatureFunction *ff = sff[i];
if (ff->GetScoreProducerDescription() == "SyntaxConstraintFeature0") {
const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
break;
}
}
m_detailTreeFragmentsOutputCollector->Write(translationId, out.str());
}
//DIMw

View File

@ -34,6 +34,7 @@
#include "moses/FF/ExternalFeature.h"
#include "moses/FF/ConstrainedDecoding.h"
#include "moses/FF/CoveredReferenceFeature.h"
#include "moses/FF/SyntaxConstraintFeature.h"
#include "moses/FF/SoftMatchingFeature.h"
#include "moses/FF/SkeletonStatelessFF.h"
@ -172,6 +173,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(ConstrainedDecoding);
MOSES_FNAME(CoveredReferenceFeature);
MOSES_FNAME(ExternalFeature);
MOSES_FNAME(SyntaxConstraintFeature);
MOSES_FNAME(SoftMatchingFeature);
MOSES_FNAME(SkeletonStatelessFF);

View File

@ -0,0 +1,186 @@
#include "SyntaxConstraintFeature.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/Hypothesis.h"
#include "moses/ChartHypothesis.h"
#include "moses/TargetPhrase.h"
#include <boost/shared_ptr.hpp>
#include <vector>
using namespace std;
namespace Moses
{
/**
 * Build a tree node from its string representation.
 *
 * If the string contains none of the delimiters '[', ']' or ' ', it is taken
 * verbatim as the label of this node. Otherwise it is parsed by AddSubTree(),
 * starting at offset 0.
 *
 * @param line      string to build the node from
 * @param terminal  initial value of the terminal flag (AddSubTree() may clear it)
 */
InternalTree::InternalTree(const std::string & line, const bool terminal)
  : m_isTerminal(terminal)
{
  const bool hasDelimiter = (line.find_first_of("[] ") != std::string::npos);
  if (hasDelimiter) {
    AddSubTree(line, 0);
    return;
  }
  m_value = line;
}
/**
 * Parse (part of) a bracketed tree string into this node.
 *
 * Scanning starts at offset 'pos' and proceeds delimiter by delimiter
 * ('[', ']' or ' ') until a ']' has been consumed or no delimiter is left.
 * The text between two delimiters is called the "value" below:
 *  - '[' : if this node already has a label, a new child node is created
 *          (labelled with the value) and parsing continues inside that child;
 *          otherwise the value (if any) becomes this node's label and parsing
 *          continues in this node.
 *  - ' ' / ']' : a non-empty value becomes this node's label if it has none
 *          yet, otherwise it is appended as a terminal child.
 *
 * @param line  complete tree string
 * @param pos   offset in 'line' at which to start scanning
 * @return offset at which the caller should resume scanning; never larger
 *         than line.size()
 */
size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
{
  char token = 0;

  while (token != ']' && pos != std::string::npos) {
    const size_t oldpos = pos;
    pos = line.find_first_of("[] ", pos);
    if (pos == std::string::npos) break;

    token = line[pos];
    const std::string value = line.substr(oldpos, pos - oldpos);

    if (token == '[') {
      if (!m_value.empty()) {
        // this node is already labelled: the bracket opens a child node
        TreePointer child(new InternalTree(value, false));
        m_children.push_back(child);
        pos = child->AddSubTree(line, pos + 1);
      } else {
        // still unlabelled: the bracket belongs to this node itself
        if (!value.empty()) {
          m_value = value;
        }
        pos = AddSubTree(line, pos + 1);
      }
    } else if (token == ' ' || token == ']') {
      if (!value.empty()) {
        // The original condition was written '! m_value.size() > 0', which
        // parses as '(!m_value.size()) > 0'; spelled out here as an explicit
        // emptiness test with identical outcome.
        if (m_value.empty()) {
          m_value = value;
        } else {
          m_isTerminal = false;
          TreePointer child(new InternalTree(value, true));
          m_children.push_back(child);
        }
      }
      if (token == ' ') {
        pos++;
      }
    }

    // a node with children can never be a terminal
    if (!m_children.empty()) {
      m_isTerminal = false;
    }
  }

  if (pos == std::string::npos) {
    return line.size();
  }
  return std::min(line.size(), pos + 1);
}
/**
 * Serialise this node and all of its descendants.
 *
 * Every node contributes a leading space followed by its label; nodes that are
 * not terminals are additionally wrapped in '[' ... ']', with the serialised
 * children placed between label and closing bracket.
 */
std::string InternalTree::GetString() const
{
  const bool bracketed = !m_isTerminal;

  std::string text(bracketed ? " [" : " ");
  text.append(m_value);

  for (size_t i = 0; i < m_children.size(); ++i) {
    text += m_children[i]->GetString();
  }

  if (bracketed) {
    text.push_back(']');
  }
  return text;
}
void InternalTree::Combine(const std::vector<TreePointer> &previous) {
std::vector<TreePointer>::iterator it;
bool found = false;
leafNT next_leafNT(this);
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
found = next_leafNT(it);
if (found) {
*it = *it_prev;
}
else {
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
}
}
}
/**
 * Look for a direct child whose label equals 'label'.
 *
 * @param label  label to search for
 * @param it     set to the first matching child, or to the end of the child
 *               list if there is no match
 * @return true if a matching child exists
 */
bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
{
  it = m_children.begin();
  while (it != m_children.end() && (*it)->GetLabel() != label) {
    ++it;
  }
  return it != m_children.end();
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
/**
 * Build the syntax tree covered by a chart hypothesis.
 *
 * The tree fragment of the applied rule is read from the "Tree" property of
 * the current target phrase. For every nonterminal on the target side (in
 * target order) the tree stored in the state of the corresponding previous
 * hypothesis is collected, and the collected trees are then substituted for
 * the fragment's leaf nonterminals via InternalTree::Combine().
 *
 * No score is produced: 'accumulator' is not touched.
 *
 * @param cur_hypo     hypothesis being evaluated
 * @param featureID    index of this feature's state in the previous hypotheses
 * @param accumulator  unused
 * @return newly allocated TreeState holding the combined tree
 * @throws util::Exception if a previous hypothesis does not carry a TreeState
 *         at 'featureID'
 */
FFState* SyntaxConstraintFeature::EvaluateChart(const ChartHypothesis& cur_hypo
    , int featureID /* used to index the state in the previous hypotheses */
    , ScoreComponentCollection* accumulator) const
{
  const TargetPhrase &targetPhrase = cur_hypo.GetCurrTargetPhrase();

  // NOTE(review): 'found' is not inspected; if the property is missing the
  // fragment is built from whatever GetProperty() left in 'tree' (presumably
  // the empty string) -- confirm this is the intended fallback.
  std::string tree;
  bool found = false;
  targetPhrase.GetProperty("Tree", tree, found);
  TreePointer mytree(new InternalTree(tree));

  // get subtrees (in target order)
  std::vector<TreePointer> previous_trees;
  for (size_t pos = 0; pos < targetPhrase.GetSize(); ++pos) {
    const Word &word = targetPhrase.GetWord(pos);
    if (!word.IsNonTerminal()) {
      continue;
    }

    size_t nonTermInd = targetPhrase.GetAlignNonTerm().GetNonTermIndexMap()[pos];
    const ChartHypothesis *prevHypo = cur_hypo.GetPrevHypo(nonTermInd);
    const TreeState* prev = dynamic_cast<const TreeState*>(prevHypo->GetFFState(featureID));
    // dynamic_cast yields NULL when the stored state is not a TreeState;
    // fail loudly instead of dereferencing a null pointer.
    UTIL_THROW_IF2(prev == NULL,
                   "State of previous hypothesis is not a TreeState (feature index " << featureID << ")");
    previous_trees.push_back(prev->GetTree());
  }

  mytree->Combine(previous_trees);
  return new TreeState(mytree);
}
}

View File

@ -0,0 +1,149 @@
#pragma once
#include <string>
#include "StatefulFeatureFunction.h"
#include "FFState.h"
#include <boost/shared_ptr.hpp>
#include "util/generator.hh"
#include "util/exception.hh"
namespace Moses
{
class InternalTree;
typedef boost::shared_ptr<InternalTree> TreePointer;
class InternalTree
{
std::string m_value;
std::vector<TreePointer> m_children;
bool m_isTerminal;
public:
InternalTree(const std::string & line, const bool terminal = false);
size_t AddSubTree(const std::string & line, size_t start);
std::string GetString() const;
void Combine(const std::vector<TreePointer> &previous);
const std::string & GetLabel() const {
return m_value;
}
size_t GetLength() const {
return m_children.size();
}
std::vector<TreePointer> & GetChildren() {
return m_children;
}
void AddChild(TreePointer child) {
m_children.push_back(child);
}
bool IsTerminal() const {
return m_isTerminal;
}
bool IsLeafNT() const {
return (!m_isTerminal && m_children.size() == 0);
}
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
};
/**
 * Feature function state that carries the syntax tree built for a hypothesis.
 */
class TreeState : public FFState
{
public:
  TreeState(TreePointer tree)
    : m_tree(tree) {
  }

  // Tree stored in this state (may be an empty pointer).
  TreePointer GetTree() const {
    return m_tree;
  }

  // Always reports equality: the stored tree plays no role in the comparison.
  int Compare(const FFState& other) const {
    return 0;
  }

private:
  TreePointer m_tree;
};
/**
 * Stateful feature function that builds a syntax tree per chart hypothesis and
 * stores it in a TreeState. The overrides defined in this class add nothing to
 * any score collection; only the chart-based evaluation is implemented.
 */
class SyntaxConstraintFeature : public StatefulFeatureFunction
{
public:
  // 'line' is forwarded unchanged to the base class; the leading 0 is
  // presumably the number of score components -- TODO confirm.
  SyntaxConstraintFeature(const std::string &line)
    : StatefulFeatureFunction(0, line) {
  }

  // Initial state: a TreeState holding an empty tree pointer.
  virtual const FFState* EmptyHypothesisState(const InputType &input) const {
    TreePointer noTree;
    return new TreeState(noTree);
  }

  // Accepts every factor mask.
  bool IsUseable(const FactorMask &mask) const {
    return true;
  }

  // Intentionally empty.
  void Evaluate(const Phrase &source
                , const TargetPhrase &targetPhrase
                , ScoreComponentCollection &scoreBreakdown
                , ScoreComponentCollection &estimatedFutureScore) const {
  }

  // Intentionally empty.
  void Evaluate(const InputType &input
                , const InputPath &inputPath
                , const TargetPhrase &targetPhrase
                , ScoreComponentCollection &scoreBreakdown
                , ScoreComponentCollection *estimatedFutureScore = NULL) const {
  }

  // Evaluation over (non-chart) Hypothesis objects is not supported: always throws.
  FFState* Evaluate(
    const Hypothesis& cur_hypo,
    const FFState* prev_state,
    ScoreComponentCollection* accumulator) const {
    UTIL_THROW(util::Exception, "Not implemented");
  }

  // Builds the tree for a chart hypothesis; defined out of line.
  FFState* EvaluateChart(
    const ChartHypothesis& /* cur_hypo */,
    int /* featureID - used to index the state in the previous hypotheses */,
    ScoreComponentCollection* accumulator) const;
};
// Python-like generator that yields next nonterminal leaf on every call.
//
// Usage: construct with the root of the tree, then call operator()(it)
// repeatedly. Each call that returns true has stored in 'it' an iterator to
// the next child that is not a terminal and has no children; a call that
// returns false means the traversal is exhausted.
//
// Traversal order: the children of the current node are visited in order; a
// child that has children of its own is descended into (via $restart) before
// the following sibling is looked at. Childless terminals are skipped.
//
// The default argument of the constructor exists so that the type is
// default-constructible, which _generator::_push() requires; operator() must
// not be called on an instance whose 'tree' is null, since it dereferences it.
$generator(leafNT) {
// position within the child list of 'tree'; part of the saved generator state
std::vector<TreePointer>::iterator it;
// node whose children are currently being iterated
InternalTree* tree;
leafNT(InternalTree* root = 0): tree(root) {}
$emit(std::vector<TreePointer>::iterator)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
// childless nonterminal: hand its slot to the caller
$yield(it);
}
else if ((*it)->GetLength() > 0) {
// NOTE(review): *it has already been dereferenced by the conditions above,
// so this address test looks like it can never be false -- confirm.
if (&(**it)) { // normal pointer to same object that TreePointer points to
// save the current position and continue the traversal inside this child
$restart(tree = &(**it));
}
}
}
$stop;
};
// Python-like generator that yields the parent of the next nonterminal leaf on every call.
//
// Usage: construct with the root of the tree, then call operator()(node)
// repeatedly. Each call that returns true has stored in 'node' the node whose
// child list contains the next childless nonterminal (the same parent is
// yielded once per such child); a call that returns false means the traversal
// is exhausted.
//
// Traversal order: the children of the current node are visited in order; a
// child that has children of its own is descended into (via $restart) before
// the following sibling is looked at. Childless terminals are skipped.
//
// The default argument of the constructor exists so that the type is
// default-constructible, which _generator::_push() requires; operator() must
// not be called on an instance whose 'tree' is null, since it dereferences it.
$generator(leafNTParent) {
// position within the child list of 'tree'; part of the saved generator state
std::vector<TreePointer>::iterator it;
// node whose children are currently being iterated
InternalTree* tree;
leafNTParent(InternalTree* root = 0): tree(root) {}
$emit(InternalTree*)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
// childless nonterminal found: report the node that owns it
$yield(tree);
}
else if ((*it)->GetLength() > 0) {
// NOTE(review): *it has already been dereferenced by the conditions above,
// so this address test looks like it can never be false -- confirm.
if (&(**it)) { // normal pointer to same object that TreePointer points to
// save the current position and continue the traversal inside this child
$restart(tree = &(**it));
}
}
}
$stop;
};
}

34
util/generator.hh Normal file
View File

@ -0,0 +1,34 @@
#pragma once
// generator/continuation for C++
// author: Andrew Fedoniouk @ terrainformatica.com
// idea borrowed from: "coroutines in C" Simon Tatham,
// http://www.chiark.greenend.org.uk/~sgtatham/coroutines.html
// BSD license
// Base class (CRTP) for generators built with the $generator/$emit/$yield
// macros. T is the derived generator type; it must derive from _generator<T>
// and be default-constructible and copy-assignable.
//
// Besides the resume point (_line) the base keeps a singly linked stack of
// saved copies of the whole generator object. Each saved copy owns the rest of
// the chain through its own _stack member.
//
// NOTE(review): copying a generator that currently has saved frames copies
// only the _stack pointer, so two objects would then own the same chain.
template<typename T>
struct _generator
{
  T* _stack;   // most recently saved frame, or 0 if none
  int _line;   // resume point inside operator(); -1 until the first call

  _generator():_stack(0), _line(-1) {}

  // Save a copy of the current generator state on top of the stack.
  void _push() {
    T* n = new T;
    *n = *static_cast<T*>(this);  // the copy inherits the existing chain
    _stack = n;
  }

  // Restore the most recently saved state. Returns false (and changes
  // nothing) if there is no saved state.
  bool _pop() {
    if(!_stack) return false;
    T* t = _stack;
    *static_cast<T*>(this) = *t;  // also re-links _stack to the older frames
    t->_stack = 0;                // detach, so deleting t leaves the chain alone
    delete t;
    return true;
  }

  // Release all saved frames. The frames are unlinked and deleted directly
  // rather than through _pop(): by the time this base destructor runs the
  // derived part of the object has already been destroyed and must not be
  // assigned to any more.
  ~_generator() {
    while (_stack) {
      T* t = _stack;
      _stack = t->_stack;
      t->_stack = 0;
      delete t;
    }
  }
};
// The macros below rely on the compiler accepting '$' in identifiers.
//
// $generator(NAME): opens the definition of a generator type NAME deriving
// from _generator<NAME>; follow it with '{', the state members, a constructor
// and one $emit ... $stop section, then close with '};'.
#define $generator(NAME) struct NAME : public _generator<NAME>
// $emit(T): opens 'bool operator()(T& _rv)', the function a caller invokes to
// obtain the next value of type T. A fresh generator (_line < 0) starts at
// case 0; otherwise the switch jumps to the case label recorded in _line.
#define $emit(T) bool operator()(T& _rv) { \
if(_line < 0) _line=0; \
$START: switch(_line) { case 0:;
// $stop: closes the switch and the operator. Reached when the generator body
// runs to its end: the resume point is reset to 0, and if a saved frame exists
// it is restored and dispatched to; otherwise the call returns false.
#define $stop } _line = 0; if(_pop()) goto $START; return false; }
// $restart(WITH): saves the current state with this source line as its resume
// point, resets the resume point to 0, executes WITH (typically re-targeting
// the generator's members) and starts the body over. Once the restarted run
// hits $stop, the saved state is popped and execution continues right after
// the $restart. Because __LINE__ is the case label, each $restart/$yield must
// sit on a source line of its own.
#define $restart(WITH) { _push(); _stack->_line = __LINE__; _line=0; WITH; goto $START; case __LINE__:; }
// $yield(V): records this source line as the resume point, stores V in the
// caller's output argument and returns true; the next call continues right
// after the $yield.
#define $yield(V) \
do {\
_line=__LINE__;\
_rv = (V); return true; case __LINE__:;\
} while (0)