mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-05 15:58:03 +03:00
SyntaxConstraintFeature (without any actual constraints; useful to build/output syntax tree from GHKM tree fragments)
This commit is contained in:
parent
ed25bb2b99
commit
9e177cb472
@ -50,6 +50,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "moses/FeatureVector.h"
|
||||
#include "moses/FF/StatefulFeatureFunction.h"
|
||||
#include "moses/FF/StatelessFeatureFunction.h"
|
||||
#include "moses/FF/SyntaxConstraintFeature.h"
|
||||
#include "util/exception.hh"
|
||||
|
||||
using namespace std;
|
||||
@ -393,7 +394,20 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
|
||||
OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId);
|
||||
UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL,
|
||||
"No output file for tree fragments specified");
|
||||
|
||||
//Tree of full sentence (appended to the tree fragments report)
|
||||
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
||||
for( size_t i=0; i<sff.size(); i++ ) {
|
||||
const StatefulFeatureFunction *ff = sff[i];
|
||||
if (ff->GetScoreProducerDescription() == "SyntaxConstraintFeature0") {
|
||||
const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
|
||||
out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
m_detailTreeFragmentsOutputCollector->Write(translationId, out.str());
|
||||
|
||||
}
|
||||
|
||||
//DIMw
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include "moses/FF/ExternalFeature.h"
|
||||
#include "moses/FF/ConstrainedDecoding.h"
|
||||
#include "moses/FF/CoveredReferenceFeature.h"
|
||||
#include "moses/FF/SyntaxConstraintFeature.h"
|
||||
#include "moses/FF/SoftMatchingFeature.h"
|
||||
|
||||
#include "moses/FF/SkeletonStatelessFF.h"
|
||||
@ -172,6 +173,7 @@ FeatureRegistry::FeatureRegistry()
|
||||
MOSES_FNAME(ConstrainedDecoding);
|
||||
MOSES_FNAME(CoveredReferenceFeature);
|
||||
MOSES_FNAME(ExternalFeature);
|
||||
MOSES_FNAME(SyntaxConstraintFeature);
|
||||
MOSES_FNAME(SoftMatchingFeature);
|
||||
|
||||
MOSES_FNAME(SkeletonStatelessFF);
|
||||
|
186
moses/FF/SyntaxConstraintFeature.cpp
Normal file
186
moses/FF/SyntaxConstraintFeature.cpp
Normal file
@ -0,0 +1,186 @@
|
||||
#include "SyntaxConstraintFeature.h"
|
||||
#include "moses/ScoreComponentCollection.h"
|
||||
#include "moses/Hypothesis.h"
|
||||
#include "moses/ChartHypothesis.h"
|
||||
#include "moses/TargetPhrase.h"
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
/**
 * Builds a tree node from its textual form.
 *
 * A string containing none of '[', ']' or ' ' is stored verbatim as the
 * label of this node. Any other string is handed to AddSubTree(), which
 * parses it starting at offset 0.
 *
 * @param line      plain label or bracketed tree string
 * @param terminal  initial value of the terminal flag; AddSubTree() may
 *                  clear it while parsing
 */
InternalTree::InternalTree(const std::string & line, const bool terminal)
  : m_isTerminal(terminal)
{
  const bool isPlainLabel = (line.find_first_of("[] ") == std::string::npos);

  if (isPlainLabel) {
    m_value = line;
    return;
  }

  AddSubTree(line, 0);
}
|
||||
|
||||
size_t InternalTree::AddSubTree(const std::string & line, size_t pos) {
|
||||
|
||||
std::string value = "";
|
||||
char token = 0;
|
||||
|
||||
while (token != ']' && pos != std::string::npos)
|
||||
{
|
||||
size_t oldpos = pos;
|
||||
pos = line.find_first_of("[] ", pos);
|
||||
if (pos == std::string::npos) break;
|
||||
token = line[pos];
|
||||
value = line.substr(oldpos,pos-oldpos);
|
||||
|
||||
if (token == '[') {
|
||||
if (m_value.size() > 0) {
|
||||
TreePointer child(new InternalTree(value, false));
|
||||
m_children.push_back(child);
|
||||
pos = child->AddSubTree(line, pos+1);
|
||||
}
|
||||
else {
|
||||
if (value.size() > 0) {
|
||||
m_value = value;
|
||||
}
|
||||
pos = AddSubTree(line, pos+1);
|
||||
}
|
||||
}
|
||||
else if (token == ' ' || token == ']') {
|
||||
if (value.size() > 0 && ! m_value.size() > 0) {
|
||||
m_value = value;
|
||||
}
|
||||
else if (value.size() > 0) {
|
||||
m_isTerminal = false;
|
||||
TreePointer child(new InternalTree(value, true));
|
||||
m_children.push_back(child);
|
||||
}
|
||||
if (token == ' ') {
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
|
||||
if (m_children.size() > 0) {
|
||||
m_isTerminal = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (pos == std::string::npos) {
|
||||
return line.size();
|
||||
}
|
||||
return min(line.size(),pos+1);
|
||||
|
||||
}
|
||||
|
||||
std::string InternalTree::GetString() const {
|
||||
|
||||
std::string ret = " ";
|
||||
|
||||
if (!m_isTerminal) {
|
||||
ret += "[";
|
||||
}
|
||||
|
||||
ret += m_value;
|
||||
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it)
|
||||
{
|
||||
ret += (*it)->GetString();
|
||||
}
|
||||
|
||||
if (!m_isTerminal) {
|
||||
ret += "]";
|
||||
}
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
void InternalTree::Combine(const std::vector<TreePointer> &previous) {
|
||||
|
||||
std::vector<TreePointer>::iterator it;
|
||||
bool found = false;
|
||||
leafNT next_leafNT(this);
|
||||
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
|
||||
found = next_leafNT(it);
|
||||
if (found) {
|
||||
*it = *it_prev;
|
||||
}
|
||||
else {
|
||||
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Looks for a direct child with the given label.
 *
 * @param label  label to look for
 * @param it     out: the first matching child, or the end of this node's
 *               child list if there is none
 * @return       true if a direct child carries 'label'
 */
bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
{
  it = m_children.begin();
  while (it != m_children.end() && (*it)->GetLabel() != label) {
    ++it;
  }
  return it != m_children.end();
}
|
||||
|
||||
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const {
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if ((*it)->GetLabel() == label) {
|
||||
return true;
|
||||
}
|
||||
std::vector<TreePointer>::const_iterator it2;
|
||||
if ((*it)->RecursiveSearch(label, it2)) {
|
||||
it = it2;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if ((*it)->GetLabel() == label) {
|
||||
parent = this;
|
||||
return true;
|
||||
}
|
||||
std::vector<TreePointer>::const_iterator it2;
|
||||
if ((*it)->RecursiveSearch(label, it2, parent)) {
|
||||
it = it2;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Builds the syntax tree of a chart hypothesis.
 *
 * The "Tree" property of the current target phrase is parsed into an
 * InternalTree; the trees stored in the states of the previous hypotheses
 * (one per nonterminal of the target phrase, in target order) are then
 * plugged into its nonterminal leaves via InternalTree::Combine().
 *
 * @param cur_hypo     hypothesis being scored
 * @param featureID    index of this feature's state in the previous
 *                     hypotheses
 * @param accumulator  score accumulator; not touched here
 * @return             newly allocated TreeState holding the combined tree
 * @throws             via UTIL_THROW_IF2 if a previous hypothesis is
 *                     missing or its state at 'featureID' is not a
 *                     TreeState
 */
FFState* SyntaxConstraintFeature::EvaluateChart(const ChartHypothesis& cur_hypo
    , int featureID /* used to index the state in the previous hypotheses */
    , ScoreComponentCollection* accumulator) const
{
  const TargetPhrase &targetPhrase = cur_hypo.GetCurrTargetPhrase();

  std::string tree;
  bool found = false;
  // 'found' is not consulted: without the property 'tree' stays empty and
  // an unlabelled node is built from it
  targetPhrase.GetProperty("Tree", tree, found);

  TreePointer mytree (new InternalTree(tree));

  //get subtrees (in target order)
  std::vector<TreePointer> previous_trees;
  for (size_t pos = 0; pos < targetPhrase.GetSize(); ++pos) {
    const Word &word = targetPhrase.GetWord(pos);
    if (!word.IsNonTerminal()) {
      continue;
    }

    size_t nonTermInd = targetPhrase.GetAlignNonTerm().GetNonTermIndexMap()[pos];
    const ChartHypothesis *prevHypo = cur_hypo.GetPrevHypo(nonTermInd);
    UTIL_THROW_IF2(prevHypo == NULL,
                   "No previous hypothesis for nonterminal at target position " << pos);

    // the cast fails if the state stored at featureID was produced by another feature
    const TreeState* prev = dynamic_cast<const TreeState*>(prevHypo->GetFFState(featureID));
    UTIL_THROW_IF2(prev == NULL,
                   "State of previous hypothesis is not a TreeState");

    previous_trees.push_back(prev->GetTree());
  }

  mytree->Combine(previous_trees);

  return new TreeState(mytree);
}
|
||||
|
||||
}
|
||||
|
149
moses/FF/SyntaxConstraintFeature.h
Normal file
149
moses/FF/SyntaxConstraintFeature.h
Normal file
@ -0,0 +1,149 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "StatefulFeatureFunction.h"
|
||||
#include "FFState.h"
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include "util/generator.hh"
|
||||
#include "util/exception.hh"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class InternalTree;
|
||||
typedef boost::shared_ptr<InternalTree> TreePointer;
|
||||
|
||||
class InternalTree
|
||||
{
|
||||
std::string m_value;
|
||||
std::vector<TreePointer> m_children;
|
||||
bool m_isTerminal;
|
||||
public:
|
||||
InternalTree(const std::string & line, const bool terminal = false);
|
||||
size_t AddSubTree(const std::string & line, size_t start);
|
||||
|
||||
std::string GetString() const;
|
||||
void Combine(const std::vector<TreePointer> &previous);
|
||||
const std::string & GetLabel() const {
|
||||
return m_value;
|
||||
}
|
||||
size_t GetLength() const {
|
||||
return m_children.size();
|
||||
}
|
||||
std::vector<TreePointer> & GetChildren() {
|
||||
return m_children;
|
||||
}
|
||||
void AddChild(TreePointer child) {
|
||||
m_children.push_back(child);
|
||||
}
|
||||
|
||||
bool IsTerminal() const {
|
||||
return m_isTerminal;
|
||||
}
|
||||
|
||||
bool IsLeafNT() const {
|
||||
return (!m_isTerminal && m_children.size() == 0);
|
||||
}
|
||||
|
||||
|
||||
// if found, 'it' is iterator to first tree node that matches search string
|
||||
bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
|
||||
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
|
||||
|
||||
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
|
||||
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
|
||||
|
||||
};
|
||||
|
||||
|
||||
class TreeState : public FFState
|
||||
{
|
||||
TreePointer m_tree;
|
||||
public:
|
||||
TreeState(TreePointer tree)
|
||||
:m_tree(tree)
|
||||
{}
|
||||
|
||||
TreePointer GetTree() const {
|
||||
return m_tree;
|
||||
}
|
||||
|
||||
int Compare(const FFState& other) const {return 0;};
|
||||
};
|
||||
|
||||
class SyntaxConstraintFeature : public StatefulFeatureFunction
|
||||
{
|
||||
public:
|
||||
SyntaxConstraintFeature(const std::string &line)
|
||||
:StatefulFeatureFunction(0, line) {}
|
||||
|
||||
virtual const FFState* EmptyHypothesisState(const InputType &input) const {
|
||||
return new TreeState(TreePointer());
|
||||
}
|
||||
|
||||
bool IsUseable(const FactorMask &mask) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
void Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {};
|
||||
void Evaluate(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {};
|
||||
FFState* Evaluate(
|
||||
const Hypothesis& cur_hypo,
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const {UTIL_THROW(util::Exception, "Not implemented");};
|
||||
FFState* EvaluateChart(
|
||||
const ChartHypothesis& /* cur_hypo */,
|
||||
int /* featureID - used to index the state in the previous hypotheses */,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
};
|
||||
|
||||
// Python-like generator that yields next nonterminal leaf on every call
|
||||
$generator(leafNT) {
|
||||
std::vector<TreePointer>::iterator it;
|
||||
InternalTree* tree;
|
||||
leafNT(InternalTree* root = 0): tree(root) {}
|
||||
$emit(std::vector<TreePointer>::iterator)
|
||||
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
|
||||
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
|
||||
$yield(it);
|
||||
}
|
||||
else if ((*it)->GetLength() > 0) {
|
||||
if (&(**it)) { // normal pointer to same object that TreePointer points to
|
||||
$restart(tree = &(**it));
|
||||
}
|
||||
}
|
||||
}
|
||||
$stop;
|
||||
};
|
||||
|
||||
|
||||
// Python-like generator that yields the parent of the next nonterminal leaf on every call
|
||||
$generator(leafNTParent) {
|
||||
std::vector<TreePointer>::iterator it;
|
||||
InternalTree* tree;
|
||||
leafNTParent(InternalTree* root = 0): tree(root) {}
|
||||
$emit(InternalTree*)
|
||||
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
|
||||
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
|
||||
$yield(tree);
|
||||
}
|
||||
else if ((*it)->GetLength() > 0) {
|
||||
if (&(**it)) { // normal pointer to same object that TreePointer points to
|
||||
$restart(tree = &(**it));
|
||||
}
|
||||
}
|
||||
}
|
||||
$stop;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
34
util/generator.hh
Normal file
34
util/generator.hh
Normal file
@ -0,0 +1,34 @@
|
||||
#pragma once
|
||||
|
||||
// generator/continuation for C++
|
||||
// author: Andrew Fedoniouk @ terrainformatica.com
|
||||
// idea borrowed from: "coroutines in C" Simon Tatham,
|
||||
// http://www.chiark.greenend.org.uk/~sgtatham/coroutines.html
|
||||
// BSD license
|
||||
|
||||
// CRTP base of all generators: keeps the resume point of the generator body
// and a linked stack of saved generator states.
//
// T is the derived generator type; it must be default-constructible and
// copy-assignable, because states are saved and restored by assignment.
template<typename T>
struct _generator
{
  T* _stack;   // most recently saved state, or 0 if none
  int _line;   // resume label of the generator body; -1 before the first call

  _generator() : _stack(0), _line(-1) {}

  // Saves a copy of the current state (including the link to the previously
  // saved one) on top of the stack.
  void _push() {
    T* saved = new T;
    *saved = *static_cast<T*>(this);
    _stack = saved;
  }

  // Restores the most recently saved state and frees its storage.
  // Returns false if nothing was saved.
  bool _pop() {
    if (!_stack) {
      return false;
    }
    T* top = _stack;
    *static_cast<T*>(this) = *top;
    top->_stack = 0;  // detach so that deleting 'top' frees this one entry only
    delete top;
    return true;
  }

  // Frees every state still on the stack.
  ~_generator() {
    while (_pop()) {}
  }
};
|
||||
|
||||
// Opens the definition of a generator type deriving from _generator.
#define $generator(GEN_NAME) struct GEN_NAME : public _generator<GEN_NAME>

// Opens the generator body: operator() stores the next value in its
// argument and returns true, or returns false once the body has finished.
// The body is one big switch on _line, so that a call can resume right
// after the $yield that ended the previous call.
#define $emit(VALUE_T) bool operator()(VALUE_T& _rv) { \
    if (_line < 0) _line = 0; \
    $START: switch (_line) { case 0:;

// Closes the generator body. If a state was saved by $restart it is
// restored and execution resumes there; otherwise the call returns false.
// _line is reset to 0 first, so a further call starts the body over.
#define $stop } _line = 0; if (_pop()) goto $START; return false; }

// Saves the current state (to be resumed right after this statement),
// executes WITH_STMT and runs the body again from its beginning.
#define $restart(WITH_STMT) { _push(); _stack->_line = __LINE__; _line = 0; WITH_STMT; goto $START; case __LINE__:; }

// Hands RESULT to the caller; the next call resumes right after this
// statement. __LINE__ serves as the resume label, so at most one
// $yield/$restart may appear per source line.
#define $yield(RESULT) \
  do { \
    _line = __LINE__; \
    _rv = (RESULT); return true; case __LINE__:; \
  } while (0)
|
Loading…
Reference in New Issue
Block a user