mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
update extract-mixed-syntax
This commit is contained in:
parent
6d61db28fa
commit
6289b39fd8
@ -18,15 +18,14 @@ AlignedSentence::AlignedSentence(int lineNum,
|
||||
const std::string &source,
|
||||
const std::string &target,
|
||||
const std::string &alignment)
|
||||
:m_lineNum(lineNum)
|
||||
:m_lineNum(lineNum)
|
||||
{
|
||||
PopulateWordVec(m_source, source);
|
||||
PopulateWordVec(m_target, target);
|
||||
PopulateAlignment(alignment);
|
||||
}
|
||||
|
||||
AlignedSentence::~AlignedSentence()
|
||||
{
|
||||
AlignedSentence::~AlignedSentence() {
|
||||
Moses::RemoveAllInColl(m_source);
|
||||
Moses::RemoveAllInColl(m_target);
|
||||
}
|
||||
|
@ -14,12 +14,11 @@
|
||||
|
||||
class Parameter;
|
||||
|
||||
class AlignedSentence
|
||||
{
|
||||
class AlignedSentence {
|
||||
public:
|
||||
AlignedSentence(int lineNum)
|
||||
:m_lineNum(lineNum) {
|
||||
}
|
||||
:m_lineNum(lineNum)
|
||||
{}
|
||||
|
||||
AlignedSentence(int lineNum,
|
||||
const std::string &source,
|
||||
@ -28,13 +27,11 @@ public:
|
||||
virtual ~AlignedSentence();
|
||||
virtual void Create(const Parameter &params);
|
||||
|
||||
const Phrase &GetPhrase(Moses::FactorDirection direction) const {
|
||||
return (direction == Moses::Input) ? m_source : m_target;
|
||||
}
|
||||
const Phrase &GetPhrase(Moses::FactorDirection direction) const
|
||||
{ return (direction == Moses::Input) ? m_source : m_target; }
|
||||
|
||||
const ConsistentPhrases &GetConsistentPhrases() const {
|
||||
return m_consistentPhrases;
|
||||
}
|
||||
const ConsistentPhrases &GetConsistentPhrases() const
|
||||
{ return m_consistentPhrases; }
|
||||
|
||||
virtual std::string Debug() const;
|
||||
|
||||
|
@ -16,15 +16,14 @@ AlignedSentenceSyntax::AlignedSentenceSyntax(int lineNum,
|
||||
const std::string &source,
|
||||
const std::string &target,
|
||||
const std::string &alignment)
|
||||
:AlignedSentence(lineNum)
|
||||
,m_sourceStr(source)
|
||||
,m_targetStr(target)
|
||||
,m_alignmentStr(alignment)
|
||||
:AlignedSentence(lineNum)
|
||||
,m_sourceStr(source)
|
||||
,m_targetStr(target)
|
||||
,m_alignmentStr(alignment)
|
||||
{
|
||||
}
|
||||
|
||||
AlignedSentenceSyntax::~AlignedSentenceSyntax()
|
||||
{
|
||||
AlignedSentenceSyntax::~AlignedSentenceSyntax() {
|
||||
// TODO Auto-generated destructor stub
|
||||
}
|
||||
|
||||
@ -43,7 +42,8 @@ void AlignedSentenceSyntax::Populate(bool isSyntax, int mixedSyntaxType, const P
|
||||
tree.AddToAll(params.hieroNonTerm);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
PopulateWordVec(phrase, line);
|
||||
tree.SetHieroLabel(params.hieroNonTerm);
|
||||
}
|
||||
@ -83,7 +83,8 @@ void AlignedSentenceSyntax::XMLParse(Phrase &output,
|
||||
const Parameter &params)
|
||||
{
|
||||
int childNum = 0;
|
||||
for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
|
||||
for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling())
|
||||
{
|
||||
string nodeName = childNode.name();
|
||||
|
||||
// span label
|
||||
|
@ -16,8 +16,8 @@ ConsistentPhrase::ConsistentPhrase(
|
||||
int sourceStart, int sourceEnd,
|
||||
int targetStart, int targetEnd,
|
||||
const Parameter &params)
|
||||
:corners(4)
|
||||
,m_hieroNonTerm(*this, params.hieroNonTerm, params.hieroNonTerm)
|
||||
:corners(4)
|
||||
,m_hieroNonTerm(*this, params.hieroNonTerm, params.hieroNonTerm)
|
||||
{
|
||||
corners[0] = sourceStart;
|
||||
corners[1] = sourceEnd;
|
||||
@ -25,8 +25,7 @@ ConsistentPhrase::ConsistentPhrase(
|
||||
corners[3] = targetEnd;
|
||||
}
|
||||
|
||||
ConsistentPhrase::~ConsistentPhrase()
|
||||
{
|
||||
ConsistentPhrase::~ConsistentPhrase() {
|
||||
// TODO Auto-generated destructor stub
|
||||
}
|
||||
|
||||
|
@ -27,19 +27,16 @@ public:
|
||||
|
||||
virtual ~ConsistentPhrase();
|
||||
|
||||
int GetWidth(Moses::FactorDirection direction) const {
|
||||
return (direction == Moses::Input) ? corners[1] - corners[0] + 1 : corners[3] - corners[2] + 1;
|
||||
}
|
||||
int GetWidth(Moses::FactorDirection direction) const
|
||||
{ return (direction == Moses::Input) ? corners[1] - corners[0] + 1 : corners[3] - corners[2] + 1; }
|
||||
|
||||
|
||||
void AddNonTerms(const std::string &source,
|
||||
const std::string &target);
|
||||
const NonTerms &GetNonTerms() const {
|
||||
return m_nonTerms;
|
||||
}
|
||||
const NonTerm &GetHieroNonTerm() const {
|
||||
return m_hieroNonTerm;
|
||||
}
|
||||
const NonTerms &GetNonTerms() const
|
||||
{ return m_nonTerms;}
|
||||
const NonTerm &GetHieroNonTerm() const
|
||||
{ return m_hieroNonTerm;}
|
||||
|
||||
bool TargetOverlap(const ConsistentPhrase &other) const;
|
||||
|
||||
|
@ -17,8 +17,7 @@ ConsistentPhrases::ConsistentPhrases()
|
||||
{
|
||||
}
|
||||
|
||||
ConsistentPhrases::~ConsistentPhrases()
|
||||
{
|
||||
ConsistentPhrases::~ConsistentPhrases() {
|
||||
for (int start = 0; start < m_coll.size(); ++start) {
|
||||
std::vector<Coll> &allSourceStart = m_coll[start];
|
||||
|
||||
|
@ -14,8 +14,7 @@
|
||||
class Word;
|
||||
class Parameter;
|
||||
|
||||
class ConsistentPhrases
|
||||
{
|
||||
class ConsistentPhrases {
|
||||
public:
|
||||
typedef std::set<ConsistentPhrase*> Coll;
|
||||
|
||||
|
@ -27,12 +27,13 @@ using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
InputFileStream::InputFileStream(const std::string &filePath)
|
||||
InputFileStream::InputFileStream(const std::string &filePath)
|
||||
: std::istream(NULL)
|
||||
, m_streambuf(NULL)
|
||||
{
|
||||
{
|
||||
if (filePath.size() > 3 &&
|
||||
filePath.substr(filePath.size() - 3, 3) == ".gz") {
|
||||
filePath.substr(filePath.size() - 3, 3) == ".gz")
|
||||
{
|
||||
m_streambuf = new gzfilebuf(filePath.c_str());
|
||||
} else {
|
||||
std::filebuf* fb = new std::filebuf();
|
||||
@ -44,17 +45,17 @@ InputFileStream::InputFileStream(const std::string &filePath)
|
||||
m_streambuf = fb;
|
||||
}
|
||||
this->init(m_streambuf);
|
||||
}
|
||||
}
|
||||
|
||||
InputFileStream::~InputFileStream()
|
||||
{
|
||||
InputFileStream::~InputFileStream()
|
||||
{
|
||||
delete m_streambuf;
|
||||
m_streambuf = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void InputFileStream::Close()
|
||||
{
|
||||
}
|
||||
void InputFileStream::Close()
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -29,19 +29,19 @@
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
/** Used in place of std::istream, can read zipped files if it ends in .gz
|
||||
/** Used in place of std::istream, can read zipped files if it ends in .gz
|
||||
*/
|
||||
class InputFileStream : public std::istream
|
||||
{
|
||||
protected:
|
||||
class InputFileStream : public std::istream
|
||||
{
|
||||
protected:
|
||||
std::streambuf *m_streambuf;
|
||||
public:
|
||||
public:
|
||||
|
||||
InputFileStream(const std::string &filePath);
|
||||
~InputFileStream();
|
||||
|
||||
void Close();
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
@ -44,7 +44,8 @@ int main(int argc, char** argv)
|
||||
("MaxScope", po::value<int>()->default_value(params.maxScope), "maximum scope (see Hopkins and Langmead (2010)). Default is HIGH")
|
||||
("MinScope", po::value<int>()->default_value(params.minScope), "min scope.")
|
||||
|
||||
("SpanLength", "Property - span length of RHS each non-term")
|
||||
("SpanLength", "Property - span length of each LHS non-term")
|
||||
("RuleLength", "Property - length of entire rule. Only for rules with NTs")
|
||||
|
||||
("NonTermContext", "Property - (source) left and right, inside and outside words of each non-term ")
|
||||
("NonTermContextTarget", "Property - (target) left and right, inside and outside words of each non-term")
|
||||
@ -61,13 +62,15 @@ int main(int argc, char** argv)
|
||||
|
||||
|
||||
po::variables_map vm;
|
||||
try {
|
||||
try
|
||||
{
|
||||
po::store(po::parse_command_line(argc, argv, desc),
|
||||
vm); // can throw
|
||||
|
||||
/** --help option
|
||||
*/
|
||||
if ( vm.count("help") || argc < 5 ) {
|
||||
if ( vm.count("help") || argc < 5 )
|
||||
{
|
||||
std::cout << argv[0] << " target source alignment [options...]" << std::endl
|
||||
<< desc << std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
@ -75,7 +78,9 @@ int main(int argc, char** argv)
|
||||
|
||||
po::notify(vm); // throws on error, so do after help in case
|
||||
// there are any problems
|
||||
} catch(po::error& e) {
|
||||
}
|
||||
catch(po::error& e)
|
||||
{
|
||||
std::cerr << "ERROR: " << e.what() << std::endl << std::endl;
|
||||
std::cerr << desc << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
@ -103,6 +108,7 @@ int main(int argc, char** argv)
|
||||
|
||||
// properties
|
||||
if (vm.count("SpanLength")) params.spanLength = true;
|
||||
if (vm.count("RuleLength")) params.ruleLength = true;
|
||||
if (vm.count("NonTermContext")) params.nonTermContext = true;
|
||||
if (vm.count("NonTermContextTarget")) params.nonTermContextTarget = true;
|
||||
if (vm.count("NonTermContextFactor")) params.nonTermContextFactor = vm["NonTermContextFactor"].as<int>();
|
||||
@ -166,7 +172,8 @@ int main(int argc, char** argv)
|
||||
|
||||
if (params.sourceSyntax || params.targetSyntax) {
|
||||
alignedSentence = new AlignedSentenceSyntax(lineNum, lineSource, lineTarget, lineAlignment);
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
alignedSentence = new AlignedSentence(lineNum, lineSource, lineTarget, lineAlignment);
|
||||
}
|
||||
|
||||
|
@ -16,16 +16,15 @@ using namespace std;
|
||||
NonTerm::NonTerm(const ConsistentPhrase &consistentPhrase,
|
||||
const std::string &source,
|
||||
const std::string &target)
|
||||
:m_consistentPhrase(&consistentPhrase)
|
||||
,m_source(source)
|
||||
,m_target(target)
|
||||
:m_consistentPhrase(&consistentPhrase)
|
||||
,m_source(source)
|
||||
,m_target(target)
|
||||
{
|
||||
// TODO Auto-generated constructor stub
|
||||
|
||||
}
|
||||
|
||||
NonTerm::~NonTerm()
|
||||
{
|
||||
NonTerm::~NonTerm() {
|
||||
// TODO Auto-generated destructor stub
|
||||
}
|
||||
|
||||
@ -64,6 +63,4 @@ bool NonTerm::IsHiero(const Parameter &params) const
|
||||
}
|
||||
|
||||
int NonTerm::GetWidth(Moses::FactorDirection direction) const
|
||||
{
|
||||
return GetConsistentPhrase().GetWidth(direction);
|
||||
}
|
||||
{ return GetConsistentPhrase().GetWidth(direction); }
|
||||
|
@ -21,19 +21,16 @@ public:
|
||||
const std::string &target);
|
||||
virtual ~NonTerm();
|
||||
|
||||
const ConsistentPhrase &GetConsistentPhrase() const {
|
||||
return *m_consistentPhrase;
|
||||
}
|
||||
const ConsistentPhrase &GetConsistentPhrase() const
|
||||
{ return *m_consistentPhrase; }
|
||||
|
||||
int GetWidth(Moses::FactorDirection direction) const;
|
||||
|
||||
virtual bool IsNonTerm() const {
|
||||
return true;
|
||||
}
|
||||
virtual bool IsNonTerm() const
|
||||
{ return true; }
|
||||
|
||||
std::string GetString() const {
|
||||
return m_source + m_target;
|
||||
}
|
||||
std::string GetString() const
|
||||
{ return m_source + m_target; }
|
||||
|
||||
virtual std::string Debug() const;
|
||||
virtual void Output(std::ostream &out) const;
|
||||
|
@ -11,46 +11,46 @@
|
||||
using namespace std;
|
||||
|
||||
Parameter::Parameter()
|
||||
:maxSpan(10)
|
||||
,minSpan(0)
|
||||
,maxNonTerm(2)
|
||||
,maxHieroNonTerm(999)
|
||||
,maxSymbolsTarget(999)
|
||||
,maxSymbolsSource(5)
|
||||
,minHoleSource(2)
|
||||
,minHoleSourceSyntax(1)
|
||||
,sentenceOffset(0)
|
||||
,nonTermConsecSource(false)
|
||||
,requireAlignedWord(true)
|
||||
,fractionalCounting(true)
|
||||
,gzOutput(false)
|
||||
:maxSpan(10)
|
||||
,minSpan(0)
|
||||
,maxNonTerm(2)
|
||||
,maxHieroNonTerm(999)
|
||||
,maxSymbolsTarget(999)
|
||||
,maxSymbolsSource(5)
|
||||
,minHoleSource(2)
|
||||
,minHoleSourceSyntax(1)
|
||||
,sentenceOffset(0)
|
||||
,nonTermConsecSource(false)
|
||||
,requireAlignedWord(true)
|
||||
,fractionalCounting(true)
|
||||
,gzOutput(false)
|
||||
|
||||
,hieroNonTerm("[X]")
|
||||
,sourceSyntax(false)
|
||||
,targetSyntax(false)
|
||||
,hieroNonTerm("[X]")
|
||||
,sourceSyntax(false)
|
||||
,targetSyntax(false)
|
||||
|
||||
,mixedSyntaxType(0)
|
||||
,multiLabel(0)
|
||||
,nonTermConsecSourceMixed(true)
|
||||
,hieroSourceLHS(false)
|
||||
,maxSpanFreeNonTermSource(0)
|
||||
,nieceTerminal(true)
|
||||
,maxScope(UNDEFINED)
|
||||
,minScope(0)
|
||||
,mixedSyntaxType(0)
|
||||
,multiLabel(0)
|
||||
,nonTermConsecSourceMixed(true)
|
||||
,hieroSourceLHS(false)
|
||||
,maxSpanFreeNonTermSource(0)
|
||||
,nieceTerminal(true)
|
||||
,maxScope(UNDEFINED)
|
||||
,minScope(0)
|
||||
|
||||
,spanLength(false)
|
||||
,nonTermContext(false)
|
||||
,nonTermContextTarget(false)
|
||||
,nonTermContextFactor(0)
|
||||
,spanLength(false)
|
||||
,ruleLength(false)
|
||||
,nonTermContext(false)
|
||||
,nonTermContextTarget(false)
|
||||
,nonTermContextFactor(0)
|
||||
|
||||
,numSourceFactors(1)
|
||||
,numTargetFactors(1)
|
||||
,numSourceFactors(1)
|
||||
,numTargetFactors(1)
|
||||
|
||||
,nonTermConsecSourceMixedSyntax(1)
|
||||
,nonTermConsecSourceMixedSyntax(1)
|
||||
{}
|
||||
|
||||
Parameter::~Parameter()
|
||||
{
|
||||
Parameter::~Parameter() {
|
||||
// TODO Auto-generated destructor stub
|
||||
}
|
||||
|
||||
|
@ -48,6 +48,7 @@ public:
|
||||
|
||||
// properties
|
||||
bool spanLength;
|
||||
bool ruleLength;
|
||||
bool nonTermContext;
|
||||
bool nonTermContextTarget;
|
||||
int nonTermContextFactor;
|
||||
|
@ -7,12 +7,12 @@
|
||||
class Phrase : public std::vector<Word*>
|
||||
{
|
||||
public:
|
||||
Phrase() {
|
||||
}
|
||||
Phrase()
|
||||
{}
|
||||
|
||||
Phrase(size_t size)
|
||||
:std::vector<Word*>(size) {
|
||||
}
|
||||
:std::vector<Word*>(size)
|
||||
{}
|
||||
|
||||
std::string Debug() const;
|
||||
|
||||
|
2
phrase-extract/extract-mixed-syntax/Property.cpp
Normal file
2
phrase-extract/extract-mixed-syntax/Property.cpp
Normal file
@ -0,0 +1,2 @@
|
||||
#include "Property.h"
|
||||
|
14
phrase-extract/extract-mixed-syntax/Property.h
Normal file
14
phrase-extract/extract-mixed-syntax/Property.h
Normal file
@ -0,0 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
typedef std::string Property;
|
||||
|
||||
/*
|
||||
class Property
|
||||
{
|
||||
public:
|
||||
std::string str;
|
||||
|
||||
};
|
||||
*/
|
@ -16,35 +16,32 @@
|
||||
using namespace std;
|
||||
|
||||
Rule::Rule(const NonTerm &lhsNonTerm, const AlignedSentence &alignedSentence)
|
||||
:m_lhs(lhsNonTerm)
|
||||
,m_alignedSentence(alignedSentence)
|
||||
,m_isValid(true)
|
||||
,m_canRecurse(true)
|
||||
:m_lhs(lhsNonTerm)
|
||||
,m_alignedSentence(alignedSentence)
|
||||
,m_isValid(true)
|
||||
,m_canRecurse(true)
|
||||
{
|
||||
CreateSource();
|
||||
}
|
||||
|
||||
Rule::Rule(const Rule &copy, const NonTerm &nonTerm)
|
||||
:m_lhs(copy.m_lhs)
|
||||
,m_alignedSentence(copy.m_alignedSentence)
|
||||
,m_isValid(true)
|
||||
,m_canRecurse(true)
|
||||
,m_nonterms(copy.m_nonterms)
|
||||
:m_lhs(copy.m_lhs)
|
||||
,m_alignedSentence(copy.m_alignedSentence)
|
||||
,m_isValid(true)
|
||||
,m_canRecurse(true)
|
||||
,m_nonterms(copy.m_nonterms)
|
||||
{
|
||||
m_nonterms.push_back(&nonTerm);
|
||||
CreateSource();
|
||||
|
||||
}
|
||||
|
||||
Rule::~Rule()
|
||||
{
|
||||
Rule::~Rule() {
|
||||
// TODO Auto-generated destructor stub
|
||||
}
|
||||
|
||||
const ConsistentPhrase &Rule::GetConsistentPhrase() const
|
||||
{
|
||||
return m_lhs.GetConsistentPhrase();
|
||||
}
|
||||
{ return m_lhs.GetConsistentPhrase(); }
|
||||
|
||||
void Rule::CreateSource()
|
||||
{
|
||||
@ -70,7 +67,8 @@ void Rule::CreateSource()
|
||||
// move to next non-term
|
||||
++nonTermInd;
|
||||
cp = (nonTermInd < m_nonterms.size()) ? m_nonterms[nonTermInd] : NULL;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
// terminal
|
||||
ruleSymbol = m_alignedSentence.GetPhrase(Moses::Input)[sourcePos];
|
||||
}
|
||||
@ -84,7 +82,8 @@ int Rule::GetNextSourcePosForNonTerm() const
|
||||
if (m_nonterms.empty()) {
|
||||
// no non-terms so far. Can start next non-term on left corner
|
||||
return m_lhs.GetConsistentPhrase().corners[0];
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
// next non-term can start just left of previous
|
||||
const ConsistentPhrase &cp = m_nonterms.back()->GetConsistentPhrase();
|
||||
int nextPos = cp.corners[1] + 1;
|
||||
@ -122,7 +121,7 @@ std::string Rule::Debug() const
|
||||
return out.str();
|
||||
}
|
||||
|
||||
void Rule::Output(std::ostream &out, bool forward, const Parameter &params) const
|
||||
void Rule::Output(std::ostream &out, bool forward) const
|
||||
{
|
||||
if (forward) {
|
||||
// source
|
||||
@ -134,7 +133,8 @@ void Rule::Output(std::ostream &out, bool forward, const Parameter &params) cons
|
||||
// target
|
||||
m_target.Output(out);
|
||||
m_lhs.Output(out, Moses::Output);
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
// target
|
||||
m_target.Output(out);
|
||||
m_lhs.Output(out, Moses::Output);
|
||||
@ -155,7 +155,8 @@ void Rule::Output(std::ostream &out, bool forward, const Parameter &params) cons
|
||||
|
||||
if (forward) {
|
||||
out << alignPair.first << "-" << alignPair.second << " ";
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
out << alignPair.second << "-" << alignPair.first << " ";
|
||||
}
|
||||
}
|
||||
@ -168,47 +169,12 @@ void Rule::Output(std::ostream &out, bool forward, const Parameter &params) cons
|
||||
out << " ||| ";
|
||||
|
||||
// properties
|
||||
|
||||
// span length
|
||||
if (forward && params.spanLength && m_nonterms.size()) {
|
||||
out << "{{SpanLength ";
|
||||
|
||||
for (size_t i = 0; i < m_nonterms.size(); ++i) {
|
||||
const NonTerm &nonTerm = *m_nonterms[i];
|
||||
const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
|
||||
out << i << "," << cp.GetWidth(Moses::Input) << "," << cp.GetWidth(Moses::Output) << " ";
|
||||
if (forward) {
|
||||
for (size_t i = 0; i < m_properties.size(); ++i) {
|
||||
const Property &prop = m_properties[i];
|
||||
out << prop << " ";
|
||||
}
|
||||
out << "}} ";
|
||||
}
|
||||
|
||||
// non-term context (source)
|
||||
if (forward && params.nonTermContext && m_nonterms.size()) {
|
||||
out << "{{NonTermContext ";
|
||||
|
||||
int factor = params.nonTermContextFactor;
|
||||
|
||||
for (size_t i = 0; i < m_nonterms.size(); ++i) {
|
||||
const NonTerm &nonTerm = *m_nonterms[i];
|
||||
const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
|
||||
NonTermContext(1, factor, i, cp, out);
|
||||
}
|
||||
out << "}} ";
|
||||
}
|
||||
|
||||
// non-term context (target)
|
||||
if (forward && params.nonTermContextTarget && m_nonterms.size()) {
|
||||
out << "{{NonTermContextTarget ";
|
||||
|
||||
int factor = params.nonTermContextFactor;
|
||||
|
||||
for (size_t i = 0; i < m_nonterms.size(); ++i) {
|
||||
const NonTerm &nonTerm = *m_nonterms[i];
|
||||
const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
|
||||
NonTermContext(2, factor, i, cp, out);
|
||||
}
|
||||
out << "}} ";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Rule::NonTermContextFactor(int factor, const Word &word, std::ostream &out) const
|
||||
@ -225,11 +191,13 @@ void Rule::NonTermContext(int sourceTarget, int factor, size_t ntInd, const Cons
|
||||
startPos = cp.corners[0];
|
||||
endPos = cp.corners[1];
|
||||
phrase = &m_alignedSentence.GetPhrase(Moses::Input);
|
||||
} else if (sourceTarget == 2) {
|
||||
}
|
||||
else if (sourceTarget == 2) {
|
||||
startPos = cp.corners[2];
|
||||
endPos = cp.corners[3];
|
||||
phrase = &m_alignedSentence.GetPhrase(Moses::Output);
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
abort();
|
||||
}
|
||||
|
||||
@ -238,7 +206,8 @@ void Rule::NonTermContext(int sourceTarget, int factor, size_t ntInd, const Cons
|
||||
// left outside
|
||||
if (startPos == 0) {
|
||||
out << "<s> ";
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
NonTermContextFactor(factor, *phrase->at(startPos - 1), out);
|
||||
}
|
||||
|
||||
@ -251,7 +220,8 @@ void Rule::NonTermContext(int sourceTarget, int factor, size_t ntInd, const Cons
|
||||
// right outside
|
||||
if (endPos == phrase->size() - 1) {
|
||||
out << "</s> ";
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
NonTermContextFactor(factor, *phrase->at(endPos + 1), out);
|
||||
}
|
||||
|
||||
@ -279,7 +249,8 @@ void Rule::Prevalidate(const Parameter &params)
|
||||
m_canRecurse = false;
|
||||
return;
|
||||
}
|
||||
} else if (sourceWidth < params.minHoleSourceSyntax) {
|
||||
}
|
||||
else if (sourceWidth < params.minHoleSourceSyntax) {
|
||||
m_isValid = false;
|
||||
m_canRecurse = false;
|
||||
return;
|
||||
@ -329,7 +300,8 @@ void Rule::Prevalidate(const Parameter &params)
|
||||
m_isValid = false;
|
||||
m_canRecurse = false;
|
||||
return;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
// Hieu's mixed syntax
|
||||
switch (params.nonTermConsecSourceMixedSyntax) {
|
||||
case 0:
|
||||
@ -470,7 +442,8 @@ void Rule::Prevalidate(const Parameter &params)
|
||||
int scope = GetScope(params);
|
||||
if (scope >= params.scopeSpan.size()) {
|
||||
// no constraint on it. It's ok
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
const std::pair<int,int> &constraint = params.scopeSpan[scope];
|
||||
int sourceWidth = m_lhs.GetWidth(Moses::Input);
|
||||
if (sourceWidth < constraint.first || sourceWidth > constraint.second) {
|
||||
@ -595,7 +568,8 @@ void Rule::CreateTarget(const Parameter &params)
|
||||
// move to next non-term
|
||||
++nonTermInd;
|
||||
cp = (nonTermInd < targetNonTerm.size()) ? targetNonTerm[nonTermInd] : NULL;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
// terminal
|
||||
ruleSymbol = m_alignedSentence.GetPhrase(Moses::Output)[targetPos];
|
||||
}
|
||||
@ -619,7 +593,8 @@ void Rule::CreateAlignments()
|
||||
const Word &sourceWord = static_cast<const Word&>(*symbol);
|
||||
const std::set<const Word *> &targetWords = sourceWord.GetAlignment();
|
||||
CreateAlignments(sourcePos, targetWords);
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
// non-terms. same object in both source & target
|
||||
CreateAlignments(sourcePos, symbol);
|
||||
}
|
||||
@ -650,3 +625,70 @@ void Rule::CreateAlignments(int sourcePos, const RuleSymbol *targetSought)
|
||||
throw "not found";
|
||||
}
|
||||
|
||||
void Rule::CreateProperties(const Parameter &params)
|
||||
{
|
||||
//cerr << Debug() << " " << m_nonterms.size() << endl;
|
||||
|
||||
// span length
|
||||
if (params.spanLength && m_nonterms.size()) {
|
||||
stringstream strme;
|
||||
strme << "{{SpanLength ";
|
||||
|
||||
for (size_t i = 0; i < m_nonterms.size(); ++i) {
|
||||
const NonTerm &nonTerm = *m_nonterms[i];
|
||||
const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
|
||||
strme << i << "," << cp.GetWidth(Moses::Input) << "," << cp.GetWidth(Moses::Output) << " ";
|
||||
}
|
||||
strme << "}}";
|
||||
|
||||
m_properties.push_back(strme.str());
|
||||
}
|
||||
|
||||
if (params.ruleLength && m_nonterms.size()) {
|
||||
const ConsistentPhrase &cp = m_lhs.GetConsistentPhrase();
|
||||
|
||||
stringstream strme;
|
||||
strme << "{{RuleLength ";
|
||||
strme << cp.GetWidth(Moses::Input);
|
||||
strme << "}}";
|
||||
|
||||
m_properties.push_back(strme.str());
|
||||
}
|
||||
|
||||
// non-term context (source)
|
||||
if (params.nonTermContext && m_nonterms.size()) {
|
||||
stringstream strme;
|
||||
strme << "{{NonTermContext ";
|
||||
|
||||
int factor = params.nonTermContextFactor;
|
||||
|
||||
for (size_t i = 0; i < m_nonterms.size(); ++i) {
|
||||
const NonTerm &nonTerm = *m_nonterms[i];
|
||||
const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
|
||||
NonTermContext(1, factor, i, cp, strme);
|
||||
}
|
||||
strme << "}}";
|
||||
|
||||
m_properties.push_back(strme.str());
|
||||
}
|
||||
|
||||
// non-term context (target)
|
||||
if (params.nonTermContextTarget && m_nonterms.size()) {
|
||||
stringstream strme;
|
||||
strme << "{{NonTermContextTarget ";
|
||||
|
||||
int factor = params.nonTermContextFactor;
|
||||
|
||||
for (size_t i = 0; i < m_nonterms.size(); ++i) {
|
||||
const NonTerm &nonTerm = *m_nonterms[i];
|
||||
const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
|
||||
NonTermContext(2, factor, i, cp, strme);
|
||||
}
|
||||
strme << "}}";
|
||||
|
||||
m_properties.push_back(strme.str());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <vector>
|
||||
#include "Phrase.h"
|
||||
#include "RulePhrase.h"
|
||||
#include "Property.h"
|
||||
#include "moses/TypeDef.h"
|
||||
|
||||
class ConsistentPhrase;
|
||||
@ -16,10 +17,10 @@ class NonTerm;
|
||||
class Parameter;
|
||||
|
||||
|
||||
class Rule
|
||||
{
|
||||
class Rule {
|
||||
public:
|
||||
typedef std::set<std::pair<int,int> > Alignments;
|
||||
typedef std::vector<Property> Properties;
|
||||
|
||||
Rule(const Rule &copy); // do not implement
|
||||
|
||||
@ -31,42 +32,39 @@ public:
|
||||
|
||||
virtual ~Rule();
|
||||
|
||||
bool IsValid() const {
|
||||
return m_isValid;
|
||||
}
|
||||
bool IsValid() const
|
||||
{ return m_isValid; }
|
||||
|
||||
bool CanRecurse() const {
|
||||
return m_canRecurse;
|
||||
}
|
||||
bool CanRecurse() const
|
||||
{ return m_canRecurse; }
|
||||
|
||||
const NonTerm &GetLHS() const {
|
||||
return m_lhs;
|
||||
}
|
||||
const NonTerm &GetLHS() const
|
||||
{ return m_lhs; }
|
||||
|
||||
const ConsistentPhrase &GetConsistentPhrase() const;
|
||||
|
||||
int GetNextSourcePosForNonTerm() const;
|
||||
|
||||
void SetCount(float count) {
|
||||
m_count = count;
|
||||
}
|
||||
float GetCount() const {
|
||||
return m_count;
|
||||
}
|
||||
void SetCount(float count)
|
||||
{ m_count = count; }
|
||||
float GetCount() const
|
||||
{ return m_count; }
|
||||
|
||||
const Alignments &GetAlignments() const {
|
||||
return m_alignments;
|
||||
}
|
||||
const Alignments &GetAlignments() const
|
||||
{ return m_alignments; }
|
||||
|
||||
const Properties &GetProperties() const
|
||||
{ return m_properties; }
|
||||
|
||||
std::string Debug() const;
|
||||
void Output(std::ostream &out, bool forward, const Parameter &params) const;
|
||||
void Output(std::ostream &out, bool forward) const;
|
||||
|
||||
void Prevalidate(const Parameter &params);
|
||||
void CreateTarget(const Parameter &params);
|
||||
void CreateProperties(const Parameter &params);
|
||||
|
||||
const RulePhrase &GetPhrase(Moses::FactorDirection direction) const {
|
||||
return (direction == Moses::Input) ? m_source : m_target;
|
||||
}
|
||||
const RulePhrase &GetPhrase(Moses::FactorDirection direction) const
|
||||
{ return (direction == Moses::Input) ? m_source : m_target; }
|
||||
|
||||
protected:
|
||||
const NonTerm &m_lhs;
|
||||
@ -81,6 +79,9 @@ protected:
|
||||
|
||||
bool m_isValid, m_canRecurse;
|
||||
|
||||
// should be in consistent order, for comparisons
|
||||
Properties m_properties;
|
||||
|
||||
void CreateSource();
|
||||
void CreateAlignments();
|
||||
void CreateAlignments(int sourcePos, const std::set<const Word *> &targetWords);
|
||||
|
@ -21,11 +21,11 @@ public:
|
||||
typedef std::vector<const RuleSymbol*> Coll;
|
||||
Coll m_coll;
|
||||
|
||||
size_t GetSize() const {
|
||||
return m_coll.size();
|
||||
}
|
||||
size_t GetSize() const
|
||||
{ return m_coll.size(); }
|
||||
|
||||
void Add(const RuleSymbol *symbol) {
|
||||
void Add(const RuleSymbol *symbol)
|
||||
{
|
||||
m_coll.push_back(symbol);
|
||||
}
|
||||
|
||||
|
@ -9,14 +9,12 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
RuleSymbol::RuleSymbol()
|
||||
{
|
||||
RuleSymbol::RuleSymbol() {
|
||||
// TODO Auto-generated constructor stub
|
||||
|
||||
}
|
||||
|
||||
RuleSymbol::~RuleSymbol()
|
||||
{
|
||||
RuleSymbol::~RuleSymbol() {
|
||||
// TODO Auto-generated destructor stub
|
||||
}
|
||||
|
||||
@ -31,7 +29,8 @@ int RuleSymbol::Compare(const RuleSymbol &other) const
|
||||
|
||||
if (str == otherStr) {
|
||||
return 0;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
return (str < otherStr) ? -1 : +1;
|
||||
}
|
||||
}
|
||||
|
@ -12,8 +12,7 @@
|
||||
#include <string>
|
||||
|
||||
// base class - terminal or non-term
|
||||
class RuleSymbol
|
||||
{
|
||||
class RuleSymbol {
|
||||
public:
|
||||
RuleSymbol();
|
||||
virtual ~RuleSymbol();
|
||||
|
@ -19,12 +19,11 @@ using namespace std;
|
||||
extern bool g_debug;
|
||||
|
||||
Rules::Rules(const AlignedSentence &alignedSentence)
|
||||
:m_alignedSentence(alignedSentence)
|
||||
:m_alignedSentence(alignedSentence)
|
||||
{
|
||||
}
|
||||
|
||||
Rules::~Rules()
|
||||
{
|
||||
Rules::~Rules() {
|
||||
Moses::RemoveAllInColl(m_keepRules);
|
||||
}
|
||||
|
||||
@ -34,7 +33,8 @@ void Rules::CreateRules(const ConsistentPhrase &cp,
|
||||
if (params.hieroSourceLHS) {
|
||||
const NonTerm &nonTerm = cp.GetHieroNonTerm();
|
||||
CreateRule(nonTerm, params);
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
|
||||
for (size_t i = 0; i < nonTerms.size(); ++i) {
|
||||
const NonTerm &nonTerm = nonTerms[i];
|
||||
@ -50,7 +50,7 @@ void Rules::CreateRule(const NonTerm &nonTerm,
|
||||
|
||||
rule->Prevalidate(params);
|
||||
rule->CreateTarget(params);
|
||||
|
||||
rule->CreateProperties(params);
|
||||
|
||||
if (rule->CanRecurse()) {
|
||||
Extend(*rule, params);
|
||||
@ -58,7 +58,8 @@ void Rules::CreateRule(const NonTerm &nonTerm,
|
||||
|
||||
if (rule->IsValid()) {
|
||||
m_keepRules.insert(rule);
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
delete rule;
|
||||
}
|
||||
|
||||
@ -121,6 +122,7 @@ void Rules::Extend(const Rule &rule, const ConsistentPhrase &cp, const Parameter
|
||||
Rule *newRule = new Rule(rule, nonTerm);
|
||||
newRule->Prevalidate(params);
|
||||
newRule->CreateTarget(params);
|
||||
newRule->CreateProperties(params);
|
||||
|
||||
if (newRule->CanRecurse()) {
|
||||
// recursively extend
|
||||
@ -129,7 +131,8 @@ void Rules::Extend(const Rule &rule, const ConsistentPhrase &cp, const Parameter
|
||||
|
||||
if (newRule->IsValid()) {
|
||||
m_keepRules.insert(newRule);
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
delete newRule;
|
||||
}
|
||||
}
|
||||
@ -154,7 +157,7 @@ void Rules::Output(std::ostream &out, bool forward, const Parameter &params) con
|
||||
std::set<Rule*, CompareRules>::const_iterator iter;
|
||||
for (iter = m_mergeRules.begin(); iter != m_mergeRules.end(); ++iter) {
|
||||
const Rule &rule = **iter;
|
||||
rule.Output(out, forward, params);
|
||||
rule.Output(out, forward);
|
||||
out << endl;
|
||||
}
|
||||
}
|
||||
@ -163,7 +166,8 @@ void Rules::Consolidate(const Parameter &params)
|
||||
{
|
||||
if (params.fractionalCounting) {
|
||||
CalcFractionalCount();
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
std::set<Rule*>::iterator iter;
|
||||
for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
|
||||
Rule &rule = **iter;
|
||||
|
@ -18,7 +18,8 @@ class AlignedSentence;
|
||||
class Parameter;
|
||||
|
||||
struct CompareRules {
|
||||
bool operator()(const Rule *a, const Rule *b) {
|
||||
bool operator()(const Rule *a, const Rule *b)
|
||||
{
|
||||
int compare;
|
||||
|
||||
compare = a->GetPhrase(Moses::Input).Compare(b->GetPhrase(Moses::Input));
|
||||
@ -35,12 +36,15 @@ struct CompareRules {
|
||||
return a->GetLHS().GetString() < b->GetLHS().GetString();
|
||||
}
|
||||
|
||||
if (a->GetProperties() != b->GetProperties()) {
|
||||
return a->GetProperties() < b->GetProperties();
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
class Rules
|
||||
{
|
||||
class Rules {
|
||||
public:
|
||||
Rules(const AlignedSentence &alignedSentence);
|
||||
virtual ~Rules();
|
||||
|
@ -18,7 +18,8 @@ void SyntaxTree::Add(int startPos, int endPos, const std::string &label, const P
|
||||
// delete the label in collection and add new
|
||||
assert(labels.size() == 1);
|
||||
labels.clear();
|
||||
} else if (params.multiLabel == 2) {
|
||||
}
|
||||
else if (params.multiLabel == 2) {
|
||||
// ignore this label
|
||||
add = false;
|
||||
}
|
||||
|
@ -11,15 +11,14 @@
|
||||
using namespace std;
|
||||
|
||||
Word::Word(int pos, const std::string &str)
|
||||
:m_pos(pos)
|
||||
,m_str(str)
|
||||
:m_pos(pos)
|
||||
,m_str(str)
|
||||
{
|
||||
// TODO Auto-generated constructor stub
|
||||
|
||||
}
|
||||
|
||||
Word::~Word()
|
||||
{
|
||||
Word::~Word() {
|
||||
// TODO Auto-generated destructor stub
|
||||
}
|
||||
|
||||
|
@ -18,25 +18,21 @@ public:
|
||||
Word(int pos, const std::string &str);
|
||||
virtual ~Word();
|
||||
|
||||
virtual bool IsNonTerm() const {
|
||||
return false;
|
||||
}
|
||||
virtual bool IsNonTerm() const
|
||||
{ return false; }
|
||||
|
||||
std::string GetString() const {
|
||||
return m_str;
|
||||
}
|
||||
std::string GetString() const
|
||||
{ return m_str; }
|
||||
|
||||
std::string GetString(int factor) const;
|
||||
|
||||
int GetPos() const {
|
||||
return m_pos;
|
||||
}
|
||||
int GetPos() const
|
||||
{ return m_pos; }
|
||||
|
||||
void AddAlignment(const Word *other);
|
||||
|
||||
const std::set<const Word *> &GetAlignment() const {
|
||||
return m_alignment;
|
||||
}
|
||||
const std::set<const Word *> &GetAlignment() const
|
||||
{ return m_alignment; }
|
||||
|
||||
std::set<int> GetAlignmentIndex() const;
|
||||
|
||||
|
@ -5,18 +5,15 @@
|
||||
#include <zlib.h>
|
||||
#include <cstring>
|
||||
|
||||
class gzfilebuf : public std::streambuf
|
||||
{
|
||||
class gzfilebuf : public std::streambuf {
|
||||
public:
|
||||
gzfilebuf(const char *filename) {
|
||||
_gzf = gzopen(filename, "rb");
|
||||
gzfilebuf(const char *filename)
|
||||
{ _gzf = gzopen(filename, "rb");
|
||||
setg (_buff+sizeof(int), // beginning of putback area
|
||||
_buff+sizeof(int), // read position
|
||||
_buff+sizeof(int)); // end position
|
||||
}
|
||||
~gzfilebuf() {
|
||||
gzclose(_gzf);
|
||||
}
|
||||
~gzfilebuf() { gzclose(_gzf); }
|
||||
protected:
|
||||
virtual int_type overflow (int_type c) {
|
||||
throw;
|
||||
@ -29,8 +26,7 @@ protected:
|
||||
throw;
|
||||
}
|
||||
|
||||
virtual std::streampos seekpos ( std::streampos sp, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out ) {
|
||||
throw;
|
||||
virtual std::streampos seekpos ( std::streampos sp, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out ){ throw;
|
||||
}
|
||||
|
||||
//read one character
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user