phrase-extract: add syntax-common sub-library

And remove some (near-)duplicate code from pcfg-common and score-stsg.
This commit is contained in:
Phil Williams 2014-12-07 14:27:51 +00:00
parent d966a0492b
commit 60e56efc6b
58 changed files with 805 additions and 528 deletions

View File

@ -173,7 +173,7 @@ project : requirements
;
#Add directories here if you want their incidental targets too (i.e. tests).
build-projects lm util phrase-extract search moses moses/LM mert moses-cmd mira scripts regression-testing ;
build-projects lm util phrase-extract phrase-extract/syntax-common search moses moses/LM mert moses-cmd mira scripts regression-testing ;
if [ option.get "with-mm" : : "yes" ]
{

View File

@ -1 +1 @@
lib pcfg_common : [ glob *.cc ] ..//deps : <include>.. ;
lib pcfg_common : [ glob *.cc ] ..//syntax-common ..//deps : <include>.. ;

View File

@ -1,46 +0,0 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#ifndef PCFG_EXCEPTION_H_
#define PCFG_EXCEPTION_H_
#include <string>
namespace Moses
{
namespace PCFG
{
// Minimal exception type: wraps a human-readable message that is fixed at
// construction and can be read back with msg().
class Exception
{
public:
  // Builds the exception from a C string.  text is used to construct a
  // std::string, so it must not be null.
  Exception(const char *text) : msg_(text) {}

  // Builds the exception from a copy of text.
  Exception(const std::string &text) : msg_(text) {}

  // Returns the message supplied at construction.
  const std::string &msg() const { return msg_; }

private:
  std::string msg_;
};
} // namespace PCFG
} // namespace Moses
#endif

View File

@ -1,126 +0,0 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#ifndef PCFG_NUMBERED_SET_H_
#define PCFG_NUMBERED_SET_H_
#include "exception.h"
#include <boost/unordered_map.hpp>
#include <limits>
#include <sstream>
#include <vector>
namespace Moses
{
namespace PCFG
{
// A set of values of type T in which every distinct element is assigned an
// integral ID of type I.  IDs are contiguous and start at 0.  Elements cannot
// be removed one at a time; Clear() discards the whole set.
template<typename T, typename I=std::size_t>
class NumberedSet
{
private:
  typedef boost::unordered_map<T, I> ElementToIdMap;
  typedef std::vector<const T *> IdToElementMap;

public:
  typedef I IdType;
  typedef typename IdToElementMap::const_iterator const_iterator;

  NumberedSet() {}

  // Iteration walks the ID-indexed table, yielding pointers to the elements.
  const_iterator begin() const { return id_to_element_.begin(); }
  const_iterator end() const { return id_to_element_.end(); }

  // The ID that Lookup(const T &) reports for an element not in the set.
  static I NullId() { return std::numeric_limits<I>::max(); }

  bool Empty() const { return id_to_element_.empty(); }
  std::size_t Size() const { return id_to_element_.size(); }

  // Inserts the given element (if not already present) and returns its ID.
  I Insert(const T &);

  // Returns the ID of the given element, or NullId() if it is absent.
  I Lookup(const T &) const;

  // Returns the element with the given ID; throws Exception if there is none.
  const T &Lookup(I) const;

  // Removes every element.
  void Clear();

private:
  ElementToIdMap element_to_id_;  // element -> ID
  IdToElementMap id_to_element_;  // ID -> pointer to the key in element_to_id_
};
// Returns the ID assigned to element, or NullId() if element is not in the
// set.
template<typename T, typename I>
I NumberedSet<T, I>::Lookup(const T &element) const
{
  const typename ElementToIdMap::const_iterator found =
    element_to_id_.find(element);
  if (found != element_to_id_.end()) {
    return found->second;
  }
  return NullId();
}
// Returns the element whose ID is id.  Throws Exception with the message
// "Value not found: <id>" when id does not index a stored element.
template<typename T, typename I>
const T &NumberedSet<T, I>::Lookup(I id) const
{
  // Valid IDs are exactly the indices of id_to_element_.
  const bool in_range = (id >= 0) && (id < id_to_element_.size());
  if (in_range) {
    return *(id_to_element_[id]);
  }
  std::ostringstream error;
  error << "Value not found: " << id;
  throw Exception(error.str());
}
// Adds x to the set if it is not already present and returns its ID.  A new
// element receives the next unused ID (the current number of elements); an
// existing element keeps the ID it was given earlier.
template<typename T, typename I>
I NumberedSet<T, I>::Insert(const T &x)
{
  typedef typename ElementToIdMap::iterator MapIterator;

  // Offer x paired with the next free ID; the map ignores the offer if x is
  // already a key.
  std::pair<T, I> candidate(x, id_to_element_.size());
  std::pair<MapIterator, bool> outcome = element_to_id_.insert(candidate);

  const MapIterator entry = outcome.first;
  const bool is_new = outcome.second;
  if (is_new) {
    // Record the address of the key owned by element_to_id_ so that the
    // element can be retrieved by ID.
    id_to_element_.push_back(&entry->first);
  }
  return entry->second;
}
// Removes every element, leaving the set empty.
template<typename T, typename I>
void NumberedSet<T, I>::Clear()
{
  // Drop the ID table first: its entries point at keys owned by
  // element_to_id_.
  id_to_element_.clear();
  element_to_id_.clear();
}
} // namespace PCFG
} // namespace Moses
#endif

View File

@ -19,14 +19,15 @@
#include "pcfg.h"
#include "exception.h"
#include <cassert>
#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>
#include <cassert>
#include "syntax-common/exception.h"
namespace Moses {
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
void Pcfg::Add(const Key &key, double score) {
@ -103,4 +104,5 @@ void Pcfg::Write(const Vocabulary &vocab, std::ostream &output) const {
}
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -21,21 +21,19 @@
#ifndef PCFG_PCFG_H_
#define PCFG_PCFG_H_
#include "typedef.h"
#include <istream>
#include <map>
#include <ostream>
#include <vector>
namespace Moses
{
namespace PCFG
{
#include "typedef.h"
class Pcfg
{
public:
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
class Pcfg {
public:
typedef std::vector<std::size_t> Key;
typedef std::map<Key, double> Map;
typedef Map::iterator iterator;
@ -43,30 +41,23 @@ public:
Pcfg() {}
iterator begin() {
return rules_.begin();
}
const_iterator begin() const {
return rules_.begin();
}
iterator begin() { return rules_.begin(); }
const_iterator begin() const { return rules_.begin(); }
iterator end() {
return rules_.end();
}
const_iterator end() const {
return rules_.end();
}
iterator end() { return rules_.end(); }
const_iterator end() const { return rules_.end(); }
void Add(const Key &, double);
bool Lookup(const Key &, double &) const;
void Read(std::istream &, Vocabulary &);
void Write(const Vocabulary &, std::ostream &) const;
private:
private:
Map rules_;
};
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining
#endif

View File

@ -21,48 +21,40 @@
#ifndef PCFG_PCFG_TREE_H_
#define PCFG_PCFG_TREE_H_
#include <string>
#include "syntax_tree.h"
#include "xml_tree_writer.h"
#include <string>
namespace Moses
{
namespace PCFG
{
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
template<typename DerivedType>
class PcfgTreeBase : public SyntaxTreeBase<std::string, DerivedType>
{
public:
class PcfgTreeBase : public SyntaxTreeBase<std::string, DerivedType> {
public:
typedef std::string LabelType;
typedef SyntaxTreeBase<LabelType, DerivedType> BaseType;
PcfgTreeBase(const LabelType &label) : BaseType(label), score_(0.0) {}
double score() const {
return score_;
}
void set_score(double s) {
score_ = s;
}
double score() const { return score_; }
void set_score(double s) { score_ = s; }
private:
private:
double score_;
};
class PcfgTree : public PcfgTreeBase<PcfgTree>
{
public:
class PcfgTree : public PcfgTreeBase<PcfgTree> {
public:
typedef PcfgTreeBase<PcfgTree> BaseType;
PcfgTree(const BaseType::LabelType &label) : BaseType(label) {}
};
// Specialise XmlOutputHandler for PcfgTree.
template<>
class XmlOutputHandler<PcfgTree>
{
public:
class XmlOutputHandler<PcfgTree> {
public:
typedef std::map<std::string, std::string> AttributeMap;
void GetLabel(const PcfgTree &tree, std::string &label) const {
@ -81,6 +73,7 @@ public:
};
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining
#endif

View File

@ -24,16 +24,14 @@
#include <cassert>
#include <vector>
namespace Moses
{
namespace PCFG
{
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
// Base class for SyntaxTree, AgreementTree, and friends.
template<typename T, typename DerivedType>
class SyntaxTreeBase
{
public:
class SyntaxTreeBase {
public:
// Constructors
SyntaxTreeBase(const T &label)
: label_(label)
@ -48,54 +46,33 @@ public:
// Destructor
virtual ~SyntaxTreeBase();
const T &label() const {
return label_;
}
const DerivedType *parent() const {
return parent_;
}
DerivedType *parent() {
return parent_;
}
const std::vector<DerivedType *> &children() const {
return children_;
}
std::vector<DerivedType *> &children() {
return children_;
}
const T &label() const { return label_; }
const DerivedType *parent() const { return parent_; }
DerivedType *parent() { return parent_; }
const std::vector<DerivedType *> &children() const { return children_; }
std::vector<DerivedType *> &children() { return children_; }
void set_label(const T &label) {
label_ = label;
}
void set_parent(DerivedType *parent) {
parent_ = parent;
}
void set_children(const std::vector<DerivedType *> &c) {
children_ = c;
}
void set_label(const T &label) { label_ = label; }
void set_parent(DerivedType *parent) { parent_ = parent; }
void set_children(const std::vector<DerivedType *> &c) { children_ = c; }
bool IsLeaf() const {
return children_.empty();
}
bool IsLeaf() const { return children_.empty(); }
bool IsPreterminal() const {
return children_.size() == 1 && children_[0]->IsLeaf();
}
void AddChild(DerivedType *child) {
children_.push_back(child);
}
void AddChild(DerivedType *child) { children_.push_back(child); }
private:
private:
T label_;
std::vector<DerivedType *> children_;
DerivedType *parent_;
};
template<typename T>
class SyntaxTree : public SyntaxTreeBase<T, SyntaxTree<T> >
{
public:
class SyntaxTree : public SyntaxTreeBase<T, SyntaxTree<T> > {
public:
typedef SyntaxTreeBase<T, SyntaxTree<T> > BaseType;
SyntaxTree(const T &label) : BaseType(label) {}
SyntaxTree(const T &label, const std::vector<SyntaxTree *> &children)
@ -103,14 +80,14 @@ public:
};
template<typename T, typename DerivedType>
SyntaxTreeBase<T, DerivedType>::~SyntaxTreeBase()
{
SyntaxTreeBase<T, DerivedType>::~SyntaxTreeBase() {
for (std::size_t i = 0; i < children_.size(); ++i) {
delete children_[i];
}
}
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining
#endif

View File

@ -21,7 +21,8 @@
#include <sstream>
namespace Moses {
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
std::istream &Tool::OpenInputOrDie(const std::string &filename) {
@ -77,4 +78,5 @@ void Tool::OpenNamedOutputOrDie(const std::string &filename,
}
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -21,30 +21,26 @@
#ifndef PCFG_TOOL_H_
#define PCFG_TOOL_H_
#include <boost/program_options/cmdline.hpp>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
namespace Moses
{
namespace PCFG
{
#include <boost/program_options/cmdline.hpp>
class Tool
{
public:
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
class Tool {
public:
virtual ~Tool() {}
const std::string &name() const {
return name_;
}
const std::string &name() const { return name_; }
virtual int Main(int argc, char *argv[]) = 0;
protected:
protected:
Tool(const std::string &name) : name_(name) {}
// Returns the boost::program_options style that should be used by all tools.
@ -82,7 +78,7 @@ protected:
// the file cannot be opened for writing.
void OpenNamedOutputOrDie(const std::string &, std::ofstream &);
private:
private:
std::string name_;
std::istream *input_ptr_;
std::ifstream input_file_stream_;
@ -91,6 +87,7 @@ private:
};
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining
#endif

View File

@ -21,19 +21,19 @@
#ifndef PCFG_TYPEDEF_H_
#define PCFG_TYPEDEF_H_
#include "numbered_set.h"
#include "syntax_tree.h"
#include <string>
namespace Moses
{
namespace PCFG
{
#include "syntax-common/numbered_set.h"
#include "syntax_tree.h"
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
typedef NumberedSet<std::string> Vocabulary;
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining
#endif

View File

@ -19,25 +19,23 @@
#include "xml_tree_parser.h"
#include "exception.h"
#include <cassert>
#include <vector>
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
#include <cassert>
#include <vector>
#include "syntax-common/exception.h"
using namespace MosesTraining;
namespace Moses {
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
XmlTreeParser::XmlTreeParser()
{
XmlTreeParser::XmlTreeParser() {
}
std::auto_ptr<PcfgTree> XmlTreeParser::Parse(const std::string &line)
{
std::auto_ptr<PcfgTree> XmlTreeParser::Parse(const std::string &line) {
m_line = line;
m_tree.Clear();
try {
@ -60,8 +58,7 @@ std::auto_ptr<PcfgTree> XmlTreeParser::Parse(const std::string &line)
// Converts a SyntaxNode tree to a MosesTraining::Syntax::PCFG::PcfgTree.
std::auto_ptr<PcfgTree> XmlTreeParser::ConvertTree(
const SyntaxNode &tree,
const std::vector<std::string> &words)
{
const std::vector<std::string> &words) {
std::auto_ptr<PcfgTree> root(new PcfgTree(tree.GetLabel()));
const std::vector<SyntaxNode*> &children = tree.GetChildren();
if (children.empty()) {
@ -87,4 +84,5 @@ std::auto_ptr<PcfgTree> XmlTreeParser::ConvertTree(
}
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -21,28 +21,26 @@
#ifndef PCFG_XML_TREE_PARSER_H_
#define PCFG_XML_TREE_PARSER_H_
#include "pcfg_tree.h"
#include "SyntaxTree.h"
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
namespace Moses
{
namespace PCFG
{
#include "pcfg_tree.h"
#include "SyntaxTree.h"
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
// Parses a string in Moses' XML parse tree format and returns a PcfgTree
// object.
class XmlTreeParser
{
public:
class XmlTreeParser {
public:
XmlTreeParser();
std::auto_ptr<PcfgTree> Parse(const std::string &);
private:
private:
std::auto_ptr<PcfgTree> ConvertTree(const MosesTraining::SyntaxNode &,
const std::vector<std::string> &);
@ -54,6 +52,7 @@ private:
};
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining
#endif

View File

@ -21,10 +21,6 @@
#ifndef PCFG_XML_TREE_WRITER_H_
#define PCFG_XML_TREE_WRITER_H_
#include "syntax_tree.h"
#include "XmlTree.h"
#include <cassert>
#include <map>
#include <memory>
@ -32,15 +28,17 @@
#include <vector>
#include <string>
namespace Moses
{
namespace PCFG
{
#include "XmlTree.h"
#include "syntax_tree.h"
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
template<typename InputTree>
class XmlOutputHandler
{
public:
class XmlOutputHandler {
public:
typedef std::map<std::string, std::string> AttributeMap;
void GetLabel(const InputTree &, std::string &) const;
@ -48,19 +46,17 @@ public:
};
template<typename InputTree>
class XmlTreeWriter : public XmlOutputHandler<InputTree>
{
public:
class XmlTreeWriter : public XmlOutputHandler<InputTree> {
public:
typedef XmlOutputHandler<InputTree> Base;
void Write(const InputTree &, std::ostream &) const;
private:
private:
std::string Escape(const std::string &) const;
};
template<typename InputTree>
void XmlTreeWriter<InputTree>::Write(const InputTree &tree,
std::ostream &out) const
{
std::ostream &out) const {
assert(!tree.IsLeaf());
// Opening tag
@ -104,8 +100,7 @@ void XmlTreeWriter<InputTree>::Write(const InputTree &tree,
// Escapes XML special characters.
template<typename InputTree>
std::string XmlTreeWriter<InputTree>::Escape(const std::string &s) const
{
std::string XmlTreeWriter<InputTree>::Escape(const std::string &s) const {
std::string t;
std::size_t len = s.size();
t.reserve(len);
@ -134,6 +129,7 @@ std::string XmlTreeWriter<InputTree>::Escape(const std::string &s) const
}
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining
#endif

View File

@ -20,6 +20,6 @@
#include "pcfg_extract.h"
int main(int argc, char *argv[]) {
Moses::PCFG::PcfgExtract tool;
MosesTraining::Syntax::PCFG::PcfgExtract tool;
return tool.Main(argc, argv);
}

View File

@ -23,16 +23,16 @@
#include <string>
namespace Moses
{
namespace PCFG
{
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
// Plain data holder for option values (forward-declared as `struct Options`
// alongside PcfgExtract).
struct Options {
// Presumably the path of the corpus file to read -- TODO confirm against
// PcfgExtract::ProcessOptions.
std::string corpus_file;
};
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining
#endif

View File

@ -23,7 +23,8 @@
#include "rule_collection.h"
#include "rule_extractor.h"
#include "pcfg-common/exception.h"
#include "syntax-common/exception.h"
#include "pcfg-common/pcfg.h"
#include "pcfg-common/pcfg_tree.h"
#include "pcfg-common/syntax_tree.h"
@ -42,7 +43,8 @@
#include <string>
#include <vector>
namespace Moses {
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
int PcfgExtract::Main(int argc, char *argv[]) {
@ -128,4 +130,5 @@ void PcfgExtract::ProcessOptions(int argc, char *argv[],
}
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -23,15 +23,13 @@
#include "pcfg-common/tool.h"
namespace Moses
{
namespace PCFG
{
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
class Options;
struct Options;
class PcfgExtract : public Tool
{
class PcfgExtract : public Tool {
public:
PcfgExtract() : Tool("pcfg-extract") {}
virtual int Main(int, char *[]);
@ -40,6 +38,7 @@ private:
};
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining
#endif

View File

@ -23,7 +23,8 @@
#include <cmath>
namespace Moses {
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
void RuleCollection::Add(std::size_t lhs, const std::vector<std::size_t> &rhs) {
@ -55,4 +56,5 @@ void RuleCollection::CreatePcfg(Pcfg &pcfg) {
}
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -21,21 +21,19 @@
#ifndef PCFG_EXTRACT_RULE_COLLECTION_H_
#define PCFG_EXTRACT_RULE_COLLECTION_H_
#include "pcfg-common/pcfg.h"
#include <vector>
#include <boost/unordered_map.hpp>
#include <vector>
#include "pcfg-common/pcfg.h"
namespace Moses
{
namespace PCFG
{
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
// Contains PCFG rules and their counts.
class RuleCollection
{
public:
class RuleCollection {
public:
typedef boost::unordered_map<std::vector<std::size_t>, std::size_t> RhsCountMap;
typedef boost::unordered_map<std::size_t, RhsCountMap> Map;
typedef Map::iterator iterator;
@ -43,28 +41,21 @@ public:
RuleCollection() {}
iterator begin() {
return collection_.begin();
}
const_iterator begin() const {
return collection_.begin();
}
iterator begin() { return collection_.begin(); }
const_iterator begin() const { return collection_.begin(); }
iterator end() {
return collection_.end();
}
const_iterator end() const {
return collection_.end();
}
iterator end() { return collection_.end(); }
const_iterator end() const { return collection_.end(); }
void Add(std::size_t, const std::vector<std::size_t> &);
void CreatePcfg(Pcfg &);
private:
private:
Map collection_;
};
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining
#endif

View File

@ -21,7 +21,8 @@
#include "pcfg-common/pcfg_tree.h"
namespace Moses {
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
RuleExtractor::RuleExtractor(Vocabulary &non_term_vocab)
@ -48,4 +49,5 @@ void RuleExtractor::Extract(const PcfgTree &tree, RuleCollection &rc) const {
}
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -21,28 +21,27 @@
#ifndef PCFG_EXTRACT_RULE_EXTRACTOR_H_
#define PCFG_EXTRACT_RULE_EXTRACTOR_H_
#include "rule_collection.h"
#include "pcfg-common/typedef.h"
namespace Moses
{
namespace PCFG
{
#include "rule_collection.h"
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
class PcfgTree;
// Extracts PCFG rules from syntax trees and adds them to a RuleCollection.
class RuleExtractor
{
public:
class RuleExtractor {
public:
RuleExtractor(Vocabulary &);
void Extract(const PcfgTree &, RuleCollection &) const;
private:
private:
Vocabulary &non_term_vocab_;
};
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining
#endif

View File

@ -20,6 +20,6 @@
#include "pcfg_score.h"
int main(int argc, char *argv[]) {
Moses::PCFG::PcfgScore tool;
MosesTraining::Syntax::PCFG::PcfgScore tool;
return tool.Main(argc, argv);
}

View File

@ -23,16 +23,16 @@
#include <string>
namespace Moses
{
namespace PCFG
{
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
// Plain data holder for option values (filled in via
// PcfgScore::ProcessOptions, which takes an Options &).
struct Options {
// Presumably the path of the PCFG file to load -- TODO confirm against
// PcfgScore::ProcessOptions.
std::string pcfg_file;
};
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining
#endif

View File

@ -19,18 +19,6 @@
#include "pcfg_score.h"
#include "options.h"
#include "tree_scorer.h"
#include "pcfg-common/exception.h"
#include "pcfg-common/pcfg.h"
#include "pcfg-common/pcfg_tree.h"
#include "pcfg-common/syntax_tree.h"
#include "pcfg-common/typedef.h"
#include "pcfg-common/xml_tree_parser.h"
#include <boost/program_options.hpp>
#include <cassert>
#include <cstdlib>
#include <fstream>
@ -40,8 +28,21 @@
#include <set>
#include <string>
#include <vector>
#include "options.h"
#include "tree_scorer.h"
namespace Moses {
#include <boost/program_options.hpp>
#include "syntax-common/exception.h"
#include "pcfg-common/pcfg.h"
#include "pcfg-common/pcfg_tree.h"
#include "pcfg-common/syntax_tree.h"
#include "pcfg-common/typedef.h"
#include "pcfg-common/xml_tree_parser.h"
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
int PcfgScore::Main(int argc, char *argv[]) {
@ -149,4 +150,5 @@ void PcfgScore::ProcessOptions(int argc, char *argv[], Options &options) const {
}
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -23,23 +23,22 @@
#include "pcfg-common/tool.h"
namespace Moses
{
namespace PCFG
{
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
class Options;
struct Options;
class PcfgScore : public Tool
{
public:
class PcfgScore : public Tool {
public:
PcfgScore() : Tool("pcfg-score") {}
virtual int Main(int, char *[]);
private:
private:
void ProcessOptions(int, char *[], Options &) const;
};
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining
#endif

View File

@ -21,7 +21,8 @@
#include <cassert>
namespace Moses {
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
TreeScorer::TreeScorer(const Pcfg &pcfg, const Vocabulary &non_term_vocab)
@ -65,4 +66,5 @@ bool TreeScorer::Score(PcfgTree &root) const {
}
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -25,26 +25,25 @@
#include "pcfg-common/pcfg_tree.h"
#include "pcfg-common/typedef.h"
namespace Moses
{
namespace PCFG
{
namespace MosesTraining {
namespace Syntax {
namespace PCFG {
class TreeScorer
{
public:
class TreeScorer {
public:
TreeScorer(const Pcfg &, const Vocabulary &);
// Score tree according to PCFG. Returns false if unsuccessful (due to
// missing rule).
bool Score(PcfgTree &) const;
private:
private:
const Pcfg &pcfg_;
const Vocabulary &non_term_vocab_;
};
} // namespace PCFG
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining
#endif

View File

@ -77,7 +77,7 @@ void init(int argc, char* argv[])
if (argc < 2) {
cerr << "syntax: relax-parse < in-parse > out-parse ["
<< " --LeftBinarize | ---RightBinarize |"
<< " --LeftBinarize | --RightBinarize |"
<< " --SAMT 1-4 ]" << endl;
exit(1);
}

View File

@ -1,23 +0,0 @@
#pragma once
#include <string>
namespace Moses
{
namespace ScoreStsg
{
// Minimal exception type: wraps a human-readable message that is fixed at
// construction and can be read back with GetMsg().
class Exception
{
public:
  // Builds the exception from a C string.  text is used to construct a
  // std::string, so it must not be null.
  Exception(const char *text) : m_msg(text) {}

  // Builds the exception from a copy of text.
  Exception(const std::string &text) : m_msg(text) {}

  // Returns the message supplied at construction.
  const std::string &GetMsg() const { return m_msg; }

private:
  std::string m_msg;
};
} // namespace ScoreStsg
} // namespace Moses

View File

@ -1 +1 @@
exe score-stsg : [ glob *.cpp ] ..//deps ../..//boost_iostreams ../..//boost_program_options ../..//z : <include>.. ;
exe score-stsg : [ glob *.cpp ] ..//syntax-common ..//deps ../..//boost_iostreams ../..//boost_program_options ../..//z : <include>.. ;

View File

@ -5,7 +5,9 @@
#include <cstdlib>
#include <iostream>
namespace Moses
namespace MosesTraining
{
namespace Syntax
{
namespace ScoreStsg
{
@ -50,4 +52,5 @@ void LexicalTable::Load(std::istream &input)
}
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -8,7 +8,9 @@
#include "Vocabulary.h"
namespace Moses
namespace MosesTraining
{
namespace Syntax
{
namespace ScoreStsg
{
@ -40,4 +42,5 @@ private:
};
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -2,6 +2,6 @@
int main(int argc, char *argv[])
{
Moses::ScoreStsg::ScoreStsg tool;
MosesTraining::Syntax::ScoreStsg::ScoreStsg tool;
return tool.Main(argc, argv);
}

View File

@ -2,7 +2,9 @@
#include <string>
namespace Moses
namespace MosesTraining
{
namespace Syntax
{
namespace ScoreStsg
{
@ -38,4 +40,5 @@ public:
};
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -1,6 +1,8 @@
#include "RuleGroup.h"
namespace Moses
namespace MosesTraining
{
namespace Syntax
{
namespace ScoreStsg
{
@ -39,4 +41,5 @@ void RuleGroup::AddRule(const StringPiece &target, const StringPiece &ntAlign,
}
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -6,7 +6,9 @@
#include "util/string_piece.hh"
namespace Moses
namespace MosesTraining
{
namespace Syntax
{
namespace ScoreStsg
{
@ -65,4 +67,5 @@ private:
};
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -2,7 +2,9 @@
#include "util/string_piece.hh"
namespace Moses
namespace MosesTraining
{
namespace Syntax
{
namespace ScoreStsg
{
@ -14,4 +16,5 @@ struct RuleSymbol
};
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -12,14 +12,15 @@
#include "util/string_piece.hh"
#include "util/tokenize_piece.hh"
#include "Exception.h"
#include "InputFileStream.h"
#include "LexicalTable.h"
#include "OutputFileStream.h"
#include "Options.h"
#include "RuleGroup.h"
namespace Moses
namespace MosesTraining
{
namespace Syntax
{
namespace ScoreStsg
{
@ -75,4 +76,5 @@ void RuleTableWriter::WriteRuleHalf(const TokenizedRuleHalf &half)
}
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -8,7 +8,9 @@
#include "Options.h"
#include "TokenizedRuleHalf.h"
namespace Moses
namespace MosesTraining
{
namespace Syntax
{
namespace ScoreStsg
{
@ -16,7 +18,7 @@ namespace ScoreStsg
class RuleTableWriter
{
public:
RuleTableWriter(const Options &options, OutputFileStream &out)
RuleTableWriter(const Options &options, Moses::OutputFileStream &out)
: m_options(options)
, m_out(out) {}
@ -34,8 +36,9 @@ private:
void WriteRuleHalf(const TokenizedRuleHalf &);
const Options &m_options;
OutputFileStream &m_out;
Moses::OutputFileStream &m_out;
};
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -15,15 +15,19 @@
#include "util/string_piece_hash.hh"
#include "util/tokenize_piece.hh"
#include "Exception.h"
#include "InputFileStream.h"
#include "LexicalTable.h"
#include "OutputFileStream.h"
#include "syntax-common/exception.h"
#include "LexicalTable.h"
#include "Options.h"
#include "RuleGroup.h"
#include "RuleTableWriter.h"
namespace Moses
namespace MosesTraining
{
namespace Syntax
{
namespace ScoreStsg
{
@ -44,12 +48,12 @@ int ScoreStsg::Main(int argc, char *argv[])
ProcessOptions(argc, argv, m_options);
// Open input files.
InputFileStream extractStream(m_options.extractFile);
InputFileStream lexStream(m_options.lexFile);
Moses::InputFileStream extractStream(m_options.extractFile);
Moses::InputFileStream lexStream(m_options.lexFile);
// Open output files.
OutputFileStream outStream;
OutputFileStream countOfCountsStream;
Moses::OutputFileStream outStream;
Moses::OutputFileStream countOfCountsStream;
OpenOutputFileOrDie(m_options.tableFile, outStream);
if (m_options.goodTuring || m_options.kneserNey) {
OpenOutputFileOrDie(m_options.tableFile+".coc", countOfCountsStream);
@ -161,7 +165,7 @@ void ScoreStsg::ProcessRuleGroupOrDie(const RuleGroup &group,
} catch (const Exception &e) {
std::ostringstream msg;
msg << "failed to process rule group at lines " << start << "-" << end
<< ": " << e.GetMsg();
<< ": " << e.msg();
Error(msg.str());
} catch (const std::exception &e) {
std::ostringstream msg;
@ -228,7 +232,7 @@ void ScoreStsg::ProcessRuleGroup(const RuleGroup &group,
}
void ScoreStsg::ParseAlignmentString(const std::string &s, int numTgtWords,
MosesTraining::ALIGNMENT &tgtToSrc)
ALIGNMENT &tgtToSrc)
{
tgtToSrc.clear();
tgtToSrc.resize(numTgtWords);
@ -262,7 +266,7 @@ void ScoreStsg::ParseAlignmentString(const std::string &s, int numTgtWords,
double ScoreStsg::ComputeLexProb(const std::vector<RuleSymbol> &sourceFrontier,
const std::vector<RuleSymbol> &targetFrontier,
const MosesTraining::ALIGNMENT &tgtToSrc)
const ALIGNMENT &tgtToSrc)
{
double lexScore = 1.0;
for (std::size_t i = 0; i < targetFrontier.size(); ++i) {
@ -293,7 +297,7 @@ double ScoreStsg::ComputeLexProb(const std::vector<RuleSymbol> &sourceFrontier,
}
void ScoreStsg::OpenOutputFileOrDie(const std::string &filename,
OutputFileStream &stream)
Moses::OutputFileStream &stream)
{
bool ret = stream.Open(filename);
if (!ret) {
@ -437,4 +441,5 @@ void ScoreStsg::Error(const std::string &msg) const
}
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -7,6 +7,7 @@
#include <vector>
#include "ExtractionPhrasePair.h"
#include "OutputFileStream.h"
#include "LexicalTable.h"
#include "Options.h"
@ -14,11 +15,10 @@
#include "TokenizedRuleHalf.h"
#include "Vocabulary.h"
namespace Moses
namespace MosesTraining
{
namespace Syntax
{
class OutputFileStream;
namespace ScoreStsg
{
@ -41,14 +41,14 @@ private:
double ComputeLexProb(const std::vector<RuleSymbol> &,
const std::vector<RuleSymbol> &,
const MosesTraining::ALIGNMENT &);
const ALIGNMENT &);
void Error(const std::string &) const;
void OpenOutputFileOrDie(const std::string &, OutputFileStream &);
void OpenOutputFileOrDie(const std::string &, Moses::OutputFileStream &);
void ParseAlignmentString(const std::string &, int,
MosesTraining::ALIGNMENT &);
ALIGNMENT &);
void ProcessOptions(int, char *[], Options &) const;
@ -68,8 +68,9 @@ private:
int m_totalDistinct;
TokenizedRuleHalf m_sourceHalf;
TokenizedRuleHalf m_targetHalf;
MosesTraining::ALIGNMENT m_tgtToSrc;
ALIGNMENT m_tgtToSrc;
};
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -1,6 +1,8 @@
#include "TokenizedRuleHalf.h"
namespace Moses
namespace MosesTraining
{
namespace Syntax
{
namespace ScoreStsg
{
@ -34,4 +36,5 @@ bool TokenizedRuleHalf::IsTree() const
}
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -3,10 +3,13 @@
#include <string>
#include <vector>
#include "RuleSymbol.h"
#include "TreeFragmentTokenizer.h"
#include "syntax-common/tree_fragment_tokenizer.h"
namespace Moses
#include "RuleSymbol.h"
namespace MosesTraining
{
namespace Syntax
{
namespace ScoreStsg
{
@ -42,4 +45,5 @@ struct TokenizedRuleHalf
};
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -2,12 +2,14 @@
#include <string>
#include "NumberedSet.h"
#include "syntax-common/numbered_set.h"
namespace Moses {
namespace MosesTraining {
namespace Syntax {
namespace ScoreStsg {
typedef NumberedSet<std::string, std::size_t> Vocabulary;
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -0,0 +1,8 @@
# Library target: every .cc file in this directory except the unit tests
# (files matching *_test.cc), plus the parent project's deps target.
lib syntax_common : [ glob *.cc : *_test.cc ] ..//deps : <include>.. ;
import testing ;
# Declare one unit-test target per *_test.cc file. The target name is the
# file name with the ".cc" suffix removed.
for local t in [ glob *_test.cc ] {
local name = [ MATCH "(.*)\.cc" : $(t) ] ;
# Each test is built from its own source plus syntax_common and the Boost
# unit test framework / system targets of the top-level project.
unit-test $(name) : $(t) syntax_common /top//boost_unit_test_framework /top//boost_system ;
}

View File

@ -0,0 +1,20 @@
#pragma once
#include <string>
namespace MosesTraining {
namespace Syntax {
// Simple exception type that carries nothing but a human-readable message.
//
// Both constructors are deliberately implicit, so a string (or string literal)
// converts to an Exception wherever one is expected.
class Exception {
  // The message supplied at construction time.
  std::string msg_;

 public:
  // Builds an exception from a C string. The pointer is handed straight to
  // std::string's constructor, so it must not be null.
  Exception(const char *msg)
      : msg_(msg) {
  }

  // Builds an exception from a std::string (the message is copied).
  Exception(const std::string &msg)
      : msg_(msg) {
  }

  // Returns the stored message.
  const std::string &msg() const {
    return msg_;
  }
};
} // namespace Syntax
} // namespace MosesTraining

View File

@ -6,10 +6,10 @@
#include <boost/unordered_map.hpp>
#include "Exception.h"
#include "exception.h"
namespace Moses {
namespace ScoreStsg {
namespace MosesTraining {
namespace Syntax {
// Stores a set of elements of type T, each of which is allocated an integral
// ID of type I. IDs are contiguous starting at 0. Individual elements cannot
@ -106,5 +106,5 @@ void NumberedSet<T, I>::Clear() {
id_to_element_.clear();
}
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -0,0 +1,13 @@
#pragma once
#include <string>
#include "tree.h"
namespace MosesTraining {
namespace Syntax {
// A Tree whose node values are std::strings.
typedef Tree<std::string> StringTree;
} // namespace Syntax
} // namespace MosesTraining

View File

@ -0,0 +1,115 @@
#pragma once
#include <stack>
#include <vector>
namespace MosesTraining {
namespace Syntax {
// Destructor: deletes every child, which in turn deletes that child's own
// children, so the whole subtree below this node is released.
template<typename T>
Tree<T>::~Tree() {
  const std::size_t num_children = children_.size();
  for (std::size_t i = 0; i < num_children; ++i) {
    delete children_[i];
  }
}
// Recursively points the parent pointer of every descendant at the node that
// holds it in its child vector. This node's own parent pointer is not touched.
template<typename T>
void Tree<T>::SetParents() {
  for (std::size_t i = 0; i < children_.size(); ++i) {
    Tree *child = children_[i];
    child->parent() = this;
    child->SetParents();
  }
}
// Returns the number of parent links between this node and the top of the
// tree: 0 for a node without a parent, 1 for its children, and so on.
template<typename T>
std::size_t Tree<T>::Depth() const {
  std::size_t levels = 0;
  for (Tree *node = parent_; node != 0; node = node->parent_) {
    ++levels;
  }
  return levels;
}
// Forward iterator that visits a node and then, recursively, each of its
// children from left to right (pre-order).
//
// A default-constructed iterator is the past-the-end value. An iterator
// constructed from a node visits exactly the subtree rooted at that node, even
// if the node has a parent. Dereferencing or incrementing the past-the-end
// iterator is undefined.
//
// Iterators compare equal when they point at the same node. The traversal
// relies on the tree's parent pointers being set correctly.
template<typename T>
class Tree<T>::PreOrderIterator {
 public:
  // Constructs the past-the-end iterator.
  PreOrderIterator();
  // Constructs an iterator positioned at the given node.
  PreOrderIterator(Tree<T> &);

  // Access the current node. These are const because they do not modify the
  // iterator itself (the node remains mutable).
  Tree<T> &operator*() const { return *node_; }
  Tree<T> *operator->() const { return node_; }

  PreOrderIterator &operator++();
  PreOrderIterator operator++(int);

  // Comparisons are const so that const iterators and temporaries can appear
  // on either side.
  bool operator==(const PreOrderIterator &) const;
  bool operator!=(const PreOrderIterator &) const;

 private:
  // Pointer to the current node (0 for the past-the-end iterator).
  Tree<T> *node_;
  // Stack of indices defining the position of node_ within the child vectors
  // of its ancestors, up to (but excluding) the node the traversal started at.
  std::stack<std::size_t> index_stack_;
};

template<typename T>
Tree<T>::PreOrderIterator::PreOrderIterator()
    : node_(0) {
}

template<typename T>
Tree<T>::PreOrderIterator::PreOrderIterator(Tree<T> &t)
    : node_(&t) {
}

// Pre-increment: advance to the next node in pre-order, or to the
// past-the-end state once the subtree the traversal started at is exhausted.
template<typename T>
typename Tree<T>::PreOrderIterator &Tree<T>::PreOrderIterator::operator++() {
  // If the current node has children then visit the left-most child next.
  if (!node_->children().empty()) {
    index_stack_.push(0);
    node_ = node_->children()[0];
    return *this;
  }
  // Otherwise, try node's ancestors until a node is found with a sibling to
  // the right. The index stack holds one entry per level below the starting
  // node, so an empty stack means we are back at the starting node and the
  // traversal is complete. Checking the stack (and not only the parent
  // pointer) keeps top() from being called on an empty stack when the
  // starting node itself has a parent.
  Tree<T> *ancestor = node_->parent_;
  while (ancestor && !index_stack_.empty()) {
    const std::size_t index = index_stack_.top();
    index_stack_.pop();
    if (index+1 < ancestor->children_.size()) {
      index_stack_.push(index+1);
      node_ = ancestor->children()[index+1];
      return *this;
    }
    ancestor = ancestor->parent_;
  }
  node_ = 0;
  return *this;
}

// Post-increment: advance, returning a copy of the iterator as it was before.
template<typename T>
typename Tree<T>::PreOrderIterator Tree<T>::PreOrderIterator::operator++(int) {
  PreOrderIterator tmp(*this);
  ++*this;
  return tmp;
}

template<typename T>
bool Tree<T>::PreOrderIterator::operator==(const PreOrderIterator &rhs) const {
  return node_ == rhs.node_;
}

template<typename T>
bool Tree<T>::PreOrderIterator::operator!=(const PreOrderIterator &rhs) const {
  return node_ != rhs.node_;
}
} // namespace Syntax
} // namespace MosesTraining

View File

@ -0,0 +1,91 @@
#pragma once
#include <vector>
namespace MosesTraining {
namespace Syntax {
// A basic k-ary tree with node values of type T. Each node has a vector of
// pointers to its children and a pointer to its parent (or 0 for the root).
//
// See the unit tests in tree_test.cc for examples of usage.
//
// Note: a Tree owns its children: it will delete them on destruction.
//
// Note: copying a Tree is deep. The copy gets its own freshly allocated copies
// of all descendants (with their parent pointers set within the copy), so two
// Tree objects never own the same child. The copied node itself keeps the
// source's parent pointer, exactly as a member-wise copy would, but it is not
// inserted into that parent's child vector.
//
// Note: it's the user's responsibility to ensure that parent and child pointers
// are correctly set and maintained. A convenient(-ish) way of building a
// properly-connected tree is to add all the nodes as children of their
// respective parents (using the children() accessor) and then call
// SetParents() on the root at the end.
//
template<typename T>
class Tree {
 public:
  // Constructors
  Tree()
      : value_()
      , children_()
      , parent_(0) {}

  Tree(const T &value)
      : value_(value)
      , children_()
      , parent_(0) {}

  // Copy constructor (deep copy of the subtree rooted at other). Without
  // this, the compiler-generated copy would share child pointers with other
  // and both destructors would delete them.
  Tree(const Tree &other)
      : value_(other.value_)
      , children_()
      , parent_(other.parent_) {
    children_.reserve(other.children_.size());
    for (typename std::vector<Tree *>::const_iterator p =
             other.children_.begin(); p != other.children_.end(); ++p) {
      // Preserve null entries as they are; copy everything else recursively.
      Tree *child = *p ? new Tree(**p) : 0;
      if (child) {
        child->parent_ = this;
      }
      children_.push_back(child);
    }
  }

  // Copy assignment (deep copy). The existing children of this node are
  // deleted; value and parent pointer are taken from other.
  Tree &operator=(const Tree &other) {
    if (this != &other) {
      // Copy first so that this node is left untouched if copying throws, and
      // so that assigning from one of our own descendants is safe.
      Tree copy(other);
      value_ = copy.value_;
      parent_ = copy.parent_;
      children_.swap(copy.children_);
      for (typename std::vector<Tree *>::iterator p = children_.begin();
           p != children_.end(); ++p) {
        if (*p) {
          (*p)->parent_ = this;
        }
      }
      // copy now holds the old children and deletes them when it goes out of
      // scope.
    }
    return *this;
  }

  // Destructor (deletes children)
  ~Tree();

  // Access tree's value.
  const T &value() const { return value_; }
  T &value() { return value_; }

  // Access tree's parent.
  const Tree *parent() const { return parent_; }
  Tree *&parent() { return parent_; }

  // Access tree's children.
  const std::vector<Tree *> &children() const { return children_; }
  std::vector<Tree *> &children() { return children_; }

  // Set the parent values for this subtree (excluding this node).
  void SetParents();

  // Leaf predicate.
  bool IsLeaf() const { return children_.empty(); }

  // Calculate the depth of this node within the tree (where the root has a
  // depth of 0, root's children have a depth 1, etc).
  std::size_t Depth() const;

  // Iterators
  //
  // All iterators are forward iterators. Example use:
  //
  // Tree<int> &root = GetMeATree();
  // for (Tree<int>::PreOrderIterator p(root);
  //      p != Tree<int>::PreOrderIterator(); ++p) {
  //   std::cout << p->value() << " ";
  // }

  // Pre-order iterators.
  class PreOrderIterator;
  // class ConstPreOrderIterator; TODO

  // Post-order iterators.
  // class PostOrderIterator; TODO
  // class ConstPostOrderIterator; TODO

  // Leaf iterators (left-to-right).
  // class LeafIterator; TODO
  // class ConstLeafIterator; TODO

 private:
  T value_;
  std::vector<Tree *> children_;
  Tree *parent_;
};
} // namespace Syntax
} // namespace MosesTraining
#include "tree-inl.h"

View File

@ -1,10 +1,10 @@
#include "TreeFragmentTokenizer.h"
#include "tree_fragment_tokenizer.h"
#include <cctype>
namespace Moses
namespace MosesTraining
{
namespace ScoreStsg
namespace Syntax
{
TreeFragmentToken::TreeFragmentToken(TreeFragmentTokenType t,
@ -86,5 +86,5 @@ bool operator!=(const TreeFragmentTokenizer &lhs,
return !(lhs == rhs);
}
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -2,10 +2,8 @@
#include "util/string_piece.hh"
namespace Moses
{
namespace ScoreStsg
{
namespace MosesTraining {
namespace Syntax {
enum TreeFragmentTokenType {
TreeFragmentToken_EOS,
@ -24,10 +22,11 @@ struct TreeFragmentToken {
// Tokenizes tree fragment strings in Moses format.
//
// For example, the string "[NP [NP [NN a]] [NP]]" is tokenized to the sequence:
// For example, the string "[S [NP [NN weasels]] [VP]]" is tokenized to the
// sequence:
//
// 1 LSB "["
// 2 WORD "NP"
// 2 WORD "S"
// 3 LSB "["
// 4 WORD "NP"
// 5 LSB "["
@ -36,7 +35,7 @@ struct TreeFragmentToken {
// 8 RSB "]"
// 9 RSB "]"
// 10 LSB "["
// 11 WORD "NP"
// 11 WORD "VP"
// 12 RSB "]"
// 13 RSB "]"
// 14 EOS undefined
@ -66,5 +65,5 @@ class TreeFragmentTokenizer {
std::size_t pos_;
};
} // namespace ScoreStsg
} // namespace Moses
} // namespace Syntax
} // namespace MosesTraining

View File

@ -0,0 +1,74 @@
#include "tree_fragment_tokenizer.h"
// Name of this test module as reported by Boost.Test. It previously reused
// "TreeTest", the module name of the separate Tree tests.
#define BOOST_TEST_MODULE TreeFragmentTokenizerTest
#include <boost/test/unit_test.hpp>
#include <boost/scoped_ptr.hpp>
namespace MosesTraining {
namespace Syntax {
namespace {
// An empty fragment must produce no tokens at all.
BOOST_AUTO_TEST_CASE(tokenize_empty) {
  const std::string fragment("");
  std::vector<TreeFragmentToken> tokens;
  const TreeFragmentTokenizer end;
  TreeFragmentTokenizer it(fragment);
  while (it != end) {
    tokens.push_back(*it);
    ++it;
  }
  BOOST_REQUIRE(tokens.empty());
}
// Whitespace handling: spaces separate words and are never part of a token,
// while brackets are tokens of their own even when they touch a word.
BOOST_AUTO_TEST_CASE(tokenize_space) {
  const std::string fragment = " [ weasel weasel ] [] ] wea[sel";

  // The token sequence the tokenizer is expected to produce, in order.
  struct ExpectedToken {
    TreeFragmentTokenType type;
    const char *value;
  };
  const ExpectedToken expected[] = {
    { TreeFragmentToken_LSB, "[" },
    { TreeFragmentToken_WORD, "weasel" },
    { TreeFragmentToken_WORD, "weasel" },
    { TreeFragmentToken_RSB, "]" },
    { TreeFragmentToken_LSB, "[" },
    { TreeFragmentToken_RSB, "]" },
    { TreeFragmentToken_RSB, "]" },
    { TreeFragmentToken_WORD, "wea" },
    { TreeFragmentToken_LSB, "[" },
    { TreeFragmentToken_WORD, "sel" }
  };
  const std::size_t num_expected = sizeof(expected) / sizeof(expected[0]);

  std::vector<TreeFragmentToken> tokens;
  for (TreeFragmentTokenizer p(fragment); p != TreeFragmentTokenizer(); ++p) {
    tokens.push_back(*p);
  }

  BOOST_REQUIRE(tokens.size() == num_expected);
  for (std::size_t i = 0; i < num_expected; ++i) {
    BOOST_REQUIRE(tokens[i].type == expected[i].type);
    BOOST_REQUIRE(tokens[i].value == expected[i].value);
  }
}
// A well-formed tree fragment: checks the type of every token produced.
BOOST_AUTO_TEST_CASE(tokenize_fragment) {
  const std::string fragment = "[S [NP [NN weasels]] [VP]]";

  // Expected token types, in order:  [ S [ NP [ NN weasels ] ] [ VP ] ]
  const TreeFragmentTokenType expected_types[] = {
    TreeFragmentToken_LSB,
    TreeFragmentToken_WORD,
    TreeFragmentToken_LSB,
    TreeFragmentToken_WORD,
    TreeFragmentToken_LSB,
    TreeFragmentToken_WORD,
    TreeFragmentToken_WORD,
    TreeFragmentToken_RSB,
    TreeFragmentToken_RSB,
    TreeFragmentToken_LSB,
    TreeFragmentToken_WORD,
    TreeFragmentToken_RSB,
    TreeFragmentToken_RSB
  };
  const std::size_t num_expected =
      sizeof(expected_types) / sizeof(expected_types[0]);

  std::vector<TreeFragmentToken> tokens;
  for (TreeFragmentTokenizer p(fragment); p != TreeFragmentTokenizer(); ++p) {
    tokens.push_back(*p);
  }

  BOOST_REQUIRE(tokens.size() == num_expected);
  for (std::size_t i = 0; i < num_expected; ++i) {
    BOOST_REQUIRE(tokens[i].type == expected_types[i]);
  }
}
} // namespace
} // namespace Syntax
} // namespace MosesTraining

View File

@ -0,0 +1,66 @@
#include "tree.h"
#define BOOST_TEST_MODULE TreeTest
#include <boost/test/unit_test.hpp>
#include <boost/scoped_ptr.hpp>
namespace MosesTraining {
namespace Syntax {
namespace {
// Test Tree<>::PreOrderIterator with a trivial, single-node tree: the iterator
// starts on the root and a single increment takes it to the end.
BOOST_AUTO_TEST_CASE(pre_order_1) {
  boost::scoped_ptr<Tree<int> > root(new Tree<int>(123));
  Tree<int>::PreOrderIterator end;
  Tree<int>::PreOrderIterator it(*root);
  BOOST_REQUIRE(it != end);
  BOOST_REQUIRE(it->value() == 123);
  ++it;
  BOOST_REQUIRE(it == end);
}
// Test Tree<>::PreOrderIterator on this tree: (1 (2 3) (4) (5 6 (7 8)))
//
// The node values are chosen so that a pre-order traversal yields 1, 2, ..., 8.
BOOST_AUTO_TEST_CASE(pre_order_2) {
  boost::scoped_ptr<Tree<int> > root(new Tree<int>(1));

  // Build the tree top-down; every node is owned by its parent as soon as it
  // has been pushed onto the parent's child vector.
  std::vector<Tree<int> *> &top = root->children();
  top.push_back(new Tree<int>(2));
  top[0]->children().push_back(new Tree<int>(3));
  top.push_back(new Tree<int>(4));
  top.push_back(new Tree<int>(5));
  std::vector<Tree<int> *> &under_five = top[2]->children();
  under_five.push_back(new Tree<int>(6));
  under_five.push_back(new Tree<int>(7));
  under_five[1]->children().push_back(new Tree<int>(8));
  root->SetParents();

  Tree<int>::PreOrderIterator p(*root);
  Tree<int>::PreOrderIterator end;

  for (int expected = 1; expected <= 8; ++expected) {
    BOOST_REQUIRE(p != end);
    BOOST_REQUIRE(p->value() == expected);
    ++p;
  }
  BOOST_REQUIRE(p == end);
}
} // namespace
} // namespace Syntax
} // namespace MosesTraining

View File

@ -0,0 +1,59 @@
#include "xml_tree_parser.h"
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
#include <cassert>
#include <cstddef>
#include <sstream>
#include <string>
#include <vector>
namespace MosesTraining {
namespace Syntax {
// Parses a line in Moses' XML parse tree format and returns the corresponding
// StringTree.
//
// The returned tree is allocated with new and is not retained by the parser,
// so the caller is responsible for deleting it.
//
// Throws Exception if ProcessAndStripXMLTags reports failure (either by
// returning false or by throwing XmlException), if no top node is available
// after parsing, or if the tree cannot be converted (see ConvertTree).
StringTree *XmlTreeParser::Parse(const std::string &line) {
  // Work on a copy of the line: ProcessAndStripXMLTags takes it by name
  // together with the tree it fills in, and the result is tokenized below.
  line_ = line;
  tree_.Clear();
  try {
    if (!ProcessAndStripXMLTags(line_, tree_, label_set_, top_label_set_,
                                false)) {
      throw Exception("");
    }
  } catch (const XmlException &e) {
    // Translate the XML-level exception into this library's exception type.
    throw Exception(e.getMsg());
  }
  tree_.ConnectNodes();
  SyntaxNode *root = tree_.GetTop();
  // Report a missing root as an error instead of relying on assert(), which
  // is compiled out in NDEBUG builds and would leave a null dereference.
  if (!root) {
    throw Exception("XML input does not contain a parse tree");
  }
  words_ = tokenize(line_.c_str());
  return ConvertTree(*root, words_);
}
// Converts a SyntaxNode tree to a StringTree.
//
// Every SyntaxNode becomes a StringTree node holding the node's label. A
// SyntaxNode without children additionally gets a single leaf child holding
// the word at the node's start position in words.
//
// The returned tree is allocated with new; ownership passes to the caller.
//
// Throws Exception if a childless node covers more than one word or if its
// start position does not refer to an element of words. If an exception is
// thrown, everything allocated so far is released before it propagates.
StringTree *XmlTreeParser::ConvertTree(const SyntaxNode &tree,
                                       const std::vector<std::string> &words) {
  StringTree *root = new StringTree(tree.GetLabel());
  try {
    const std::vector<SyntaxNode*> &children = tree.GetChildren();
    if (children.empty()) {
      if (tree.GetStart() != tree.GetEnd()) {
        std::ostringstream msg;
        msg << "leaf node covers multiple words (" << tree.GetStart()
            << "-" << tree.GetEnd() << "): this is currently unsupported";
        throw Exception(msg.str());
      }
      // Guard the lookup below: a negative position wraps to a huge unsigned
      // value and is therefore rejected by the same comparison.
      const std::size_t index = static_cast<std::size_t>(tree.GetStart());
      if (index >= words.size()) {
        std::ostringstream msg;
        msg << "leaf node position (" << tree.GetStart()
            << ") is outside the sentence (" << words.size() << " words)";
        throw Exception(msg.str());
      }
      // Reserve the child slot before allocating the leaf so that the leaf
      // cannot be orphaned if growing the vector throws.
      root->children().push_back(static_cast<StringTree *>(0));
      StringTree *leaf = new StringTree(words[index]);
      leaf->parent() = root;
      root->children().back() = leaf;
    } else {
      // With the capacity reserved up front, the push_back calls below do not
      // reallocate, so a converted child is always attached to root.
      root->children().reserve(children.size());
      for (std::vector<SyntaxNode*>::const_iterator p = children.begin();
           p != children.end(); ++p) {
        assert(*p);
        StringTree *child = ConvertTree(**p, words);
        child->parent() = root;
        root->children().push_back(child);
      }
    }
  } catch (...) {
    // Deleting root also deletes the children attached so far.
    delete root;
    throw;
  }
  return root;
}
} // namespace Syntax
} // namespace MosesTraining

View File

@ -0,0 +1,34 @@
#pragma once
#include <map>
#include <set>
#include <string>
#include <vector>
#include "SyntaxTree.h"
#include "exception.h"
#include "string_tree.h"
namespace MosesTraining {
namespace Syntax {
// Parser for Moses' XML parse tree format. It wraps the
// ProcessAndStripXMLTags function and converts the result into a StringTree.
//
// The members below are working state that Parse() overwrites (line, tree,
// words) or passes to ProcessAndStripXMLTags (the label sets) on every call.
class XmlTreeParser {
 public:
  // Parses one line of XML-annotated text and returns the resulting tree.
  // The tree is heap-allocated and handed to the caller. Errors are reported
  // by throwing Exception.
  StringTree *Parse(const std::string &line);

 private:
  // Recursively converts the SyntaxNode subtree rooted at `tree` into a
  // StringTree, taking the leaf words from `words`.
  static StringTree *ConvertTree(const MosesTraining::SyntaxNode &tree,
                                 const std::vector<std::string> &words);

  // Label containers handed to ProcessAndStripXMLTags.
  std::set<std::string> label_set_;
  std::map<std::string, int> top_label_set_;
  // Copy of the most recently parsed line, as processed by
  // ProcessAndStripXMLTags.
  std::string line_;
  // Syntax tree filled in by ProcessAndStripXMLTags for the current line.
  MosesTraining::SyntaxTree tree_;
  // Tokens of line_, indexed by word position.
  std::vector<std::string> words_;
};
} // namespace Syntax
} // namespace MosesTraining

View File

@ -40,6 +40,8 @@ sub Beautify($)
next if ($name eq "srilm");
next if ($name eq "irstlm");
next if ($name eq "UG");
next if ($name eq "pcfg-common");
next if ($name eq "syntax-common");
$name = $path ."/" .$name;
if (-d $name) {