filter-rule-table: stopgap (non-) filtering for T2S/SCFG

This commit is contained in:
Phil Williams 2015-02-23 11:27:20 +00:00
parent 0bff50449e
commit 0346fbb138
4 changed files with 79 additions and 2 deletions

View File

@ -22,6 +22,7 @@
#include "StringCfgFilter.h"
#include "StringForest.h"
#include "StringForestParser.h"
#include "TreeCfgFilter.h"
#include "TreeTsgFilter.h"
namespace MosesTraining
@ -85,7 +86,9 @@ int FilterRuleTable::Main(int argc, char *argv[])
ReadTestSet(testStream, testTrees);
if (sourceSideRuleFormat == kCfg) {
// TODO Implement TreeCfgFilter
Error("tree/cfg filtering algorithm not supported yet");
Warn("tree/cfg filtering algorithm not implemented: input will be copied unchanged to output");
TreeCfgFilter filter(testTrees);
filter.Filter(std::cin, std::cout);
} else if (sourceSideRuleFormat == kTsg) {
TreeTsgFilter filter(testTrees);
filter.Filter(std::cin, std::cout);
@ -227,10 +230,15 @@ void FilterRuleTable::ProcessOptions(int argc, char *argv[],
void FilterRuleTable::Error(const std::string &msg) const
{
std::cerr << GetName() << ": " << msg << std::endl;
std::cerr << GetName() << ": error: " << msg << std::endl;
std::exit(1);
}
// Reports a non-fatal problem.
//
// Writes "<name>: warning: <msg>" to standard error, where <name> is the
// value returned by GetName(). Unlike Error(), execution continues afterwards.
void FilterRuleTable::Warn(const std::string &msg) const
{
  std::ostream &err = std::cerr;
  err << GetName() << ": warning: ";
  err << msg << std::endl;
}
} // namespace FilterRuleTable
} // namespace Syntax
} // namespace MosesTraining

View File

@ -52,6 +52,7 @@ private:
void ReadTestSet(std::istream &,
std::vector<boost::shared_ptr<StringForest> > &);
void Warn(const std::string &) const;
std::string m_name;
};

View File

@ -0,0 +1,30 @@
#include "TreeCfgFilter.h"
#include <algorithm>
#include "util/string_piece_hash.hh"
namespace MosesTraining
{
namespace Syntax
{
namespace FilterRuleTable
{
// Constructs a filter for the given test sentences.
//
// The body is intentionally empty: the sentences are neither inspected nor
// stored here, so the parameter is left unnamed.
TreeCfgFilter::TreeCfgFilter(
    const std::vector<boost::shared_ptr<StringTree> > & /* sentences */)
{
}
// Copies the rule table from 'in' to 'out' one line at a time without
// discarding anything (filtering is not implemented yet).
//
// Every line read is written followed by a newline, so a final input line
// that lacks a terminator gains one on output. Reading stops at end of input
// or at the first read failure.
void TreeCfgFilter::Filter(std::istream &in, std::ostream &out)
{
  // TODO Implement filtering!
  std::string line;
  while (std::getline(in, line)) {
    // Write '\n' rather than std::endl: std::endl flushes the stream after
    // every line, which is needlessly slow for long inputs.
    out << line << '\n';
  }
  // Flush once at the end so that, as before, everything written has been
  // pushed to the underlying stream by the time this function returns.
  out.flush();
}
} // namespace FilterRuleTable
} // namespace Syntax
} // namespace MosesTraining

View File

@ -0,0 +1,38 @@
#pragma once
#include <istream>
#include <ostream>
#include <string>
#include <vector>
#include <boost/shared_ptr.hpp>
#include <boost/unordered_map.hpp>
#include "syntax-common/numbered_set.h"
#include "syntax-common/string_tree.h"
#include "syntax-common/tree.h"
#include "syntax-common/tree_fragment_tokenizer.h"
#include "CfgFilter.h"
namespace MosesTraining
{
namespace Syntax
{
namespace FilterRuleTable
{
// Stopgap rule table "filter" for the case where the rule table has a CFG
// source-side and the test sentences are parse trees.
//
// NOTE: no filtering algorithm is implemented for this case yet. Filter()
// currently copies its input to its output line by line without discarding
// any rules.
class TreeCfgFilter : public CfgFilter {
public:
// Initialize the filter for a given set of test sentences.
// (The current implementation does not use the sentences.)
TreeCfgFilter(const std::vector<boost::shared_ptr<StringTree> > &);
// Reads the rule table from 'in', one rule per line, and writes the result
// to 'out'.
// NOTE(review): presumably implements a virtual declared in CfgFilter --
// confirm against CfgFilter.h.
void Filter(std::istream &in, std::ostream &out);
};
} // namespace FilterRuleTable
} // namespace Syntax
} // namespace MosesTraining