Hierarchical reordering stuff now compiles.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/hierarchical-reo@2699 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
chardmeier 2010-01-28 12:12:57 +00:00
parent a3e9af21c3
commit 9c33fa2289
28 changed files with 113 additions and 184 deletions

View File

@ -21,6 +21,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#pragma once
#include <queue>
#include <set>
#include <vector>

View File

@ -80,7 +80,7 @@ bool ConfusionNet::ReadF(std::istream& in,
case 0: return ReadFormat0(in,factorOrder);
case 1: return ReadFormat1(in,factorOrder);
default:
stringstream strme;
std::stringstream strme;
strme << "ERROR: unknown format '"<<format
<<"' in ConfusionNet::Read";
UserMessage::Add(strme.str());
@ -209,7 +209,7 @@ Phrase ConfusionNet::GetSubString(const WordsRange&) const {
//return Phrase(Input);
}
std::string ConfusionNet::GetStringRep(const vector<FactorType> factorsToPrint) const{ //not well defined yet
std::string ConfusionNet::GetStringRep(const std::vector<FactorType> factorsToPrint) const{ //not well defined yet
TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
return "";
}

View File

@ -28,6 +28,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
using namespace std;
DecodeStepGeneration::DecodeStepGeneration(GenerationDictionary* dict, const DecodeStep* prev)
: DecodeStep(dict, prev)
{

View File

@ -126,7 +126,7 @@ void DecodeStepTranslation::ProcessInitialTranslation(
VERBOSE(3,"\t" << targetPhrase << "\n");
}
VERBOSE(3,endl);
VERBOSE(3,std::endl);
}
}

View File

@ -2,6 +2,7 @@
#include "GlobalLexicalModel.h"
#include "StaticData.h"
#include "InputFileStream.h"
#include "UserMessage.h"
namespace Moses
{

View File

@ -33,7 +33,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "GenerationDictionary.h"
#include "LanguageModelSingleFactor.h"
#include "ScoreComponentCollection.h"
#include "LexicalReordering.h"
#include "InputType.h"
#include "ObjectPool.h"
@ -47,6 +46,7 @@ class WordsRange;
class Hypothesis;
class FFState;
class Manager;
class LexicalReordering;
typedef std::vector<Hypothesis*> ArcList;

View File

@ -8,6 +8,7 @@
namespace Moses
{
using namespace std;
class Manager;

View File

@ -60,7 +60,7 @@ bool LanguageModelIRST::Load(const std::string &filePath,
float weight,
size_t nGramOrder)
{
char *SepString = " \t\n";
const char *SepString = " \t\n";
cerr << "In LanguageModelIRST::Load: nGramOrder = " << nGramOrder << "\n";
FactorCollection &factorCollection = FactorCollection::Instance();

View File

@ -4,6 +4,7 @@
#include "NGramNode.h"
#include "InputFileStream.h"
#include "StaticData.h"
#include "UserMessage.h"
using namespace std;

View File

@ -155,7 +155,7 @@ bool LexicalReordering::DecodeNumFeatureFunctions(std::string configElement) {
return false;
}
Score LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const {
// Returns whatever m_table->GetScore() yields for the phrase pair (f, e).
// The third (context) argument is a Phrase constructed from Output.
Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const {
  const Phrase contextPhrase(Output);
  return m_table->GetScore(f, e, contextPhrase);
}

View File

@ -46,7 +46,9 @@ public:
const FFState* EmptyHypothesisState() const;
virtual std::string GetScoreProducerDescription() const;
// Description string for this score producer: a fixed prefix followed by
// the contents of m_modelTypeString.
virtual std::string GetScoreProducerDescription() const {
  const std::string description = "Lexical reordering model of type " + m_modelTypeString;
  return description;
}
std::string GetScoreProducerWeightShortName() const {
return "d";
@ -57,7 +59,7 @@ public:
m_table->InitializeForInput(i);
}
Score GetProb(const Phrase& f, const Phrase& e) const;
Scores GetProb(const Phrase& f, const Phrase& e) const;
private:
bool DecodeCondition(std::string s);

View File

@ -8,6 +8,7 @@
#include "WordsRange.h"
#include "ReorderingStack.h"
#include "LexicalReordering.h"
#include "LexicalReorderingState.h"
namespace Moses {
@ -27,13 +28,11 @@ namespace Moses {
LexicalReorderingState* LexicalReorderingState::CreateLexicalReorderingState(std::vector<std::string>& config, LexicalReordering::Direction dir) {
LexicalReorderingState* LexicalReorderingState::CreateLexicalReorderingState(const std::vector<std::string>& config, LexicalReordering::Direction dir) {
ModelType mt = None;
bool phraseBased = true;
assert(dir != LexicalReordering::Bidirectional);
for (int i=0; i<config.size(); ++i) {
if (config[i] == "hier") {
phraseBased == false;

View File

@ -24,7 +24,7 @@ class LexicalReorderingState : public FFState {
virtual int Compare(const FFState& o) const = 0;
virtual LexicalReorderingState* Expand(const Hypothesis& hypo,
LexicalReordering::ReorderingType& reoType) const;
LexicalReordering::ReorderingType& reoType) const = 0;
static LexicalReorderingState* CreateLexicalReorderingState(const std::vector<std::string>& config,
LexicalReordering::Direction dir);

View File

@ -94,7 +94,7 @@ std::vector<float> LexicalReorderingTableMemory::GetScore(const Phrase& f,
}
}
}
return Score();
return Scores();
}
void LexicalReorderingTableMemory::DbgDump(std::ostream* out) const{
@ -215,14 +215,14 @@ LexicalReorderingTableTree::LexicalReorderingTableTree(
// Destructor with an empty body: no explicit cleanup is performed here.
LexicalReorderingTableTree::~LexicalReorderingTableTree(){
}
Score LexicalReorderingTableTree::GetScore(const Phrase& f, const Phrase& e, const Phrase& c) {
Scores LexicalReorderingTableTree::GetScore(const Phrase& f, const Phrase& e, const Phrase& c) {
if( (!m_FactorsF.empty() && 0 == f.GetSize())
|| (!m_FactorsE.empty() && 0 == e.GetSize())){
//NOTE: no check for c as c might be empty, e.g. start of sentence
//not a proper key
// phi: commented out, since e may be empty (drop-unknown)
//std::cerr << "Not a proper key!\n";
return Score();
return Scores();
}
CacheType::iterator i;;
if(m_UseCache){
@ -239,11 +239,11 @@ Score LexicalReorderingTableTree::GetScore(const Phrase& f, const Phrase& e, con
}
}
//not in cache go to file...
Score score;
Scores score;
Candidates cands;
m_Table->GetCandidates(MakeTableKey(f,e), &cands);
if(cands.empty()){
return Score();
return Scores();
}
if(m_FactorsC.empty()){
@ -259,10 +259,10 @@ Score LexicalReorderingTableTree::GetScore(const Phrase& f, const Phrase& e, con
return score;
};
Score LexicalReorderingTableTree::auxFindScoreForContext(const Candidates& cands, const Phrase& context){
Scores LexicalReorderingTableTree::auxFindScoreForContext(const Candidates& cands, const Phrase& context){
if(m_FactorsC.empty()){
assert(cands.size() <= 1);
return (1 == cands.size())?(cands[0].GetScore(0)):(Score());
return (1 == cands.size())?(cands[0].GetScore(0)):(Scores());
} else {
std::vector<std::string> cvec;
for(size_t i = 0; i < context.GetSize(); ++i){
@ -284,7 +284,7 @@ Score LexicalReorderingTableTree::auxFindScoreForContext(const Candidates& cands
}
}
}
return Score();
return Scores();
}
}
@ -347,7 +347,7 @@ bool LexicalReorderingTableTree::Create(std::istream& inFile,
TRACE_ERR(".");
}
IPhrase key;
Score score;
Scores score;
std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||");
std::string w;
@ -396,7 +396,7 @@ bool LexicalReorderingTableTree::Create(std::istream& inFile,
//transform score now...
std::transform(score.begin(),score.end(),score.begin(),TransformScore);
std::transform(score.begin(),score.end(),score.begin(),FloorScore);
std::vector<Score> scores;
std::vector<Scores> scores;
scores.push_back(score);
if(key.empty()) {

View File

@ -29,9 +29,6 @@ class ConfusionNet;
//additional types
typedef std::vector<float> Score;
typedef std::vector<FactorType> FactorList;
class LexicalReorderingTable {
public:
LexicalReorderingTable(const FactorList& f_factors, const FactorList& e_factors, const FactorList& c_factors)
@ -42,7 +39,7 @@ class LexicalReorderingTable {
public:
static LexicalReorderingTable* LoadAvailable(const std::string& filePath, const FactorList& f_factors, const FactorList& e_factors, const FactorList& c_factors);
public:
virtual Score GetScore(const Phrase& f, const Phrase& e, const Phrase& c) = 0;
virtual Scores GetScore(const Phrase& f, const Phrase& e, const Phrase& c) = 0;
virtual void InitializeForInput(const InputType&){
/* override for on-demand loading */
};
@ -133,8 +130,8 @@ class LexicalReorderingTableTree : public LexicalReorderingTable {
void Cache(const ConfusionNet& input);
void Cache(const Sentence& input);
void auxCacheForSrcPhrase(const Phrase& f);
Score auxFindScoreForContext(const Candidates& cands, const Phrase& contex);
void auxCacheForSrcPhrase(const Phrase& f);
Scores auxFindScoreForContext(const Candidates& cands, const Phrase& contex);
private:
//typedef LexicalReorderingCand CandType;
typedef std::map< std::string, Candidates > CacheType;

View File

@ -34,6 +34,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TrellisPath.h"
#include "TrellisPathCollection.h"
#include "TranslationOption.h"
#include "LexicalReordering.h"
#include "LMList.h"
#include "TranslationOptionCollection.h"
#include "DummyScoreProducers.h"

View File

@ -7,6 +7,7 @@
namespace Moses
{
using namespace std;
inline bool existsFile(const char* filePath) {
struct stat mystat;

View File

@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "StaticData.h"
#include "InputType.h"
#include "TranslationOption.h"
#include "UserMessage.h"
namespace Moses {
@ -57,10 +58,10 @@ PhraseDictionaryFeature::PhraseDictionaryFeature
//if we're using an in-memory phrase table, then load it now, otherwise wait
if (!FileExists(filePath+".binphr.idx"))
{ // memory phrase table
VERBOSE(2,"using standard phrase tables" << endl);
VERBOSE(2,"using standard phrase tables" << std::endl);
if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
m_filePath += ".gz";
VERBOSE(2,"Using gzipped file" << endl);
VERBOSE(2,"Using gzipped file" << std::endl);
}
if (staticData.GetInputType() != SentenceInput)
{

View File

@ -17,6 +17,7 @@
#include "PrefixTree.h"
#include "File.h"
#include "ObjectPool.h"
#include "LexicalReorderingTable.h"
#include "LVoc.h"
#include "TypeDef.h"
#include "Util.h"
@ -28,13 +29,10 @@ namespace Moses
class Phrase;
class Word;
class ConfusionNet;
class PDTimp;
typedef PrefixTreeF<LabelId,OFF_T> PTF;
class PDTimp;
class PPimp;
class PhraseDictionaryTree : public Dictionary {
PDTimp *imp; //implementation

View File

@ -13,6 +13,8 @@
namespace Moses
{
using namespace std;
void ScoreIndexManager::AddScoreProducer(const ScoreProducer* sp)
{
// Producers must be inserted in the order they are created

View File

@ -309,7 +309,7 @@ void SearchCubePruning::PrintBitmapContainerGraph()
for (iterAccessor = bitmapAccessor.begin(); iterAccessor != bitmapAccessor.end(); ++iterAccessor)
{
cerr << iterAccessor->first << endl;
BitmapContainer &container = *iterAccessor->second;
//BitmapContainer &container = *iterAccessor->second;
}
}

View File

@ -30,6 +30,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
using namespace std;
int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
{
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
@ -158,7 +160,6 @@ bool Sentence::XmlOverlap(size_t startPos, size_t endPos) const {
void Sentence::GetXmlTranslationOptions(std::vector <TranslationOption*> &list, size_t startPos, size_t endPos) const {
//iterate over XmlOptions list, find exact source/target matches
const std::vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
for (std::vector<TranslationOption*>::const_iterator iterXMLOpts = m_xmlOptionsList.begin();
iterXMLOpts != m_xmlOptionsList.end(); iterXMLOpts++) {

View File

@ -438,154 +438,69 @@ StaticData::~StaticData()
bool StaticData::LoadLexicalReorderingModel()
{
std::cerr << "Loading lexical distortion models...\n";
const vector<string> fileStr = m_parameter->GetParam("distortion-file");
const vector<string> weightsStr = m_parameter->GetParam("weight-d");
std::vector<float> weights;
size_t w = 1; //cur weight
size_t f = 0; //cur file
//get weights values
std::cerr << "have " << fileStr.size() << " models\n";
for(size_t j = 0; j < weightsStr.size(); ++j){
weights.push_back(Scan<float>(weightsStr[j]));
}
//load all models
for(size_t i = 0; i < fileStr.size(); ++i)
{
vector<string> spec = Tokenize<string>(fileStr[f], " ");
++f; //mark file as consumed
if(4 != spec.size()){
//wrong file specification string...
std::cerr << "Wrong Lexical Reordering Model Specification for model " << i << "!\n";
return false;
}
//spec[0] = factor map
//spec[1] = name
//spec[2] = num weights
//spec[3] = fileName
//decode data into these
vector<FactorType> input,output;
LexicalReordering::Direction direction;
LexicalReordering::Condition condition;
size_t numWeights;
//decode factor map
vector<string> inputfactors = Tokenize(spec[0],"-");
if(inputfactors.size() == 2){
input = Tokenize<FactorType>(inputfactors[0],",");
output = Tokenize<FactorType>(inputfactors[1],",");
}
else if(inputfactors.size() == 1)
{
//if there is only one side assume it is on e side... why?
output = Tokenize<FactorType>(inputfactors[0],",");
}
else
{
//format error
return false;
}
//decode name
vector<string> params = Tokenize<string>(spec[1],"-");
std::string type(ToLower(params[0]));
std::string dir;
std::string cond;
if(3 == params.size())
{
//name format is 'type'-'direction'-'condition'
dir = ToLower(params[1]);
cond = ToLower(params[2]);
}
else if(2 == params.size())
{
//assume name format is 'type'-'condition' with implicit unidirectional
std::cerr << "Warning: Lexical model type underspecified...assuming unidirectional in model " << i << "\n";
dir = "unidirectional";
cond = ToLower(params[1]);
}
else
{
std::cerr << "Lexical model type underspecified for model " << i << "!\n";
return false;
}
VERBOSE(1, "Loading lexical distortion models...");
const vector<string> fileStr = m_parameter->GetParam("distortion-file");
const vector<string> weightsStr = m_parameter->GetParam("weight-d");
if(dir == "forward"){
direction = LexicalReordering::Forward;
}
else if(dir == "backward" || dir == "unidirectional" || dir == "uni")
{
direction = LexicalReordering::Backward;
}
else if(dir == "bidirectional" || dir == "bi")
{
direction = LexicalReordering::Bidirectional;
}
else
{
std::cerr << "Unknown direction declaration '" << dir << "'for lexical reordering model " << i << "\n";
return false;
}
if(cond == "f"){
condition = LexicalReordering::F;
}
else if(cond == "fe")
{
condition = LexicalReordering::FE;
}
else if(cond == "fec")
{
condition = LexicalReordering::FEC;
}
else
{
std::cerr << "Unknown conditioning declaration '" << cond << "'for lexical reordering model " << i << "!\n";
return false;
}
std::vector<float> weights;
size_t w = 1; //cur weight
size_t f = 0; //cur file
//get weights values
VERBOSE(1, "have " << fileStr.size() << " models" << std::endl);
for(size_t j = 0; j < weightsStr.size(); ++j){
weights.push_back(Scan<float>(weightsStr[j]));
}
//load all models
for(size_t i = 0; i < fileStr.size(); ++i)
{
vector<string> spec = Tokenize<string>(fileStr[f], " ");
++f; //mark file as consumed
if(spec.size() != 4){
UserMessage::Add("Invalid Lexical Reordering Model Specification: " + fileStr[f]);
return false;
}
// spec[0] = factor map
// spec[1] = name
// spec[2] = num weights
// spec[3] = fileName
//decode num weights (and fetch weight from array...)
std::vector<float> mweights;
numWeights = atoi(spec[2].c_str());
for(size_t k = 0; k < numWeights; ++k, ++w)
{
if(w >= weights.size()){
//error not enough weights...
std::cerr << "Lexicalized distortion model: Not enough weights, add to [weight-d]\n";
return false;
} else {
mweights.push_back(weights[w]);
}
}
//decode filename
string filePath = spec[3];
//all ready load it
//std::cerr << type;
if("monotonicity" == type){
m_reorderModels.push_back(new LexicalMonotonicReordering(filePath, mweights, direction, condition, input, output));
}
else if("orientation" == type || "msd" == type)
{
m_reorderModels.push_back(new LexicalOrientationReordering(filePath, mweights, direction, condition, input, output));
}
else if("directional" == type)
{
m_reorderModels.push_back(new LexicalDirectionalReordering(filePath, mweights, direction, condition, input, output));
}
else
{
//error unknown type!
std::cerr << " ...unknown type!\n";
return false;
}
//std::cerr << "\n";
}
return true;
// decode factor map
vector<FactorType> input, output;
vector<string> inputfactors = Tokenize(spec[0],"-");
if(inputfactors.size() == 2){
input = Tokenize<FactorType>(inputfactors[0],",");
output = Tokenize<FactorType>(inputfactors[1],",");
} else if(inputfactors.size() == 1) {
//if there is only one side assume it is on e side... why?
output = Tokenize<FactorType>(inputfactors[0],",");
} else {
//format error
return false;
}
string modelType = spec[1];
// decode num weights and fetch weights from array
std::vector<float> mweights;
size_t numWeights = atoi(spec[2].c_str());
for(size_t k = 0; k < numWeights; ++k, ++w)
{
if(w >= weights.size()){
UserMessage::Add("Lexicalized distortion model: Not enough weights, add to [weight-d]");
return false;
} else
mweights.push_back(weights[w]);
}
string filePath = spec[3];
m_reorderModels.push_back(new LexicalReordering(input, output, modelType, filePath, mweights));
}
return true;
}
bool StaticData::LoadGlobalLexicalModel()
{
const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-lex"));

View File

@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "PhraseDictionaryMemory.h"
#include "GenerationDictionary.h"
#include "LMList.h"
#include "LexicalReordering.h"
#include "StaticData.h"
#include "InputType.h"
@ -165,7 +166,7 @@ ostream& operator<<(ostream& out, const TranslationOption& possibleTranslation)
}
// Forwards the address of the given reordering model together with its
// score vector to m_reordering.Assign().
// NOTE(review): the two `score` parameter lines below look like the old
// (Score) and new (Scores) spelling of the same parameter -- confirm which
// one the compiled file keeps.
void TranslationOption::CacheReorderingProb(const LexicalReordering &lexreordering
, const Score &score)
, const Scores &score)
{
m_reordering.Assign(&lexreordering, score);
}

View File

@ -37,6 +37,7 @@ namespace Moses
class PhraseDictionary;
class GenerationDictionary;
class LexicalReordering;
/** Available phrase translation for a particular sentence pair.
* In a multi-factor model, this is expanded from the entries in the
@ -177,7 +178,7 @@ public:
void CalcScore();
void CacheReorderingProb(const LexicalReordering &lexreordering
, const Score &score);
, const Scores &score);
TO_STRING();
};

View File

@ -28,6 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "PhraseDictionaryMemory.h"
#include "FactorCollection.h"
#include "InputType.h"
#include "LexicalReordering.h"
#include "Util.h"
#include "StaticData.h"
#include "DecodeStepTranslation.h"
@ -639,7 +640,7 @@ void TranslationOptionCollection::CacheLexReordering()
const Phrase *sourcePhrase = transOpt.GetSourcePhrase();
if (sourcePhrase)
{
Score score = lexreordering.GetProb(*sourcePhrase
Scores score = lexreordering.GetProb(*sourcePhrase
, transOpt.GetTargetPhrase());
// TODO should have better handling of unknown reordering entries
if (!score.empty())

View File

@ -186,6 +186,8 @@ typedef size_t FactorType;
typedef std::vector<float> Scores;
typedef std::vector<std::string> WordAlignments;
typedef std::vector<FactorType> FactorList;
typedef std::pair<std::vector<std::string const*>,Scores > StringTgtCand;
typedef std::pair<std::vector<std::string const*>,WordAlignments > StringWordAlignmentCand;

View File

@ -31,6 +31,7 @@
namespace Moses
{
using namespace std;
string ParseXmlTagAttribute(const string& tag,const string& attributeName){
/*TODO deal with unescaping \"*/