mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-08-16 15:00:33 +03:00
Merge branch 'master' of github.com:moses-smt/mosesdecoder
This commit is contained in:
commit
6ea0bb1f61
4
Jamroot
4
Jamroot
@ -76,6 +76,10 @@ include $(TOP)/jam-files/sanity.jam ;
|
||||
boost 103600 ;
|
||||
external-lib z ;
|
||||
|
||||
lib dl : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
|
||||
requirements += <library>dl ;
|
||||
|
||||
|
||||
if ! [ option.get "without-tcmalloc" : : "yes" ] && [ test_library "tcmalloc_minimal" ] {
|
||||
if [ option.get "full-tcmalloc" : : "yes" ] {
|
||||
external-lib unwind ;
|
||||
|
@ -1066,6 +1066,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/DistortionScoreProducer.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/ExternalFeature.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ExternalFeature.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/ExternalFeature.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ExternalFeature.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/FFState.cpp</name>
|
||||
<type>1</type>
|
||||
|
@ -93,6 +93,9 @@ void ChartManager::ProcessSentence()
|
||||
m_parser.Create(range, m_translationOptionList);
|
||||
m_translationOptionList.ApplyThreshold();
|
||||
|
||||
const InputPath &inputPath = m_parser.GetInputPath(range);
|
||||
m_translationOptionList.Evaluate(m_source, inputPath);
|
||||
|
||||
// decode
|
||||
ChartCell &cell = m_hypoStackColl.Get(range);
|
||||
|
||||
|
@ -219,6 +219,11 @@ void ChartParser::CreateInputPaths(const InputType &input)
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience overload: resolves the input path for the span described by
// `range` by forwarding its start and end positions to the
// (startPos, endPos) overload.
const InputPath &ChartParser::GetInputPath(WordsRange &range) const
{
  const size_t spanStart = range.GetStartPos();
  const size_t spanEnd = range.GetEndPos();
  return GetInputPath(spanStart, spanEnd);
}
|
||||
|
||||
const InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos) const
|
||||
{
|
||||
size_t offset = endPos - startPos;
|
||||
|
@ -66,6 +66,7 @@ public:
|
||||
long GetTranslationId() const;
|
||||
size_t GetSize() const;
|
||||
const InputPath &GetInputPath(size_t startPos, size_t endPos) const;
|
||||
const InputPath &GetInputPath(WordsRange &range) const;
|
||||
|
||||
private:
|
||||
ChartParserUnknown m_unknown;
|
||||
|
@ -13,6 +13,7 @@ class ChartTranslationOption
|
||||
protected:
|
||||
const TargetPhrase &m_targetPhrase;
|
||||
ScoreComponentCollection m_scoreBreakdown;
|
||||
const InputPath *m_inputPath;
|
||||
|
||||
public:
|
||||
ChartTranslationOption(const TargetPhrase &targetPhrase);
|
||||
@ -21,6 +22,11 @@ public:
|
||||
return m_targetPhrase;
|
||||
}
|
||||
|
||||
void SetInputPath(const InputPath *inputPath)
|
||||
{ m_inputPath = inputPath; }
|
||||
// Returns the input path pointer last stored with SetInputPath().
const InputPath *GetInputPath() const
{
  return this->m_inputPath;
}
|
||||
|
||||
const ScoreComponentCollection &GetScores() const {
|
||||
return m_scoreBreakdown;
|
||||
}
|
||||
|
@ -69,6 +69,7 @@ void ChartTranslationOptions::Evaluate(const InputType &input, const InputPath &
|
||||
CollType::iterator iter;
|
||||
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
|
||||
ChartTranslationOption &transOpt = **iter;
|
||||
transOpt.SetInputPath(&inputPath);
|
||||
transOpt.Evaluate(input, inputPath);
|
||||
}
|
||||
|
||||
|
73
moses/FF/ExternalFeature.cpp
Normal file
73
moses/FF/ExternalFeature.cpp
Normal file
@ -0,0 +1,73 @@
|
||||
#include "ExternalFeature.h"
|
||||
#include <dlfcn.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
// Builds a state that owns a private copy of `stateSize` bytes read from `data`.
//
// stateSize: number of bytes to copy.
// data:      source buffer; must be readable for stateSize bytes.
//
// When there is nothing to copy (non-positive size or null source) the state
// is left without a buffer (m_data == NULL) instead of handing invalid
// arguments to malloc/memcpy. An allocation failure is fatal, matching the
// cerr + abort() error style used elsewhere in this file.
ExternalFeatureState::ExternalFeatureState(int stateSize, void *data)
  :m_stateSize(stateSize)
  ,m_data(NULL)
{
  if (stateSize <= 0 || data == NULL) {
    return;
  }

  m_data = malloc(stateSize);
  if (m_data == NULL) {
    cerr << "ExternalFeatureState: out of memory allocating state buffer\n";
    abort();
  }
  memcpy(m_data, data, stateSize);
}
|
||||
|
||||
void ExternalFeature::Load()
|
||||
{
|
||||
string nparam = "testing";
|
||||
|
||||
if (m_path.size() < 1) {
|
||||
cerr << "External requires a path to a dynamic library!\n";
|
||||
abort();
|
||||
}
|
||||
lib_handle = dlopen(m_path.c_str(), RTLD_LAZY);
|
||||
if (!lib_handle) {
|
||||
cerr << "dlopen reports: " << dlerror() << endl;
|
||||
cerr << "Did you provide a full path to the dynamic library?\n";
|
||||
abort();
|
||||
}
|
||||
CdecFF* (*fn)(const string&) =
|
||||
(CdecFF* (*)(const string&))(dlsym(lib_handle, "create_ff"));
|
||||
if (!fn) {
|
||||
cerr << "dlsym reports: " << dlerror() << endl;
|
||||
abort();
|
||||
}
|
||||
ff_ext = (*fn)(nparam);
|
||||
m_stateSize = ff_ext->StateSize();
|
||||
|
||||
}
|
||||
|
||||
// Releases the plugin object and then the library it came from.
//
// Order matters: ff_ext was created by a function resolved from lib_handle,
// so it is destroyed while the library is still loaded. dlclose() is only
// called on a non-null handle; both pointers are cleared afterwards so a stale
// value can never be reused.
ExternalFeature::~ExternalFeature()
{
  delete ff_ext;
  ff_ext = NULL;

  if (lib_handle) {
    dlclose(lib_handle);
    lib_handle = NULL;
  }
}
|
||||
|
||||
void ExternalFeature::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "path") {
|
||||
m_path = value;
|
||||
}
|
||||
else {
|
||||
StatefulFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
// Hypothesis-level evaluation. The current implementation does not read any
// of its arguments and adds nothing to the accumulator; it only returns a
// newly allocated ExternalFeatureState of m_stateSize with no data attached.
FFState* ExternalFeature::Evaluate(
  const Hypothesis& cur_hypo,
  const FFState* prev_state,
  ScoreComponentCollection* accumulator) const
{
  FFState *freshState = new ExternalFeatureState(m_stateSize);
  return freshState;
}
|
||||
|
||||
// Chart-decoding evaluation. As with the hypothesis-level overload, the
// arguments are not consulted and no score is accumulated; a newly allocated
// ExternalFeatureState of m_stateSize with no data attached is returned.
FFState* ExternalFeature::EvaluateChart(
  const ChartHypothesis& /* cur_hypo */,
  int /* featureID - used to index the state in the previous hypotheses */,
  ScoreComponentCollection* accumulator) const
{
  FFState *freshState = new ExternalFeatureState(m_stateSize);
  return freshState;
}
|
||||
|
||||
|
||||
}
|
||||
|
93
moses/FF/ExternalFeature.h
Normal file
93
moses/FF/ExternalFeature.h
Normal file
@ -0,0 +1,93 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include "StatefulFeatureFunction.h"
|
||||
#include "FFState.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class CdecFF;
|
||||
|
||||
class ExternalFeatureState : public FFState
|
||||
{
|
||||
protected:
|
||||
int m_stateSize;
|
||||
void *m_data;
|
||||
public:
|
||||
ExternalFeatureState(int stateSize)
|
||||
:m_stateSize(stateSize)
|
||||
,m_data(NULL)
|
||||
{}
|
||||
ExternalFeatureState(int stateSize, void *data);
|
||||
|
||||
~ExternalFeatureState()
|
||||
{
|
||||
free(m_data);
|
||||
}
|
||||
|
||||
int Compare(const FFState& other) const
|
||||
{
|
||||
const ExternalFeatureState &otherFF = static_cast<const ExternalFeatureState&>(other);
|
||||
int ret = memcmp(m_data, otherFF.m_data, m_stateSize);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
// copied from cdec
|
||||
class ExternalFeature : public StatefulFeatureFunction
|
||||
{
|
||||
public:
|
||||
ExternalFeature(const std::string &line)
|
||||
:StatefulFeatureFunction("ExternalFeature", line)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
~ExternalFeature();
|
||||
|
||||
void Load();
|
||||
|
||||
bool IsUseable(const FactorMask &mask) const
|
||||
{ return true; }
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
void Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
void Evaluate(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, ScoreComponentCollection &scoreBreakdown) const
|
||||
{}
|
||||
FFState* Evaluate(
|
||||
const Hypothesis& cur_hypo,
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
FFState* EvaluateChart(
|
||||
const ChartHypothesis& /* cur_hypo */,
|
||||
int /* featureID - used to index the state in the previous hypotheses */,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
virtual const FFState* EmptyHypothesisState(const InputType &input) const
|
||||
{
|
||||
return new ExternalFeatureState(m_stateSize);
|
||||
}
|
||||
|
||||
protected:
|
||||
std::string m_path;
|
||||
void* lib_handle;
|
||||
CdecFF *ff_ext;
|
||||
int m_stateSize;
|
||||
};
|
||||
|
||||
// Interface implemented by feature objects created inside an externally
// loaded library.
//
// Instances are deleted through a CdecFF pointer (see
// ExternalFeature::~ExternalFeature), so the destructor must be virtual;
// otherwise deleting a derived object through the base pointer is undefined
// behaviour and the derived destructor is not run.
//
// NOTE(review): adding a virtual member changes this class's virtual table,
// so plugin libraries need to be rebuilt against this header. The destructor
// is declared after StateSize() to leave that function's position unchanged.
class CdecFF
{
public:
  // Size in bytes of the state this feature keeps per hypothesis.
  virtual int StateSize() const = 0;

  virtual ~CdecFF() {}
};
|
||||
|
||||
}
|
||||
|
@ -30,6 +30,8 @@
|
||||
#include "moses/FF/PhrasePenalty.h"
|
||||
#include "moses/FF/OSM-Feature/OpSequenceModel.h"
|
||||
#include "moses/FF/ControlRecombination.h"
|
||||
#include "moses/FF/ExternalFeature.h"
|
||||
|
||||
#include "moses/FF/SkeletonStatelessFF.h"
|
||||
#include "moses/FF/SkeletonStatefulFF.h"
|
||||
|
||||
@ -142,6 +144,7 @@ FeatureRegistry::FeatureRegistry()
|
||||
MOSES_FNAME(ControlRecombination);
|
||||
MOSES_FNAME(SkeletonStatelessFF);
|
||||
MOSES_FNAME(SkeletonStatefulFF);
|
||||
MOSES_FNAME(ExternalFeature);
|
||||
|
||||
#ifdef HAVE_SYNLM
|
||||
MOSES_FNAME(SyntacticLanguageModel);
|
||||
|
@ -18,6 +18,8 @@ InputPath::InputPath(const Phrase &phrase, const NonTerminalSet &sourceNonTerms,
|
||||
,m_range(range)
|
||||
,m_inputScore(inputScore)
|
||||
{
|
||||
//cerr << "phrase=" << phrase << " m_inputScore=" << *m_inputScore << endl;
|
||||
|
||||
FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor().first;
|
||||
if (placeholderFactor != NOT_FOUND) {
|
||||
for (size_t pos = 0; pos < m_phrase.GetSize(); ++pos) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
// $Id$
|
||||
#include <vector>
|
||||
|
||||
#include "util/exception.hh"
|
||||
#include "ScoreComponentCollection.h"
|
||||
#include "StaticData.h"
|
||||
|
||||
@ -30,6 +30,20 @@ void ScorePair::PlusEquals(const StringPiece &key, float value)
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const ScorePair& rhs)
|
||||
{
|
||||
for (size_t i = 0; i < rhs.denseScores.size(); ++i) {
|
||||
os << rhs.denseScores[i] << ",";
|
||||
}
|
||||
|
||||
std::map<StringPiece, float>::const_iterator iter;
|
||||
for (iter = rhs.sparseScores.begin(); iter != rhs.sparseScores.end(); ++iter) {
|
||||
os << iter->first << "=" << iter->second << ",";
|
||||
}
|
||||
|
||||
return os;
|
||||
}
|
||||
|
||||
ScoreComponentCollection::ScoreIndexMap ScoreComponentCollection::s_scoreIndexes;
|
||||
size_t ScoreComponentCollection::s_denseVectorSize = 0;
|
||||
|
||||
@ -206,6 +220,21 @@ void ScoreComponentCollection::Assign(const FeatureFunction* sp, const string li
|
||||
}
|
||||
}
|
||||
|
||||
void ScoreComponentCollection::Assign(const FeatureFunction* sp, const std::vector<float>& scores) {
|
||||
IndexPair indexes = GetIndexes(sp);
|
||||
size_t numScores = indexes.second - indexes.first;
|
||||
|
||||
if (scores.size() != numScores) {
|
||||
UTIL_THROW(util::Exception, "Feature function " << sp->GetScoreProducerDescription() << " specified "
|
||||
<< numScores << " dense scores or weights. Actually has " << scores.size());
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < scores.size(); ++i) {
|
||||
m_scores[i + indexes.first] = scores[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ScoreComponentCollection::InvertDenseFeatures(const FeatureFunction* sp)
|
||||
{
|
||||
|
||||
|
@ -46,6 +46,8 @@ namespace Moses
|
||||
*/
|
||||
struct ScorePair
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream& os, const ScorePair& rhs);
|
||||
|
||||
std::vector<float> denseScores;
|
||||
std::map<StringPiece, float> sparseScores;
|
||||
|
||||
@ -262,13 +264,7 @@ public:
|
||||
m_scores[fname] += score;
|
||||
}
|
||||
|
||||
void Assign(const FeatureFunction* sp, const std::vector<float>& scores) {
|
||||
IndexPair indexes = GetIndexes(sp);
|
||||
CHECK(scores.size() == indexes.second - indexes.first);
|
||||
for (size_t i = 0; i < scores.size(); ++i) {
|
||||
m_scores[i + indexes.first] = scores[i];
|
||||
}
|
||||
}
|
||||
void Assign(const FeatureFunction* sp, const std::vector<float>& scores);
|
||||
|
||||
//! Special version Assign(ScoreProducer, vector<float>)
|
||||
//! to add the score from a single ScoreProducer that produces
|
||||
|
@ -1,44 +0,0 @@
|
||||
//
|
||||
// ExtractedRule.cpp
|
||||
// extract
|
||||
//
|
||||
// Created by Hieu Hoang on 13/09/2011.
|
||||
// Copyright 2011 __MyCompanyName__. All rights reserved.
|
||||
//
|
||||
|
||||
#include "ExtractedRule.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
|
||||
void ExtractedRule::OutputNTLengths(std::ostream &out) const
|
||||
{
|
||||
ostringstream outString;
|
||||
OutputNTLengths(outString);
|
||||
out << outString;
|
||||
}
|
||||
|
||||
void ExtractedRule::OutputNTLengths(std::ostringstream &outString) const
|
||||
{
|
||||
std::map<size_t, std::pair<size_t, size_t> >::const_iterator iter;
|
||||
for (iter = m_ntLengths.begin(); iter != m_ntLengths.end(); ++iter) {
|
||||
size_t sourcePos = iter->first;
|
||||
const std::pair<size_t, size_t> &spanLengths = iter->second;
|
||||
outString << sourcePos << "=" << spanLengths.first << "," <<spanLengths.second << " ";
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const ExtractedRule &obj)
|
||||
{
|
||||
out << obj.source << " ||| " << obj.target << " ||| "
|
||||
<< obj.alignment << " ||| "
|
||||
<< obj.alignmentInv << " ||| ";
|
||||
|
||||
obj.OutputNTLengths(out);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
} // namespace
|
@ -32,8 +32,6 @@ namespace MosesTraining
|
||||
// sentence-level collection of rules
|
||||
class ExtractedRule
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream &, const ExtractedRule &);
|
||||
|
||||
public:
|
||||
std::string source;
|
||||
std::string target;
|
||||
@ -54,8 +52,6 @@ public:
|
||||
float count;
|
||||
double pcfgScore;
|
||||
|
||||
std::map<size_t, std::pair<size_t, size_t> > m_ntLengths;
|
||||
|
||||
ExtractedRule(int sT, int eT, int sS, int eS)
|
||||
: source()
|
||||
, target()
|
||||
@ -76,13 +72,6 @@ public:
|
||||
, count(0)
|
||||
, pcfgScore(0.0) {
|
||||
}
|
||||
|
||||
void SetSpanLength(size_t sourcePos, size_t sourceLength, size_t targetLength) {
|
||||
m_ntLengths[sourcePos] = std::pair<size_t, size_t>(sourceLength, targetLength);
|
||||
}
|
||||
|
||||
void OutputNTLengths(std::ostream &out) const;
|
||||
void OutputNTLengths(std::ostringstream &out) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -49,7 +49,6 @@ public:
|
||||
bool duplicateRules;
|
||||
bool fractionalCounting;
|
||||
bool pcfgScore;
|
||||
bool outputNTLengths;
|
||||
bool gzOutput;
|
||||
bool unpairedExtractFormat;
|
||||
bool conditionOnTargetLhs;
|
||||
@ -83,7 +82,6 @@ public:
|
||||
, duplicateRules(true)
|
||||
, fractionalCounting(true)
|
||||
, pcfgScore(false)
|
||||
, outputNTLengths(false)
|
||||
, gzOutput(false)
|
||||
, unpairedExtractFormat(false)
|
||||
, conditionOnTargetLhs(false)
|
||||
|
@ -41,7 +41,6 @@ bool lowCountFlag = false;
|
||||
bool goodTuringFlag = false;
|
||||
bool kneserNeyFlag = false;
|
||||
bool logProbFlag = false;
|
||||
bool outputNTLengths = false;
|
||||
inline float maybeLogProb( float a )
|
||||
{
|
||||
return logProbFlag ? log(a) : a;
|
||||
@ -62,7 +61,7 @@ int main(int argc, char* argv[])
|
||||
<< "consolidating direct and indirect rule tables\n";
|
||||
|
||||
if (argc < 4) {
|
||||
cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] [--OutputNTLengths] \n";
|
||||
cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] \n";
|
||||
exit(1);
|
||||
}
|
||||
char* &fileNameDirect = argv[1];
|
||||
@ -119,8 +118,6 @@ int main(int argc, char* argv[])
|
||||
} else if (strcmp(argv[i],"--LogProb") == 0) {
|
||||
logProbFlag = true;
|
||||
cerr << "using log-probabilities\n";
|
||||
} else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
|
||||
outputNTLengths = true;
|
||||
} else {
|
||||
cerr << "ERROR: unknown option " << argv[i] << endl;
|
||||
exit(1);
|
||||
@ -315,10 +312,6 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
|
||||
// counts, for debugging
|
||||
fileConsolidated << "||| " << countE << " " << countF << " " << countEF;
|
||||
|
||||
if (outputNTLengths) {
|
||||
fileConsolidated << " ||| " << itemDirect[5];
|
||||
}
|
||||
|
||||
// count bin feature (as a sparse feature)
|
||||
if (sparseCountBinFeatureFlag ||
|
||||
directSparseScores.compare("") != 0 ||
|
||||
|
@ -129,7 +129,6 @@ int main(int argc, char* argv[])
|
||||
<< " --GlueGrammar FILE"
|
||||
<< " | --UnknownWordLabel FILE"
|
||||
<< " | --OnlyDirect"
|
||||
<< " | --OutputNTLengths"
|
||||
<< " | --MaxSpan[" << options.maxSpan << "]"
|
||||
<< " | --MinHoleTarget[" << options.minHoleTarget << "]"
|
||||
<< " | --MinHoleSource[" << options.minHoleSource << "]"
|
||||
@ -262,8 +261,6 @@ int main(int argc, char* argv[])
|
||||
options.fractionalCounting = false;
|
||||
} else if (strcmp(argv[i],"--PCFG") == 0) {
|
||||
options.pcfgScore = true;
|
||||
} else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
|
||||
options.outputNTLengths = true;
|
||||
} else if (strcmp(argv[i],"--UnpairedExtractFormat") == 0) {
|
||||
options.unpairedExtractFormat = true;
|
||||
} else if (strcmp(argv[i],"--ConditionOnTargetLHS") == 0) {
|
||||
@ -663,9 +660,6 @@ void ExtractTask::saveHieroAlignment( int startT, int endT, int startS, int endS
|
||||
rule.alignment += sourceSymbolIndex + "-" + targetSymbolIndex + " ";
|
||||
if (!m_options.onlyDirectFlag)
|
||||
rule.alignmentInv += targetSymbolIndex + "-" + sourceSymbolIndex + " ";
|
||||
|
||||
rule.SetSpanLength(hole.GetPos(0), hole.GetSize(0), hole.GetSize(1) ) ;
|
||||
|
||||
}
|
||||
|
||||
rule.alignment.erase(rule.alignment.size()-1);
|
||||
@ -1077,9 +1071,6 @@ void ExtractTask::writeRulesToFile()
|
||||
<< rule->target << " ||| "
|
||||
<< rule->alignment << " ||| "
|
||||
<< rule->count << " ||| ";
|
||||
if (m_options.outputNTLengths) {
|
||||
rule->OutputNTLengths(out);
|
||||
}
|
||||
if (m_options.pcfgScore) {
|
||||
out << " ||| " << rule->pcfgScore;
|
||||
}
|
||||
|
@ -59,7 +59,6 @@ int negLogProb = 1;
|
||||
bool lexFlag = true;
|
||||
bool unalignedFlag = false;
|
||||
bool unalignedFWFlag = false;
|
||||
bool outputNTLengths = false;
|
||||
bool singletonFeature = false;
|
||||
bool crossedNonTerm = false;
|
||||
int countOfCounts[COC_MAX+1];
|
||||
@ -82,9 +81,6 @@ double computeUnalignedPenalty( const PHRASE &, const PHRASE &, const PhraseAlig
|
||||
set<string> functionWordList;
|
||||
void loadFunctionWords( const string &fileNameFunctionWords );
|
||||
double computeUnalignedFWPenalty( const PHRASE &, const PHRASE &, const PhraseAlignment & );
|
||||
void calcNTLengthProb(const vector< PhraseAlignment* > &phrasePairs
|
||||
, map<size_t, map<size_t, float> > &sourceProb
|
||||
, map<size_t, map<size_t, float> > &targetProb);
|
||||
void printSourcePhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &);
|
||||
void printTargetPhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &);
|
||||
|
||||
@ -95,7 +91,7 @@ int main(int argc, char* argv[])
|
||||
|
||||
ScoreFeatureManager featureManager;
|
||||
if (argc < 4) {
|
||||
cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--NoWordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--OutputNTLengths] [--PCFG] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--Singleton] [--CrossedNonTerm] \n";
|
||||
cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--NoWordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--PCFG] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--Singleton] [--CrossedNonTerm] \n";
|
||||
cerr << featureManager.usage() << endl;
|
||||
exit(1);
|
||||
}
|
||||
@ -158,8 +154,6 @@ int main(int argc, char* argv[])
|
||||
minCountHierarchical = atof(argv[++i]);
|
||||
cerr << "dropping all phrase pairs occurring less than " << minCountHierarchical << " times\n";
|
||||
minCountHierarchical -= 0.00001; // account for rounding
|
||||
} else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
|
||||
outputNTLengths = true;
|
||||
} else if (strcmp(argv[i],"--Singleton") == 0) {
|
||||
singletonFeature = true;
|
||||
cerr << "binary singleton feature\n";
|
||||
@ -375,87 +369,6 @@ const PhraseAlignment &findBestAlignment(const PhraseAlignmentCollection &phrase
|
||||
return *bestAlignment;
|
||||
}
|
||||
|
||||
|
||||
// Converts per-position length counts into relative frequencies.
//
// lengths: source position -> (length -> count)
// total:   denominator applied to every count
// probs:   output; probs[pos][length] is set to count / total (as float).
//          Existing entries for other keys are left untouched, and a position
//          with no counts creates no entry.
void calcNTLengthProb(const map<size_t, map<size_t, size_t> > &lengths
                      , size_t total
                      , map<size_t, map<size_t, float> > &probs)
{
  typedef map<size_t, size_t> CountByLength;
  typedef map<size_t, CountByLength> CountByPos;

  CountByPos::const_iterator posIt = lengths.begin();
  while (posIt != lengths.end()) {
    const CountByLength &counts = posIt->second;

    CountByLength::const_iterator lenIt = counts.begin();
    while (lenIt != counts.end()) {
      probs[posIt->first][lenIt->first] =
        static_cast<float>(lenIt->second) / static_cast<float>(total);
      ++lenIt;
    }
    ++posIt;
  }
}
|
||||
|
||||
// Collects non-terminal span-length statistics over a set of phrase pairs and
// turns them into relative frequencies.
//
// For every phrase pair, each (sourcePos -> (sourceLength, targetLength))
// entry from GetNTLengths() increments the matching source-length and
// target-length counters and the per-position total. If no non-terminal was
// seen, the outputs are left untouched. Otherwise the total of the first
// source position is used as the common denominator (an assert checks that
// the second position, if any, has the same total) and the three-argument
// overload fills sourceProb and targetProb.
void calcNTLengthProb(const vector< PhraseAlignment* > &phrasePairs
                      , map<size_t, map<size_t, float> > &sourceProb
                      , map<size_t, map<size_t, float> > &targetProb)
{
  typedef std::map<size_t, std::pair<size_t, size_t> > NTLengthMap;

  // position in source phrase -> length -> count
  map<size_t, map<size_t, size_t> > srcCounts, tgtCounts;
  // position in source phrase -> number of observations
  map<size_t, size_t> perPosTotal;

  for (size_t i = 0; i < phrasePairs.size(); ++i) {
    const NTLengthMap &ntLengths = phrasePairs[i]->GetNTLengths();

    for (NTLengthMap::const_iterator nt = ntLengths.begin(); nt != ntLengths.end(); ++nt) {
      const size_t sourcePos = nt->first;

      ++srcCounts[sourcePos][nt->second.first];
      ++tgtCounts[sourcePos][nt->second.second];
      ++perPosTotal[sourcePos];
    }
  }

  if (perPosTotal.empty()) {
    // no non-terminals at all: nothing to compute
    return;
  }

  const size_t total = perPosTotal.begin()->second;
  if (perPosTotal.size() > 1) {
    assert(total == (++perPosTotal.begin())->second );
  }

  calcNTLengthProb(srcCounts, total, sourceProb);
  calcNTLengthProb(tgtCounts, total, targetProb);
}
|
||||
|
||||
// Writes every probability in `probs` to phraseTableFile as
//   <sourcePos>|<prefix>|<length>=<prob>
// followed by a single space, iterating positions and lengths in key order.
void outputNTLengthProbs(ostream &phraseTableFile, const map<size_t, map<size_t, float> > &probs, const string &prefix)
{
  typedef map<size_t, float> ProbByLength;
  typedef map<size_t, ProbByLength> ProbByPos;

  for (ProbByPos::const_iterator posIt = probs.begin(); posIt != probs.end(); ++posIt) {
    const ProbByLength &byLength = posIt->second;

    for (ProbByLength::const_iterator lenIt = byLength.begin(); lenIt != byLength.end(); ++lenIt) {
      phraseTableFile << posIt->first << "|" << prefix << "|"
                      << lenIt->first << "=" << lenIt->second << " ";
    }
  }
}
|
||||
|
||||
bool calcCrossedNonTerm(size_t sourcePos, size_t targetPos, const std::vector< std::set<size_t> > &alignedToS)
|
||||
{
|
||||
for (size_t currSource = 0; currSource < alignedToS.size(); ++currSource) {
|
||||
@ -664,21 +577,6 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
|
||||
if (kneserNeyFlag)
|
||||
phraseTableFile << " " << distinctCount;
|
||||
|
||||
// nt lengths
|
||||
if (outputNTLengths) {
|
||||
phraseTableFile << " ||| ";
|
||||
|
||||
if (!inverseFlag) {
|
||||
map<size_t, map<size_t, float> > sourceProb, targetProb;
|
||||
// 1st sourcePos, 2nd = length, 3rd = prob
|
||||
|
||||
calcNTLengthProb(phrasePair, sourceProb, targetProb);
|
||||
|
||||
outputNTLengthProbs(phraseTableFile, sourceProb, "S");
|
||||
outputNTLengthProbs(phraseTableFile, targetProb, "T");
|
||||
}
|
||||
}
|
||||
|
||||
phraseTableFile << endl;
|
||||
}
|
||||
|
||||
|
@ -6,7 +6,8 @@
|
||||
Distortion0= 0.3
|
||||
UnknownWordPenalty0= 1
|
||||
WordPenalty0= -1
|
||||
TranslationModel0= 0.2 0.2 0.2 0.2 0.2
|
||||
TranslationModel0= 0.2 0.2 0.2 0.2
|
||||
PhrasePenalty0= 0.2
|
||||
LexicalReordering0= 0.3 0.3 0.3 0.3 0.3 0.3
|
||||
LM0= 0.5
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user