mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 11:28:48 +03:00
Merge branch 'master' into dynamic-phrase-tables
Conflicts: contrib/server/Jamfile contrib/server/mosesserver.cpp
This commit is contained in:
commit
7c145d045b
@ -166,6 +166,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/ChartHypothesisCollection.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ChartKBestExtractor.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/ChartKBestExtractor.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ChartKBestExtractor.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/ChartKBestExtractor.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ChartManager.cpp</name>
|
||||
<type>1</type>
|
||||
@ -1251,6 +1261,26 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/PhrasePenalty.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/ReferenceComparison.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ReferenceComparison.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/ReferenceComparison.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ReferenceComparison.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/RuleAmbiguity.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/RuleAmbiguity.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/RuleAmbiguity.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/RuleAmbiguity.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/SetSourcePhrase.cpp</name>
|
||||
<type>1</type>
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <xmlrpc-c/server_abyss.hpp>
|
||||
|
||||
using namespace Moses;
|
||||
using namespace MosesCmd;
|
||||
using namespace std;
|
||||
|
||||
typedef std::map<std::string, xmlrpc_c::value> params_t;
|
||||
@ -117,20 +118,17 @@ public:
|
||||
void breakOutParams(const params_t& params) {
|
||||
params_t::const_iterator si = params.find("source");
|
||||
if(si == params.end())
|
||||
throw xmlrpc_c::fault("Missing source sentence",
|
||||
xmlrpc_c::fault::CODE_PARSE);
|
||||
throw xmlrpc_c::fault("Missing source sentence", xmlrpc_c::fault::CODE_PARSE);
|
||||
source_ = xmlrpc_c::value_string(si->second);
|
||||
cerr << "source = " << source_ << endl;
|
||||
si = params.find("target");
|
||||
if(si == params.end())
|
||||
throw xmlrpc_c::fault("Missing target sentence",
|
||||
xmlrpc_c::fault::CODE_PARSE);
|
||||
throw xmlrpc_c::fault("Missing target sentence", xmlrpc_c::fault::CODE_PARSE);
|
||||
target_ = xmlrpc_c::value_string(si->second);
|
||||
cerr << "target = " << target_ << endl;
|
||||
si = params.find("alignment");
|
||||
if(si == params.end())
|
||||
throw xmlrpc_c::fault("Missing alignment",
|
||||
xmlrpc_c::fault::CODE_PARSE);
|
||||
throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE);
|
||||
alignment_ = xmlrpc_c::value_string(si->second);
|
||||
cerr << "alignment = " << alignment_ << endl;
|
||||
si = params.find("bounded");
|
||||
@ -163,8 +161,7 @@ public:
|
||||
xmlrpc_c::fault::CODE_PARSE);
|
||||
}
|
||||
const string model_name = xmlrpc_c::value_string(si->second);
|
||||
PhraseDictionaryMultiModel*
|
||||
pdmm = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
|
||||
PhraseDictionaryMultiModel* pdmm = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
|
||||
|
||||
si = params.find("phrase_pairs");
|
||||
if (si == params.end()) {
|
||||
@ -175,13 +172,10 @@ public:
|
||||
|
||||
vector<pair<string, string> > phrase_pairs;
|
||||
|
||||
xmlrpc_c::value_array phrase_pairs_array
|
||||
= xmlrpc_c::value_array(si->second);
|
||||
vector<xmlrpc_c::value> phrasePairValueVector
|
||||
(phrase_pairs_array.vectorValueValue());
|
||||
xmlrpc_c::value_array phrase_pairs_array = xmlrpc_c::value_array(si->second);
|
||||
vector<xmlrpc_c::value> phrasePairValueVector(phrase_pairs_array.vectorValueValue());
|
||||
for (size_t i=0;i < phrasePairValueVector.size();i++) {
|
||||
xmlrpc_c::value_array phrasePairArray
|
||||
= xmlrpc_c::value_array(phrasePairValueVector[i]);
|
||||
xmlrpc_c::value_array phrasePairArray = xmlrpc_c::value_array(phrasePairValueVector[i]);
|
||||
vector<xmlrpc_c::value> phrasePair(phrasePairArray.vectorValueValue());
|
||||
string L1 = xmlrpc_c::value_string(phrasePair[0]);
|
||||
string L2 = xmlrpc_c::value_string(phrasePair[1]);
|
||||
@ -197,8 +191,7 @@ public:
|
||||
}
|
||||
*retvalP = xmlrpc_c::value_array(weight_vector_ret);
|
||||
#else
|
||||
string errmsg = "Error: Perplexity minimization requires dlib ";
|
||||
errmsg += "(compilation option --with-dlib)";
|
||||
string errmsg = "Error: Perplexity minimization requires dlib (compilation option --with-dlib)";
|
||||
cerr << errmsg << endl;
|
||||
*retvalP = xmlrpc_c::value_string(errmsg);
|
||||
#endif
|
||||
@ -234,6 +227,8 @@ public:
|
||||
cerr << "Input: " << source << endl;
|
||||
si = params.find("align");
|
||||
bool addAlignInfo = (si != params.end());
|
||||
si = params.find("word-align");
|
||||
bool addWordAlignInfo = (si != params.end());
|
||||
si = params.find("sg");
|
||||
bool addGraphInfo = (si != params.end());
|
||||
si = params.find("topt");
|
||||
@ -241,8 +236,7 @@ public:
|
||||
si = params.find("report-all-factors");
|
||||
bool reportAllFactors = (si != params.end());
|
||||
si = params.find("nbest");
|
||||
int nbest_size = ((si == params.end()) ? 0
|
||||
: int(xmlrpc_c::value_int(si->second)));
|
||||
int nbest_size = (si == params.end()) ? 0 : int(xmlrpc_c::value_int(si->second));
|
||||
si = params.find("nbest-distinct");
|
||||
bool nbest_distinct = (si != params.end());
|
||||
|
||||
@ -252,21 +246,17 @@ public:
|
||||
vector<float> multiModelWeights;
|
||||
si = params.find("lambda");
|
||||
if (si != params.end()) {
|
||||
xmlrpc_c::value_array multiModelArray
|
||||
= xmlrpc_c::value_array(si->second);
|
||||
vector<xmlrpc_c::value> multiModelValueVector
|
||||
(multiModelArray.vectorValueValue());
|
||||
xmlrpc_c::value_array multiModelArray = xmlrpc_c::value_array(si->second);
|
||||
vector<xmlrpc_c::value> multiModelValueVector(multiModelArray.vectorValueValue());
|
||||
for (size_t i=0;i < multiModelValueVector.size();i++) {
|
||||
multiModelWeights.push_back
|
||||
(xmlrpc_c::value_double(multiModelValueVector[i]));
|
||||
multiModelWeights.push_back(xmlrpc_c::value_double(multiModelValueVector[i]));
|
||||
}
|
||||
}
|
||||
|
||||
si = params.find("model_name");
|
||||
if (si != params.end() && multiModelWeights.size() > 0) {
|
||||
const string model_name = xmlrpc_c::value_string(si->second);
|
||||
PhraseDictionaryMultiModel* pdmm;
|
||||
pdmm = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
|
||||
PhraseDictionaryMultiModel* pdmm = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
|
||||
pdmm->SetTemporaryMultiModelWeightsVector(multiModelWeights);
|
||||
}
|
||||
|
||||
@ -303,8 +293,21 @@ public:
|
||||
vector<xmlrpc_c::value> alignInfo;
|
||||
outputHypo(out,hypo,addAlignInfo,alignInfo,reportAllFactors);
|
||||
if (addAlignInfo) {
|
||||
retData.insert(pair<string, xmlrpc_c::value>
|
||||
("align", xmlrpc_c::value_array(alignInfo)));
|
||||
retData.insert(pair<string, xmlrpc_c::value>("align", xmlrpc_c::value_array(alignInfo)));
|
||||
}
|
||||
if (addWordAlignInfo) {
|
||||
stringstream wordAlignment;
|
||||
OutputAlignment(wordAlignment, hypo);
|
||||
vector<xmlrpc_c::value> alignments;
|
||||
string alignmentPair;
|
||||
while (wordAlignment >> alignmentPair) {
|
||||
int pos = alignmentPair.find('-');
|
||||
map<string, xmlrpc_c::value> wordAlignInfo;
|
||||
wordAlignInfo["source-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(0, pos).c_str()));
|
||||
wordAlignInfo["target-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(pos + 1).c_str()));
|
||||
alignments.push_back(xmlrpc_c::value_struct(wordAlignInfo));
|
||||
}
|
||||
retData.insert(pair<string, xmlrpc_c::value_array>("word-align", alignments));
|
||||
}
|
||||
|
||||
if(addGraphInfo) {
|
||||
@ -326,12 +329,9 @@ public:
|
||||
*retvalP = xmlrpc_c::value_struct(retData);
|
||||
}
|
||||
|
||||
void outputHypo(ostream& out, const Hypothesis* hypo, bool addAlignmentInfo,
|
||||
vector<xmlrpc_c::value>& alignInfo,
|
||||
bool reportAllFactors = false) {
|
||||
void outputHypo(ostream& out, const Hypothesis* hypo, bool addAlignmentInfo, vector<xmlrpc_c::value>& alignInfo, bool reportAllFactors = false) {
|
||||
if (hypo->GetPrevHypo() != NULL) {
|
||||
outputHypo(out,hypo->GetPrevHypo(),addAlignmentInfo, alignInfo,
|
||||
reportAllFactors);
|
||||
outputHypo(out,hypo->GetPrevHypo(),addAlignmentInfo, alignInfo, reportAllFactors);
|
||||
Phrase p = hypo->GetCurrTargetPhrase();
|
||||
if(reportAllFactors) {
|
||||
out << p << " ";
|
||||
@ -349,16 +349,9 @@ public:
|
||||
* triples.
|
||||
**/
|
||||
map<string, xmlrpc_c::value> phraseAlignInfo;
|
||||
|
||||
phraseAlignInfo["tgt-start"]
|
||||
= xmlrpc_c::value_int(hypo->GetCurrTargetWordsRange().GetStartPos());
|
||||
|
||||
phraseAlignInfo["src-start"]
|
||||
= xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetStartPos());
|
||||
|
||||
phraseAlignInfo["src-end"]
|
||||
= xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetEndPos());
|
||||
|
||||
phraseAlignInfo["tgt-start"] = xmlrpc_c::value_int(hypo->GetCurrTargetWordsRange().GetStartPos());
|
||||
phraseAlignInfo["src-start"] = xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetStartPos());
|
||||
phraseAlignInfo["src-end"] = xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetEndPos());
|
||||
alignInfo.push_back(xmlrpc_c::value_struct(phraseAlignInfo));
|
||||
}
|
||||
}
|
||||
@ -456,9 +449,25 @@ public:
|
||||
}
|
||||
nBestXMLItem["hyp"] = xmlrpc_c::value_string(out.str());
|
||||
|
||||
if (addAlignmentInfo)
|
||||
if (addAlignmentInfo) {
|
||||
nBestXMLItem["align"] = xmlrpc_c::value_array(alignInfo);
|
||||
|
||||
if ((int)edges.size() > 0) {
|
||||
stringstream wordAlignment;
|
||||
OutputAlignment(wordAlignment, edges[0]);
|
||||
vector<xmlrpc_c::value> alignments;
|
||||
string alignmentPair;
|
||||
while (wordAlignment >> alignmentPair) {
|
||||
int pos = alignmentPair.find('-');
|
||||
map<string, xmlrpc_c::value> wordAlignInfo;
|
||||
wordAlignInfo["source-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(0, pos).c_str()));
|
||||
wordAlignInfo["target-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(pos + 1).c_str()));
|
||||
alignments.push_back(xmlrpc_c::value_struct(wordAlignInfo));
|
||||
}
|
||||
nBestXMLItem["word-align"] = xmlrpc_c::value_array(alignments);
|
||||
}
|
||||
}
|
||||
|
||||
if (addScoreBreakdown)
|
||||
{
|
||||
// should the score breakdown be reported in a more structured manner?
|
||||
@ -619,11 +628,20 @@ int main(int argc, char** argv)
|
||||
myRegistry.addMethod("updater", updater);
|
||||
myRegistry.addMethod("optimize", optimizer);
|
||||
|
||||
xmlrpc_c::serverAbyss myAbyssServer(
|
||||
myRegistry,
|
||||
port, // TCP port on which to listen
|
||||
logfile
|
||||
);
|
||||
/* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04
|
||||
xmlrpc_c::serverAbyss myAbyssServer(
|
||||
myRegistry,
|
||||
port, // TCP port on which to listen
|
||||
logfile
|
||||
xmlrpc_c::serverAbyss::constrOpt()
|
||||
.registryPtr(&myRegistry)
|
||||
.portNumber(port) // TCP port on which to listen
|
||||
.logFileName(logfile)
|
||||
.allowOrigin("*")
|
||||
);
|
||||
*/
|
||||
|
||||
cerr << "Listening on port " << port << endl;
|
||||
if (isSerial) {
|
||||
|
@ -553,7 +553,7 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
|
||||
|
||||
//DIMw
|
||||
void IOWrapper::OutputDetailedAllTranslationReport(
|
||||
const ChartTrellisPathList &nBestList,
|
||||
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
|
||||
const ChartManager &manager,
|
||||
const Sentence &sentence,
|
||||
long translationId)
|
||||
@ -793,6 +793,58 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, long tran
|
||||
m_nBestOutputCollector->Write(translationId, out.str());
|
||||
}
|
||||
|
||||
void IOWrapper::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList,
|
||||
long translationId)
|
||||
{
|
||||
std::ostringstream out;
|
||||
|
||||
if (m_nBestOutputCollector->OutputIsCout()) {
|
||||
// Set precision only if we're writing the n-best list to cout. This is to
|
||||
// preserve existing behaviour, but should probably be done either way.
|
||||
IOWrapper::FixPrecision(out);
|
||||
}
|
||||
|
||||
bool includeWordAlignment =
|
||||
StaticData::Instance().PrintAlignmentInfoInNbest();
|
||||
|
||||
for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
|
||||
p != nBestList.end(); ++p) {
|
||||
const ChartKBestExtractor::Derivation &derivation = **p;
|
||||
|
||||
// get the derivation's target-side yield
|
||||
Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(derivation);
|
||||
|
||||
// delete <s> and </s>
|
||||
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
|
||||
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
|
||||
outputPhrase.RemoveWord(0);
|
||||
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
|
||||
|
||||
// print the translation ID, surface factors, and scores
|
||||
out << translationId << " ||| ";
|
||||
OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
|
||||
out << " ||| ";
|
||||
OutputAllFeatureScores(derivation.scoreBreakdown, out);
|
||||
out << " ||| " << derivation.score;
|
||||
|
||||
// optionally, print word alignments
|
||||
if (includeWordAlignment) {
|
||||
out << " ||| ";
|
||||
Alignments align;
|
||||
OutputAlignmentNBest(align, derivation, 0);
|
||||
for (Alignments::const_iterator q = align.begin(); q != align.end();
|
||||
++q) {
|
||||
out << q->first << "-" << q->second << " ";
|
||||
}
|
||||
}
|
||||
|
||||
out << std::endl;
|
||||
}
|
||||
|
||||
assert(m_nBestOutputCollector);
|
||||
m_nBestOutputCollector->Write(translationId, out.str());
|
||||
}
|
||||
|
||||
void IOWrapper::OutputNBestList(const std::vector<search::Applied> &nbest, long translationId)
|
||||
{
|
||||
std::ostringstream out;
|
||||
@ -927,6 +979,85 @@ size_t IOWrapper::OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartT
|
||||
return totalTargetSize;
|
||||
}
|
||||
|
||||
size_t IOWrapper::OutputAlignmentNBest(
|
||||
Alignments &retAlign,
|
||||
const Moses::ChartKBestExtractor::Derivation &derivation,
|
||||
size_t startTarget)
|
||||
{
|
||||
const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
|
||||
|
||||
size_t totalTargetSize = 0;
|
||||
size_t startSource = hypo.GetCurrSourceRange().GetStartPos();
|
||||
|
||||
const TargetPhrase &tp = hypo.GetCurrTargetPhrase();
|
||||
|
||||
size_t thisSourceSize = CalcSourceSize(&hypo);
|
||||
|
||||
// position of each terminal word in translation rule, irrespective of alignment
|
||||
// if non-term, number is undefined
|
||||
vector<size_t> sourceOffsets(thisSourceSize, 0);
|
||||
vector<size_t> targetOffsets(tp.GetSize(), 0);
|
||||
|
||||
const AlignmentInfo &aiNonTerm = hypo.GetCurrTargetPhrase().GetAlignNonTerm();
|
||||
vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
|
||||
const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
|
||||
|
||||
UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
|
||||
"Error");
|
||||
|
||||
size_t targetInd = 0;
|
||||
for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
|
||||
if (tp.GetWord(targetPos).IsNonTerminal()) {
|
||||
UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
|
||||
size_t sourceInd = targetPos2SourceInd[targetPos];
|
||||
size_t sourcePos = sourceInd2pos[sourceInd];
|
||||
|
||||
const Moses::ChartKBestExtractor::Derivation &subderivation =
|
||||
*derivation.subderivations[sourceInd];
|
||||
|
||||
// calc source size
|
||||
size_t sourceSize = subderivation.edge.head->hypothesis.GetCurrSourceRange().GetNumWordsCovered();
|
||||
sourceOffsets[sourcePos] = sourceSize;
|
||||
|
||||
// calc target size.
|
||||
// Recursively look thru child hypos
|
||||
size_t currStartTarget = startTarget + totalTargetSize;
|
||||
size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
|
||||
currStartTarget);
|
||||
targetOffsets[targetPos] = targetSize;
|
||||
|
||||
totalTargetSize += targetSize;
|
||||
++targetInd;
|
||||
} else {
|
||||
++totalTargetSize;
|
||||
}
|
||||
}
|
||||
|
||||
// convert position within translation rule to absolute position within
|
||||
// source sentence / output sentence
|
||||
ShiftOffsets(sourceOffsets, startSource);
|
||||
ShiftOffsets(targetOffsets, startTarget);
|
||||
|
||||
// get alignments from this hypo
|
||||
const AlignmentInfo &aiTerm = hypo.GetCurrTargetPhrase().GetAlignTerm();
|
||||
|
||||
// add to output arg, offsetting by source & target
|
||||
AlignmentInfo::const_iterator iter;
|
||||
for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
|
||||
const std::pair<size_t,size_t> &align = *iter;
|
||||
size_t relSource = align.first;
|
||||
size_t relTarget = align.second;
|
||||
size_t absSource = sourceOffsets[relSource];
|
||||
size_t absTarget = targetOffsets[relTarget];
|
||||
|
||||
pair<size_t, size_t> alignPoint(absSource, absTarget);
|
||||
pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
|
||||
UTIL_THROW_IF2(!ret.second, "Error");
|
||||
}
|
||||
|
||||
return totalTargetSize;
|
||||
}
|
||||
|
||||
void IOWrapper::OutputAlignment(size_t translationId , const Moses::ChartHypothesis *hypo)
|
||||
{
|
||||
ostringstream out;
|
||||
|
@ -40,6 +40,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "moses/TypeDef.h"
|
||||
#include "moses/Sentence.h"
|
||||
#include "moses/FactorTypeSet.h"
|
||||
#include "moses/ChartKBestExtractor.h"
|
||||
#include "moses/ChartTrellisPathList.h"
|
||||
#include "moses/OutputCollector.h"
|
||||
#include "moses/ChartHypothesis.h"
|
||||
@ -90,6 +91,7 @@ protected:
|
||||
|
||||
typedef std::set< std::pair<size_t, size_t> > Alignments;
|
||||
size_t OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartTrellisNode &node, size_t startTarget);
|
||||
std::size_t OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartKBestExtractor::Derivation &derivation, std::size_t startTarget);
|
||||
size_t OutputAlignment(Alignments &retAlign, const Moses::ChartHypothesis *hypo, size_t startTarget);
|
||||
void OutputAlignment(std::vector< std::set<size_t> > &retAlignmentsS2T, const Moses::AlignmentInfo &ai);
|
||||
void OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
|
||||
@ -129,12 +131,13 @@ public:
|
||||
void OutputBestHypo(const std::vector<const Moses::Factor*>& mbrBestHypo, long translationId);
|
||||
void OutputBestNone(long translationId);
|
||||
void OutputNBestList(const Moses::ChartTrellisPathList &nBestList, long translationId);
|
||||
void OutputNBestList(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, long translationId);
|
||||
void OutputNBestList(const std::vector<search::Applied> &nbest, long translationId);
|
||||
void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputDetailedTranslationReport(const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputDetailedTreeFragmentsTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputDetailedTreeFragmentsTranslationReport(const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputDetailedAllTranslationReport(const Moses::ChartTrellisPathList &nBestList, const Moses::ChartManager &manager, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputDetailedAllTranslationReport(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, const Moses::ChartManager &manager, const Moses::Sentence &sentence, long translationId);
|
||||
void Backtrack(const Moses::ChartHypothesis *hypo);
|
||||
|
||||
void ResetTranslationId();
|
||||
|
@ -151,7 +151,7 @@ public:
|
||||
if (staticData.IsDetailedAllTranslationReportingEnabled()) {
|
||||
const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
|
||||
size_t nBestSize = staticData.GetNBestSize();
|
||||
ChartTrellisPathList nBestList;
|
||||
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
|
||||
manager.CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
|
||||
m_ioWrapper.OutputDetailedAllTranslationReport(nBestList, manager, sentence, translationId);
|
||||
}
|
||||
@ -160,7 +160,7 @@ public:
|
||||
size_t nBestSize = staticData.GetNBestSize();
|
||||
if (nBestSize > 0) {
|
||||
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
|
||||
ChartTrellisPathList nBestList;
|
||||
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
|
||||
manager.CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
|
||||
m_ioWrapper.OutputNBestList(nBestList, translationId);
|
||||
IFVERBOSE(2) {
|
||||
|
@ -180,6 +180,7 @@ public:
|
||||
} else {
|
||||
TRACE_ERR("Cannot output HTK standard lattice for line " << m_lineNumber << " because the output file is not open or not ready for writing" << std::endl);
|
||||
}
|
||||
delete file;
|
||||
}
|
||||
|
||||
// Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder
|
||||
@ -233,7 +234,7 @@ public:
|
||||
|
||||
} else {
|
||||
stringstream hypergraphDirName;
|
||||
hypergraphDirName << boost::filesystem::current_path() << "/hypergraph";
|
||||
hypergraphDirName << boost::filesystem::current_path().string() << "/hypergraph";
|
||||
hypergraphDir = hypergraphDirName.str();
|
||||
}
|
||||
}
|
||||
@ -536,9 +537,7 @@ size_t OutputFeatureWeightsForHypergraph(size_t index, const FeatureFunction* ff
|
||||
}
|
||||
return index+numScoreComps;
|
||||
} else {
|
||||
cerr << "Sparse features are not yet supported when outputting hypergraph format" << endl;
|
||||
assert(false);
|
||||
return 0;
|
||||
UTIL_THROW2("Sparse features are not yet supported when outputting hypergraph format");
|
||||
}
|
||||
}
|
||||
|
||||
@ -650,7 +649,7 @@ int main(int argc, char** argv)
|
||||
boost::filesystem::path nbestPath(nbestFile);
|
||||
weightsFilename << nbestPath.parent_path().filename() << "/weights";
|
||||
} else {
|
||||
weightsFilename << boost::filesystem::current_path() << "/hypergraph/weights";
|
||||
weightsFilename << boost::filesystem::current_path().string() << "/hypergraph/weights";
|
||||
}
|
||||
}
|
||||
boost::filesystem::path weightsFilePath(weightsFilename.str());
|
||||
|
@ -114,8 +114,11 @@ void ChartCell::SortHypotheses()
|
||||
MapType::iterator iter;
|
||||
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
||||
ChartHypothesisCollection &coll = iter->second;
|
||||
coll.SortHypotheses();
|
||||
m_targetLabelSet.AddConstituent(iter->first, &coll.GetSortedHypotheses());
|
||||
|
||||
if (coll.GetSize()) {
|
||||
coll.SortHypotheses();
|
||||
m_targetLabelSet.AddConstituent(iter->first, &coll.GetSortedHypotheses());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -66,6 +66,22 @@ ChartHypothesis::ChartHypothesis(const ChartTranslationOptions &transOpt,
|
||||
}
|
||||
}
|
||||
|
||||
// Intended to be used by ChartKBestExtractor only. This creates a mock
|
||||
// ChartHypothesis for use by the extractor's top-level target vertex.
|
||||
ChartHypothesis::ChartHypothesis(const ChartHypothesis &pred,
|
||||
const ChartKBestExtractor & /*unused*/)
|
||||
:m_currSourceWordsRange(pred.m_currSourceWordsRange)
|
||||
,m_scoreBreakdown(pred.m_scoreBreakdown)
|
||||
,m_totalScore(pred.m_totalScore)
|
||||
,m_arcList(NULL)
|
||||
,m_winningHypo(NULL)
|
||||
,m_manager(pred.m_manager)
|
||||
,m_id(pred.m_manager.GetNextHypoId())
|
||||
{
|
||||
// One predecessor, which is an existing top-level ChartHypothesis.
|
||||
m_prevHypos.push_back(&pred);
|
||||
}
|
||||
|
||||
ChartHypothesis::~ChartHypothesis()
|
||||
{
|
||||
// delete feature function states
|
||||
|
@ -31,6 +31,7 @@
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class ChartKBestExtractor;
|
||||
class ChartHypothesis;
|
||||
class ChartManager;
|
||||
class RuleCubeItem;
|
||||
@ -44,6 +45,7 @@ typedef std::vector<ChartHypothesis*> ChartArcList;
|
||||
class ChartHypothesis
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream&, const ChartHypothesis&);
|
||||
friend class ChartKBestExtractor;
|
||||
|
||||
protected:
|
||||
#ifdef USE_HYPO_POOL
|
||||
@ -74,6 +76,9 @@ protected:
|
||||
//! not implemented
|
||||
ChartHypothesis(const ChartHypothesis ©);
|
||||
|
||||
//! only used by ChartKBestExtractor
|
||||
ChartHypothesis(const ChartHypothesis &, const ChartKBestExtractor &);
|
||||
|
||||
public:
|
||||
#ifdef USE_HYPO_POOL
|
||||
void *operator new(size_t /* num_bytes */) {
|
||||
|
279
moses/ChartKBestExtractor.cpp
Normal file
279
moses/ChartKBestExtractor.cpp
Normal file
@ -0,0 +1,279 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2014 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "ChartKBestExtractor.h"
|
||||
|
||||
#include "ChartHypothesis.h"
|
||||
#include "ScoreComponentCollection.h"
|
||||
#include "StaticData.h"
|
||||
|
||||
#include <boost/scoped_ptr.hpp>
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
// Extract the k-best list from the search graph.
|
||||
void ChartKBestExtractor::Extract(
|
||||
const std::vector<const ChartHypothesis*> &topLevelHypos, std::size_t k,
|
||||
KBestVec &kBestList)
|
||||
{
|
||||
kBestList.clear();
|
||||
if (topLevelHypos.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Create a new ChartHypothesis object, supremeHypo, that has the best
|
||||
// top-level hypothesis as its predecessor and has the same score.
|
||||
std::vector<const ChartHypothesis*>::const_iterator p = topLevelHypos.begin();
|
||||
const ChartHypothesis &bestTopLevelHypo = **p;
|
||||
boost::scoped_ptr<ChartHypothesis> supremeHypo(
|
||||
new ChartHypothesis(bestTopLevelHypo, *this));
|
||||
|
||||
// Do the same for each alternative top-level hypothesis, but add the new
|
||||
// ChartHypothesis objects as arcs from supremeHypo, as if they had been
|
||||
// recombined.
|
||||
for (++p; p != topLevelHypos.end(); ++p) {
|
||||
// Check that the first item in topLevelHypos really was the best.
|
||||
UTIL_THROW_IF2((*p)->GetTotalScore() <= bestTopLevelHypo.GetTotalScore(),
|
||||
"top-level hypotheses are not correctly sorted");
|
||||
// Note: there's no need for a smart pointer here: supremeHypo will take
|
||||
// ownership of altHypo.
|
||||
ChartHypothesis *altHypo = new ChartHypothesis(**p, *this);
|
||||
supremeHypo->AddArc(altHypo);
|
||||
}
|
||||
|
||||
// Create the target vertex then lazily fill its k-best list.
|
||||
boost::shared_ptr<Vertex> targetVertex = FindOrCreateVertex(*supremeHypo);
|
||||
LazyKthBest(*targetVertex, k, k);
|
||||
|
||||
// Copy the k-best list from the target vertex, but drop the top edge from
|
||||
// each derivation.
|
||||
kBestList.reserve(targetVertex->kBestList.size());
|
||||
for (std::vector<boost::weak_ptr<Derivation> >::const_iterator
|
||||
q = targetVertex->kBestList.begin();
|
||||
q != targetVertex->kBestList.end(); ++q) {
|
||||
const boost::shared_ptr<Derivation> d(*q);
|
||||
assert(d);
|
||||
assert(d->subderivations.size() == 1);
|
||||
kBestList.push_back(d->subderivations[0]);
|
||||
}
|
||||
}
|
||||
|
||||
// Generate the target-side yield of the derivation d.
|
||||
Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
|
||||
{
|
||||
FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
|
||||
|
||||
Phrase ret(ARRAY_SIZE_INCR);
|
||||
|
||||
const ChartHypothesis &hypo = d.edge.head->hypothesis;
|
||||
const TargetPhrase &phrase = hypo.GetCurrTargetPhrase();
|
||||
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
||||
phrase.GetAlignNonTerm().GetNonTermIndexMap();
|
||||
for (std::size_t pos = 0; pos < phrase.GetSize(); ++pos) {
|
||||
const Word &word = phrase.GetWord(pos);
|
||||
if (word.IsNonTerminal()) {
|
||||
std::size_t nonTermInd = nonTermIndexMap[pos];
|
||||
const Derivation &subderivation = *d.subderivations[nonTermInd];
|
||||
Phrase subPhrase = GetOutputPhrase(subderivation);
|
||||
ret.Append(subPhrase);
|
||||
} else {
|
||||
ret.AddWord(word);
|
||||
if (placeholderFactor == NOT_FOUND) {
|
||||
continue;
|
||||
}
|
||||
std::set<std::size_t> sourcePosSet =
|
||||
phrase.GetAlignTerm().GetAlignmentsForTarget(pos);
|
||||
if (sourcePosSet.size() == 1) {
|
||||
const std::vector<const Word*> *ruleSourceFromInputPath =
|
||||
hypo.GetTranslationOption().GetSourceRuleFromInputPath();
|
||||
UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
|
||||
"Source Words in of the rules hasn't been filled out");
|
||||
std::size_t sourcePos = *sourcePosSet.begin();
|
||||
const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
|
||||
UTIL_THROW_IF2(sourceWord == NULL,
|
||||
"Null source word at position " << sourcePos);
|
||||
const Factor *factor = sourceWord->GetFactor(placeholderFactor);
|
||||
if (factor) {
|
||||
ret.Back()[0] = factor;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Create an unweighted hyperarc corresponding to the given ChartHypothesis.
|
||||
ChartKBestExtractor::UnweightedHyperarc ChartKBestExtractor::CreateEdge(
|
||||
const ChartHypothesis &h)
|
||||
{
|
||||
UnweightedHyperarc edge;
|
||||
edge.head = FindOrCreateVertex(h);
|
||||
const std::vector<const ChartHypothesis*> &prevHypos = h.GetPrevHypos();
|
||||
edge.tail.resize(prevHypos.size());
|
||||
for (std::size_t i = 0; i < prevHypos.size(); ++i) {
|
||||
const ChartHypothesis *prevHypo = prevHypos[i];
|
||||
edge.tail[i] = FindOrCreateVertex(*prevHypo);
|
||||
}
|
||||
return edge;
|
||||
}
|
||||
|
||||
// Look for the vertex corresponding to a given ChartHypothesis, creating
|
||||
// a new one if necessary.
|
||||
boost::shared_ptr<ChartKBestExtractor::Vertex>
|
||||
ChartKBestExtractor::FindOrCreateVertex(const ChartHypothesis &h)
|
||||
{
|
||||
VertexMap::value_type element(&h, boost::shared_ptr<Vertex>());
|
||||
std::pair<VertexMap::iterator, bool> p = m_vertexMap.insert(element);
|
||||
boost::shared_ptr<Vertex> &sp = p.first->second;
|
||||
if (!p.second) {
|
||||
return sp; // Vertex was already in m_vertexMap.
|
||||
}
|
||||
sp.reset(new Vertex(h));
|
||||
// Create the 1-best derivation and add it to the vertex's kBestList.
|
||||
UnweightedHyperarc bestEdge;
|
||||
bestEdge.head = sp;
|
||||
const std::vector<const ChartHypothesis*> &prevHypos = h.GetPrevHypos();
|
||||
bestEdge.tail.resize(prevHypos.size());
|
||||
for (std::size_t i = 0; i < prevHypos.size(); ++i) {
|
||||
const ChartHypothesis *prevHypo = prevHypos[i];
|
||||
bestEdge.tail[i] = FindOrCreateVertex(*prevHypo);
|
||||
}
|
||||
boost::shared_ptr<Derivation> bestDerivation(new Derivation(bestEdge));
|
||||
std::pair<DerivationSet::iterator, bool> q =
|
||||
m_derivations.insert(bestDerivation);
|
||||
assert(q.second);
|
||||
sp->kBestList.push_back(bestDerivation);
|
||||
return sp;
|
||||
}
|
||||
|
||||
// Create the 1-best derivation for each edge in BS(v) (except the best one)
|
||||
// and add it to v's candidate queue.
|
||||
void ChartKBestExtractor::GetCandidates(Vertex &v, std::size_t k)
|
||||
{
|
||||
// Create derivations for all of v's incoming edges except the best. This
|
||||
// means everything in v.hypothesis.GetArcList() and not the edge defined
|
||||
// by v.hypothesis itself. The 1-best derivation for that edge will already
|
||||
// have been created.
|
||||
const ChartArcList *arcList = v.hypothesis.GetArcList();
|
||||
if (arcList) {
|
||||
for (std::size_t i = 0; i < arcList->size(); ++i) {
|
||||
const ChartHypothesis &recombinedHypo = *(*arcList)[i];
|
||||
boost::shared_ptr<Vertex> w = FindOrCreateVertex(recombinedHypo);
|
||||
assert(w->kBestList.size() == 1);
|
||||
v.candidates.push(w->kBestList[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Lazily fill v's k-best list.
|
||||
void ChartKBestExtractor::LazyKthBest(Vertex &v, std::size_t k,
|
||||
std::size_t globalK)
|
||||
{
|
||||
// If this is the first visit to vertex v then initialize the priority queue.
|
||||
if (v.visited == false) {
|
||||
// The 1-best derivation should already be in v's k-best list.
|
||||
assert(v.kBestList.size() == 1);
|
||||
// Initialize v's priority queue.
|
||||
GetCandidates(v, globalK);
|
||||
v.visited = true;
|
||||
}
|
||||
// Add derivations to the k-best list until it contains k or there are none
|
||||
// left to add.
|
||||
while (v.kBestList.size() < k) {
|
||||
assert(!v.kBestList.empty());
|
||||
// Update the priority queue by adding the successors of the last
|
||||
// derivation (unless they've been seen before).
|
||||
boost::shared_ptr<Derivation> d(v.kBestList.back());
|
||||
LazyNext(v, *d, globalK);
|
||||
// Check if there are any derivations left in the queue.
|
||||
if (v.candidates.empty()) {
|
||||
break;
|
||||
}
|
||||
// Get the next best derivation and delete it from the queue.
|
||||
boost::weak_ptr<Derivation> next = v.candidates.top();
|
||||
v.candidates.pop();
|
||||
// Add it to the k-best list.
|
||||
v.kBestList.push_back(next);
|
||||
}
|
||||
}
|
||||
|
||||
// Create the neighbours of Derivation d and add them to v's candidate queue.
|
||||
void ChartKBestExtractor::LazyNext(Vertex &v, const Derivation &d,
|
||||
std::size_t globalK)
|
||||
{
|
||||
for (std::size_t i = 0; i < d.edge.tail.size(); ++i) {
|
||||
Vertex &pred = *d.edge.tail[i];
|
||||
// Ensure that pred's k-best list contains enough derivations.
|
||||
std::size_t k = d.backPointers[i] + 2;
|
||||
LazyKthBest(pred, k, globalK);
|
||||
if (pred.kBestList.size() < k) {
|
||||
// pred's derivations have been exhausted.
|
||||
continue;
|
||||
}
|
||||
// Create the neighbour.
|
||||
boost::shared_ptr<Derivation> next(new Derivation(d, i));
|
||||
// Check if it has been created before.
|
||||
std::pair<DerivationSet::iterator, bool> p = m_derivations.insert(next);
|
||||
if (p.second) {
|
||||
v.candidates.push(next); // Haven't previously seen it.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct the 1-best Derivation that ends at edge e.
|
||||
ChartKBestExtractor::Derivation::Derivation(const UnweightedHyperarc &e)
|
||||
{
|
||||
edge = e;
|
||||
std::size_t arity = edge.tail.size();
|
||||
backPointers.resize(arity, 0);
|
||||
subderivations.reserve(arity);
|
||||
for (std::size_t i = 0; i < arity; ++i) {
|
||||
const Vertex &pred = *edge.tail[i];
|
||||
assert(pred.kBestList.size() >= 1);
|
||||
boost::shared_ptr<Derivation> sub(pred.kBestList[0]);
|
||||
subderivations.push_back(sub);
|
||||
}
|
||||
scoreBreakdown = edge.head->hypothesis.GetScoreBreakdown();
|
||||
score = edge.head->hypothesis.GetTotalScore();
|
||||
}
|
||||
|
||||
// Construct a Derivation that neighbours an existing Derivation.
|
||||
ChartKBestExtractor::Derivation::Derivation(const Derivation &d, std::size_t i)
|
||||
{
|
||||
edge.head = d.edge.head;
|
||||
edge.tail = d.edge.tail;
|
||||
backPointers = d.backPointers;
|
||||
subderivations = d.subderivations;
|
||||
std::size_t j = ++backPointers[i];
|
||||
scoreBreakdown = d.scoreBreakdown;
|
||||
// Deduct the score of the old subderivation.
|
||||
scoreBreakdown.MinusEquals(subderivations[i]->scoreBreakdown);
|
||||
// Update the subderivation pointer.
|
||||
boost::shared_ptr<Derivation> newSub(edge.tail[i]->kBestList[j]);
|
||||
subderivations[i] = newSub;
|
||||
// Add the score of the new subderivation.
|
||||
scoreBreakdown.PlusEquals(subderivations[i]->scoreBreakdown);
|
||||
score = scoreBreakdown.GetWeightedScore();
|
||||
}
|
||||
|
||||
} // namespace Moses
|
129
moses/ChartKBestExtractor.h
Normal file
129
moses/ChartKBestExtractor.h
Normal file
@ -0,0 +1,129 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2014 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include "ChartHypothesis.h"
|
||||
#include "ScoreComponentCollection.h"
|
||||
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include <boost/weak_ptr.hpp>
|
||||
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
// k-best list extractor that implements algorithm 3 from this paper:
|
||||
//
|
||||
// Liang Huang and David Chiang
|
||||
// "Better k-best parsing"
|
||||
// In Proceedings of IWPT 2005
|
||||
//
|
||||
class ChartKBestExtractor
|
||||
{
|
||||
public:
|
||||
struct Vertex;
|
||||
|
||||
struct UnweightedHyperarc {
|
||||
boost::shared_ptr<Vertex> head;
|
||||
std::vector<boost::shared_ptr<Vertex> > tail;
|
||||
};
|
||||
|
||||
struct Derivation {
|
||||
Derivation(const UnweightedHyperarc &);
|
||||
Derivation(const Derivation &, std::size_t);
|
||||
|
||||
UnweightedHyperarc edge;
|
||||
std::vector<std::size_t> backPointers;
|
||||
std::vector<boost::shared_ptr<Derivation> > subderivations;
|
||||
ScoreComponentCollection scoreBreakdown;
|
||||
float score;
|
||||
};
|
||||
|
||||
struct DerivationOrderer {
|
||||
bool operator()(const boost::weak_ptr<Derivation> &d1,
|
||||
const boost::weak_ptr<Derivation> &d2) const {
|
||||
boost::shared_ptr<Derivation> s1(d1);
|
||||
boost::shared_ptr<Derivation> s2(d2);
|
||||
return s1->score < s2->score;
|
||||
}
|
||||
};
|
||||
|
||||
struct Vertex {
|
||||
typedef std::priority_queue<boost::weak_ptr<Derivation>,
|
||||
std::vector<boost::weak_ptr<Derivation> >,
|
||||
DerivationOrderer> DerivationQueue;
|
||||
|
||||
Vertex(const ChartHypothesis &h) : hypothesis(h), visited(false) {}
|
||||
|
||||
const ChartHypothesis &hypothesis;
|
||||
std::vector<boost::weak_ptr<Derivation> > kBestList;
|
||||
DerivationQueue candidates;
|
||||
bool visited;
|
||||
};
|
||||
|
||||
typedef std::vector<boost::shared_ptr<Derivation> > KBestVec;
|
||||
|
||||
// Extract the k-best list from the search hypergraph given the full, sorted
|
||||
// list of top-level vertices.
|
||||
void Extract(const std::vector<const ChartHypothesis*> &topHypos,
|
||||
std::size_t k, KBestVec &);
|
||||
|
||||
static Phrase GetOutputPhrase(const Derivation &);
|
||||
|
||||
private:
|
||||
typedef boost::unordered_map<const ChartHypothesis *,
|
||||
boost::shared_ptr<Vertex> > VertexMap;
|
||||
|
||||
struct DerivationHasher {
|
||||
std::size_t operator()(const boost::shared_ptr<Derivation> &d) const {
|
||||
std::size_t seed = 0;
|
||||
boost::hash_combine(seed, d->edge.head);
|
||||
boost::hash_combine(seed, d->edge.tail);
|
||||
boost::hash_combine(seed, d->backPointers);
|
||||
return seed;
|
||||
}
|
||||
};
|
||||
|
||||
struct DerivationEqualityPred {
|
||||
bool operator()(const boost::shared_ptr<Derivation> &d1,
|
||||
const boost::shared_ptr<Derivation> &d2) const {
|
||||
return d1->edge.head == d2->edge.head &&
|
||||
d1->edge.tail == d2->edge.tail &&
|
||||
d1->backPointers == d2->backPointers;
|
||||
}
|
||||
};
|
||||
|
||||
typedef boost::unordered_set<boost::shared_ptr<Derivation>, DerivationHasher,
|
||||
DerivationEqualityPred> DerivationSet;
|
||||
|
||||
UnweightedHyperarc CreateEdge(const ChartHypothesis &);
|
||||
boost::shared_ptr<Vertex> FindOrCreateVertex(const ChartHypothesis &);
|
||||
void GetCandidates(Vertex &, std::size_t);
|
||||
void LazyKthBest(Vertex &, std::size_t, std::size_t);
|
||||
void LazyNext(Vertex &, const Derivation &, std::size_t);
|
||||
|
||||
VertexMap m_vertexMap;
|
||||
DerivationSet m_derivations;
|
||||
};
|
||||
|
||||
} // namespace Moses
|
@ -23,6 +23,7 @@
|
||||
#include "ChartManager.h"
|
||||
#include "ChartCell.h"
|
||||
#include "ChartHypothesis.h"
|
||||
#include "ChartKBestExtractor.h"
|
||||
#include "ChartTranslationOptions.h"
|
||||
#include "ChartTrellisDetourQueue.h"
|
||||
#include "ChartTrellisNode.h"
|
||||
@ -261,6 +262,65 @@ void ChartManager::CalcNBest(size_t count, ChartTrellisPathList &ret,bool onlyDi
|
||||
}
|
||||
}
|
||||
|
||||
/** Calculate the n-best paths through the output hypergraph.
|
||||
* Return the list of paths with the variable ret
|
||||
* \param n how may paths to return
|
||||
* \param ret return argument
|
||||
* \param onlyDistinct whether to check for distinct output sentence or not (default - don't check, just return top n-paths)
|
||||
*/
|
||||
void ChartManager::CalcNBest(
|
||||
std::size_t n,
|
||||
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList,
|
||||
bool onlyDistinct) const
|
||||
{
|
||||
nBestList.clear();
|
||||
if (n == 0 || m_source.GetSize() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Get the list of top-level hypotheses, sorted by score.
|
||||
WordsRange range(0, m_source.GetSize()-1);
|
||||
const ChartCell &lastCell = m_hypoStackColl.Get(range);
|
||||
boost::scoped_ptr<const std::vector<const ChartHypothesis*> > topLevelHypos(
|
||||
lastCell.GetAllSortedHypotheses());
|
||||
if (!topLevelHypos) {
|
||||
return;
|
||||
}
|
||||
|
||||
ChartKBestExtractor extractor;
|
||||
|
||||
if (!onlyDistinct) {
|
||||
// Return the n-best list as is, including duplicate translations.
|
||||
extractor.Extract(*topLevelHypos, n, nBestList);
|
||||
return;
|
||||
}
|
||||
|
||||
// Determine how many derivations to extract. If the n-best list is
|
||||
// restricted to distinct translations then this limit should be bigger
|
||||
// than n. The n-best factor determines how much bigger the limit should be,
|
||||
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
|
||||
// too many translations are identical.
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const std::size_t nBestFactor = staticData.GetNBestFactor();
|
||||
std::size_t numDerivations = (nBestFactor == 0) ? n*1000 : n*nBestFactor;
|
||||
|
||||
// Extract the derivations.
|
||||
ChartKBestExtractor::KBestVec bigList;
|
||||
bigList.reserve(numDerivations);
|
||||
extractor.Extract(*topLevelHypos, numDerivations, bigList);
|
||||
|
||||
// Copy derivations into nBestList, skipping ones with repeated translations.
|
||||
std::set<Phrase> distinct;
|
||||
for (ChartKBestExtractor::KBestVec::const_iterator p = bigList.begin();
|
||||
nBestList.size() < n && p != bigList.end(); ++p) {
|
||||
boost::shared_ptr<ChartKBestExtractor::Derivation> derivation = *p;
|
||||
Phrase translation = ChartKBestExtractor::GetOutputPhrase(*derivation);
|
||||
if (distinct.insert(translation).second) {
|
||||
nBestList.push_back(derivation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ChartManager::GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const
|
||||
{
|
||||
size_t size = m_source.GetSize();
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include "SentenceStats.h"
|
||||
#include "ChartTranslationOptionList.h"
|
||||
#include "ChartParser.h"
|
||||
#include "ChartKBestExtractor.h"
|
||||
|
||||
#include <boost/shared_ptr.hpp>
|
||||
|
||||
@ -71,6 +72,7 @@ public:
|
||||
void AddXmlChartOptions();
|
||||
const ChartHypothesis *GetBestHypothesis() const;
|
||||
void CalcNBest(size_t count, ChartTrellisPathList &ret, bool onlyDistinct=0) const;
|
||||
void CalcNBest(size_t n, std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList, bool onlyDistinct=false) const;
|
||||
|
||||
void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const;
|
||||
void FindReachableHypotheses( const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable ) const; /* auxilliary function for GetSearchGraph */
|
||||
|
@ -164,8 +164,8 @@ float ChartTranslationOptionList::GetBestScore(const ChartCellLabel *chartCell)
|
||||
const HypoList *stack = chartCell->GetStack().cube;
|
||||
assert(stack);
|
||||
assert(!stack->empty());
|
||||
const ChartHypothesis &bestHypo = **(stack->begin());
|
||||
return bestHypo.GetTotalScore();
|
||||
const ChartHypothesis &bestHypo = **(stack->begin());
|
||||
return bestHypo.GetTotalScore();
|
||||
}
|
||||
|
||||
void ChartTranslationOptionList::Evaluate(const InputType &input, const InputPath &inputPath)
|
||||
|
@ -39,6 +39,8 @@
|
||||
#include "moses/FF/HyperParameterAsWeight.h"
|
||||
#include "moses/FF/SetSourcePhrase.h"
|
||||
#include "CountNonTerms.h"
|
||||
#include "ReferenceComparison.h"
|
||||
#include "RuleAmbiguity.h"
|
||||
|
||||
#include "moses/FF/SkeletonStatelessFF.h"
|
||||
#include "moses/FF/SkeletonStatefulFF.h"
|
||||
@ -181,6 +183,8 @@ FeatureRegistry::FeatureRegistry()
|
||||
MOSES_FNAME(HyperParameterAsWeight);
|
||||
MOSES_FNAME(SetSourcePhrase);
|
||||
MOSES_FNAME(CountNonTerms);
|
||||
MOSES_FNAME(ReferenceComparison);
|
||||
MOSES_FNAME(RuleAmbiguity);
|
||||
|
||||
MOSES_FNAME(SkeletonStatelessFF);
|
||||
MOSES_FNAME(SkeletonStatefulFF);
|
||||
|
11
moses/FF/ReferenceComparison.cpp
Normal file
11
moses/FF/ReferenceComparison.cpp
Normal file
@ -0,0 +1,11 @@
|
||||
#include "ReferenceComparison.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
ReferenceComparison::ReferenceComparison(const std::string &line)
|
||||
:StatelessFeatureFunction(0, line)
|
||||
{
|
||||
}
|
||||
|
||||
}
|
||||
|
47
moses/FF/ReferenceComparison.h
Normal file
47
moses/FF/ReferenceComparison.h
Normal file
@ -0,0 +1,47 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include "StatelessFeatureFunction.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
// Count how many hypotheses are in each stack, compare score with reference hypo
|
||||
// NOT threadsafe.
|
||||
class ReferenceComparison : public StatelessFeatureFunction
|
||||
{
|
||||
public:
|
||||
ReferenceComparison(const std::string &line);
|
||||
|
||||
virtual bool IsUseable(const FactorMask &mask) const
|
||||
{ return true; }
|
||||
|
||||
virtual void Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
|
||||
virtual void Evaluate(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
|
||||
virtual void Evaluate(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
|
||||
virtual void EvaluateChart(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
|
||||
std::vector<float> DefaultWeights() const
|
||||
{ return std::vector<float>(); }
|
||||
|
||||
protected:
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
61
moses/FF/RuleAmbiguity.cpp
Normal file
61
moses/FF/RuleAmbiguity.cpp
Normal file
@ -0,0 +1,61 @@
|
||||
#include "RuleAmbiguity.h"
|
||||
#include "moses/StaticData.h"
|
||||
#include "moses/Word.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
RuleAmbiguity::RuleAmbiguity(const std::string &line)
|
||||
:StatelessFeatureFunction(1, line)
|
||||
,m_sourceSyntax(true)
|
||||
{
|
||||
}
|
||||
|
||||
bool IsAmbiguous(const Word &word, bool sourceSyntax)
|
||||
{
|
||||
const Word &inputDefaultNonTerminal = StaticData::Instance().GetInputDefaultNonTerminal();
|
||||
return word.IsNonTerminal() && (!sourceSyntax || word == inputDefaultNonTerminal);
|
||||
}
|
||||
|
||||
void RuleAmbiguity::Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{
|
||||
// source can't be empty, right?
|
||||
float score = 0;
|
||||
|
||||
int count = 0;
|
||||
for (size_t i = 0; i < source.GetSize() - 0; ++i) {
|
||||
const Word &word = source.GetWord(i);
|
||||
bool ambiguous = IsAmbiguous(word, m_sourceSyntax);
|
||||
if (ambiguous) {
|
||||
++count;
|
||||
}
|
||||
else {
|
||||
if (count > 0) {
|
||||
score += count;
|
||||
}
|
||||
count = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// 1st & last always adjacent to ambiguity
|
||||
++count;
|
||||
if (count > 0) {
|
||||
score += count;
|
||||
}
|
||||
|
||||
scoreBreakdown.PlusEquals(this, score);
|
||||
}
|
||||
|
||||
void RuleAmbiguity::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "source-syntax") {
|
||||
m_sourceSyntax = Scan<bool>(value);
|
||||
} else {
|
||||
StatelessFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
44
moses/FF/RuleAmbiguity.h
Normal file
44
moses/FF/RuleAmbiguity.h
Normal file
@ -0,0 +1,44 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include "StatelessFeatureFunction.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
// similar to Scope, however, adjacent non-term count as 1 ammbiguity, rather than 2
|
||||
class RuleAmbiguity : public StatelessFeatureFunction
|
||||
{
|
||||
public:
|
||||
RuleAmbiguity(const std::string &line);
|
||||
|
||||
virtual bool IsUseable(const FactorMask &mask) const
|
||||
{ return true; }
|
||||
|
||||
virtual void Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const;
|
||||
|
||||
virtual void Evaluate(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
|
||||
virtual void Evaluate(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
|
||||
virtual void EvaluateChart(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
protected:
|
||||
bool m_sourceSyntax;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -5,6 +5,7 @@
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
// the only thing this FF does is set TargetPhrase::m_ruleSource so that other FF can use it in Evaluate(Search).
|
||||
class SetSourcePhrase : public StatelessFeatureFunction
|
||||
{
|
||||
public:
|
||||
|
@ -102,8 +102,8 @@ Parameter::Parameter()
|
||||
AddParam("output-search-graph", "osg", "Output connected hypotheses of search into specified filename");
|
||||
AddParam("output-search-graph-extended", "osgx", "Output connected hypotheses of search into specified filename, in extended format");
|
||||
AddParam("unpruned-search-graph", "usg", "When outputting chart search graph, do not exclude dead ends. Note: stack pruning may have eliminated some hypotheses");
|
||||
AddParam("output-search-graph-slf", "slf", "Output connected hypotheses of search into specified directory, one file per sentence, in HTK standard lattice format (SLF)");
|
||||
AddParam("output-search-graph-hypergraph", "Output connected hypotheses of search into specified directory, one file per sentence, in a hypergraph format (see Kenneth Heafield's lazy hypergraph decoder)");
|
||||
AddParam("output-search-graph-slf", "slf", "Output connected hypotheses of search into specified directory, one file per sentence, in HTK standard lattice format (SLF) - the flag should be followed byy a directory name, which must exist");
|
||||
AddParam("output-search-graph-hypergraph", "Output connected hypotheses of search into specified directory, one file per sentence, in a hypergraph format (see Kenneth Heafield's lazy hypergraph decoder). This flag is followed by 3 values: 'true (gz|txt|bz) directory-name'");
|
||||
AddParam("include-lhs-in-search-graph", "lhssg", "When outputting chart search graph, include the label of the LHS of the rule (useful when using syntax)");
|
||||
#ifdef HAVE_PROTOBUF
|
||||
AddParam("output-search-graph-pb", "pb", "Write phrase lattice to protocol buffer objects in the specified path.");
|
||||
|
@ -121,6 +121,14 @@ public:
|
||||
return m_words[GetSize() - 1];
|
||||
}
|
||||
|
||||
inline const Word &Front() const {
|
||||
return m_words[0];
|
||||
}
|
||||
|
||||
inline const Word &Back() const {
|
||||
return m_words[GetSize() - 1];
|
||||
}
|
||||
|
||||
//! particular factor at a particular position
|
||||
inline const Factor *GetFactor(size_t pos, FactorType factorType) const {
|
||||
const Word &ptr = m_words[pos];
|
||||
|
@ -5,6 +5,8 @@
|
||||
#include <boost/thread.hpp>
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
void GenericCandidate::readBin(FILE* f)
|
||||
@ -62,6 +64,17 @@ void Candidates::readBin(FILE* f)
|
||||
|
||||
const LabelId PrefixTreeMap::MagicWord = std::numeric_limits<LabelId>::max() - 1;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
PrefixTreeMap::~PrefixTreeMap() {
|
||||
if(m_FileSrc) {
|
||||
fClose(m_FileSrc);
|
||||
}
|
||||
if(m_FileTgt) {
|
||||
fClose(m_FileTgt);
|
||||
}
|
||||
FreeMemory();
|
||||
}
|
||||
|
||||
|
||||
void PrefixTreeMap::FreeMemory()
|
||||
{
|
||||
@ -75,20 +88,21 @@ void PrefixTreeMap::FreeMemory()
|
||||
m_PtrPool.reset();
|
||||
}
|
||||
|
||||
static WordVoc* ReadVoc(const std::string& filename)
|
||||
WordVoc &ReadVoc(std::map<std::string,WordVoc> &vocs, const std::string& filename)
|
||||
{
|
||||
static std::map<std::string,WordVoc*> vocs;
|
||||
#ifdef WITH_THREADS
|
||||
boost::mutex mutex;
|
||||
boost::mutex::scoped_lock lock(mutex);
|
||||
#endif
|
||||
std::map<std::string,WordVoc*>::iterator vi = vocs.find(filename);
|
||||
std::map<std::string,WordVoc>::iterator vi = vocs.find(filename);
|
||||
if (vi == vocs.end()) {
|
||||
WordVoc* voc = new WordVoc();
|
||||
voc->Read(filename);
|
||||
vocs[filename] = voc;
|
||||
WordVoc &voc = vocs[filename];
|
||||
voc.Read(filename);
|
||||
return voc;
|
||||
}
|
||||
else {
|
||||
return vi->second;
|
||||
}
|
||||
return vocs[filename];
|
||||
}
|
||||
|
||||
int PrefixTreeMap::Read(const std::string& fileNameStem, int numVocs)
|
||||
@ -133,7 +147,7 @@ int PrefixTreeMap::Read(const std::string& fileNameStem, int numVocs)
|
||||
sprintf(num, "%d", i);
|
||||
//m_Voc[i] = new WordVoc();
|
||||
//m_Voc[i]->Read(ifv + num);
|
||||
m_Voc[i] = ReadVoc(ifv + num);
|
||||
m_Voc[i] = &ReadVoc(m_vocs, ifv + num);
|
||||
}
|
||||
|
||||
TRACE_ERR("binary file loaded, default OFF_T: "<< PTF::getDefault()<<"\n");
|
||||
|
@ -99,18 +99,11 @@ public:
|
||||
PrefixTreeMap() : m_FileSrc(0), m_FileTgt(0) {
|
||||
PTF::setDefault(InvalidOffT);
|
||||
}
|
||||
~PrefixTreeMap() {
|
||||
if(m_FileSrc) {
|
||||
fClose(m_FileSrc);
|
||||
}
|
||||
if(m_FileTgt) {
|
||||
fClose(m_FileTgt);
|
||||
}
|
||||
FreeMemory();
|
||||
}
|
||||
~PrefixTreeMap();
|
||||
|
||||
public:
|
||||
static const LabelId MagicWord;
|
||||
public:
|
||||
|
||||
void FreeMemory();
|
||||
|
||||
int Read(const std::string& fileNameStem, int numVocs = -1);
|
||||
@ -135,6 +128,7 @@ private:
|
||||
|
||||
std::vector<WordVoc*> m_Voc;
|
||||
ObjectPool<PPimp> m_PtrPool;
|
||||
std::map<std::string,WordVoc> m_vocs;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -218,7 +218,7 @@ bool RuleTableLoaderStandard::Load(FormatType format
|
||||
// parse source & find pt node
|
||||
|
||||
// constituent labels
|
||||
Word *sourceLHS;
|
||||
Word *sourceLHS = NULL;
|
||||
Word *targetLHS;
|
||||
|
||||
// create target phrase obj
|
||||
@ -251,6 +251,9 @@ bool RuleTableLoaderStandard::Load(FormatType format
|
||||
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS);
|
||||
phraseColl.Add(targetPhrase);
|
||||
|
||||
// not implemented correctly in memory pt. just delete it for now
|
||||
delete sourceLHS;
|
||||
|
||||
count++;
|
||||
}
|
||||
|
||||
|
230
scripts/Transliteration/in-decoding-transliteration.pl
Executable file
230
scripts/Transliteration/in-decoding-transliteration.pl
Executable file
@ -0,0 +1,230 @@
|
||||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
|
||||
use utf8;
|
||||
use File::Basename;
|
||||
use Getopt::Long "GetOptions";
|
||||
use FindBin qw($RealBin);
|
||||
use Scalar::Util qw(looks_like_number);
|
||||
use IO::Handle;
|
||||
binmode(STDIN, ':utf8');
|
||||
binmode(STDOUT, ':utf8');
|
||||
binmode(STDERR, ':utf8');
|
||||
|
||||
my $___FACTOR_DELIMITER = "|";
|
||||
my $OUT_FILE = "/tmp/transliteration-phrase-table.$$";
|
||||
|
||||
my ($MOSES_SRC_DIR,$TRANSLIT_MODEL,$OOV_FILE, $OOV_FILE_NAME, $EXTERNAL_BIN_DIR, $LM_FILE, $INPUT_EXTENSION, $OUTPUT_EXTENSION);
|
||||
die("ERROR: wrong syntax when invoking in-decoding-transliteration.perl")
|
||||
unless &GetOptions('moses-src-dir=s' => \$MOSES_SRC_DIR,
|
||||
'external-bin-dir=s' => \$EXTERNAL_BIN_DIR,
|
||||
'transliteration-model-dir=s' => \$TRANSLIT_MODEL,
|
||||
'input-extension=s' => \$INPUT_EXTENSION,
|
||||
'output-extension=s' => \$OUTPUT_EXTENSION,
|
||||
'transliteration-file=s' => \$OOV_FILE,
|
||||
'out-file=s' => \$OUT_FILE);
|
||||
|
||||
# check if the files are in place
|
||||
die("ERROR: you need to define --moses-src-dir --external-bin-dir, --transliteration-model-dir, --transliteration-file, --input-extension, and --output-extension")
|
||||
unless (defined($MOSES_SRC_DIR) &&
|
||||
defined($TRANSLIT_MODEL) &&
|
||||
defined($OOV_FILE) &&
|
||||
defined($INPUT_EXTENSION)&&
|
||||
defined($OUTPUT_EXTENSION)&&
|
||||
defined($EXTERNAL_BIN_DIR));
|
||||
|
||||
die("ERROR: could not find Transliteration Model '$TRANSLIT_MODEL'")
|
||||
unless -e $TRANSLIT_MODEL;
|
||||
die("ERROR: could not find Transliteration file $OOV_FILE'")
|
||||
unless -e $OOV_FILE;
|
||||
|
||||
$OOV_FILE_NAME = basename ($OOV_FILE);
|
||||
|
||||
`mkdir $TRANSLIT_MODEL/evaluation`;
|
||||
`cp $OOV_FILE $TRANSLIT_MODEL/evaluation/`;
|
||||
my $translitFile = $TRANSLIT_MODEL . "/evaluation/" . $OOV_FILE_NAME;
|
||||
|
||||
print "Preparing for Transliteration\n";
|
||||
prepare_for_transliteration ($OOV_FILE, $translitFile);
|
||||
print "Run Transliteration\n";
|
||||
run_transliteration ($MOSES_SRC_DIR , $EXTERNAL_BIN_DIR , $TRANSLIT_MODEL , $OOV_FILE_NAME);
|
||||
print "Pick Best Transliteration\n";
|
||||
form_corpus ($translitFile , $translitFile.".op.nBest" , $OUT_FILE);
|
||||
|
||||
|
||||
################### Read the UNK word file and prepare for Transliteration ###############################
|
||||
|
||||
sub prepare_for_transliteration
|
||||
{
|
||||
my @list = @_;
|
||||
my $testFile = $list[0];
|
||||
my $translitFile = $list[1];
|
||||
my %UNK;
|
||||
my @words;
|
||||
my $src;
|
||||
my @tW;
|
||||
|
||||
open MYFILE, "<:encoding(UTF-8)", $testFile or die "Can't open $testFile: $!\n";
|
||||
|
||||
while (<MYFILE>)
|
||||
{
|
||||
chomp;
|
||||
#print "$_\n";
|
||||
@words = split(/ /, "$_");
|
||||
|
||||
foreach (@words)
|
||||
{
|
||||
|
||||
@tW = split /\Q$___FACTOR_DELIMITER/;
|
||||
|
||||
if (defined $tW[0])
|
||||
{
|
||||
|
||||
if (! ($tW[0] =~ /[0-9.,]/))
|
||||
{
|
||||
$UNK{$tW[0]} = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
print "Not transliterating $tW[0] \n";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
close (MYFILE);
|
||||
|
||||
open MYFILE, ">:encoding(UTF-8)", $translitFile or die "Can't open $translitFile: $!\n";
|
||||
|
||||
foreach my $key ( keys %UNK )
|
||||
{
|
||||
$src=join(' ', split('',$key));
|
||||
print MYFILE "$src\n";
|
||||
}
|
||||
close (MYFILE);
|
||||
}
|
||||
|
||||
################### Run Transliteration Module to Obtain Transliterations ###############################
|
||||
|
||||
sub run_transliteration
|
||||
{
|
||||
my @list = @_;
|
||||
my $MOSES_SRC = $list[0];
|
||||
my $EXTERNAL_BIN_DIR = $list[1];
|
||||
my $TRANSLIT_MODEL = $list[2];
|
||||
my $eval_file = $list[3];
|
||||
|
||||
`touch $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini`;
|
||||
|
||||
print "Filter Table\n";
|
||||
|
||||
`$MOSES_SRC/scripts/training/train-model.perl -mgiza -mgiza-cpus 10 -dont-zip -first-step 9 -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION -e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 -score-options '--KneserNey' -phrase-translation-table $TRANSLIT_MODEL/model/phrase-table -config $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini -lm 0:3:$TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini:8`;
|
||||
|
||||
`$MOSES_SRC/scripts/training/filter-model-given-input.pl $TRANSLIT_MODEL/evaluation/$eval_file.filtered $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini $TRANSLIT_MODEL/evaluation/$eval_file -Binarizer "$MOSES_SRC/bin/processPhraseTable"`;
|
||||
|
||||
`rm $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini`;
|
||||
|
||||
print "Apply Filter\n";
|
||||
|
||||
`$MOSES_SRC/scripts/ems/support/substitute-filtered-tables-and-weights.perl $TRANSLIT_MODEL/evaluation/$eval_file.filtered/moses.ini $TRANSLIT_MODEL/model/moses.ini $TRANSLIT_MODEL/tuning/moses.tuned.ini $TRANSLIT_MODEL/evaluation/$eval_file.filtered.ini`;
|
||||
|
||||
`$MOSES_SRC/bin/moses -search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 -threads 16 -drop-unknown -distortion-limit 0 -n-best-list $TRANSLIT_MODEL/evaluation/$eval_file.op.nBest 100 distinct -f $TRANSLIT_MODEL/evaluation/$eval_file.filtered.ini < $TRANSLIT_MODEL/evaluation/$eval_file > $TRANSLIT_MODEL/evaluation/$eval_file.op`;
|
||||
|
||||
}
|
||||
|
||||
################### Read the output of Transliteration Model and Form Corpus ###############################
|
||||
|
||||
|
||||
sub form_corpus
{
	# Build a transliteration phrase table from the decoder's n-best output.
	#   $list[0] ($inp_file)    - OOV words, one space-separated character sequence per line
	#   $list[1] ($testFile)    - moses n-best list ("sNum ||| tokens ||| feature values ||| score")
	#   $list[2] ($phraseTable) - output phrase-table path; gzipped on completion
	# Dies if either input file or the output file cannot be opened.
	my @list = @_;
	my $inp_file = $list[0];
	my $testFile = $list[1];
	my $phraseTable = $list[2];

	my @words;
	my $thisStr;
	my $features;
	my $prev = 0;     # sentence id of the previous n-best entry
	my $sNum;         # sentence id of the current n-best entry
	my @UNK;          # OOV source words, indexed by input-sentence number

	open MYFILE, "<:encoding(UTF-8)", $inp_file or die "Can't open $inp_file: $!\n";
	open PT, ">:encoding(UTF-8)", $phraseTable or die "Can't open $phraseTable: $!\n";

	# Pass 1: each OOV word is written as space-separated characters;
	# re-join the characters into the original surface form.
	while (<MYFILE>)
	{
		chomp;
		#print "$_\n";
		@words = split(/ /, "$_");

		$thisStr = "";
		foreach (@words)
		{
			$thisStr = $thisStr . "$_";
		}

		push(@UNK, $thisStr);
	}
	close (MYFILE);

	# Pass 2: walk the n-best list and emit one phrase-table line per candidate.
	open MYFILE, "<:encoding(UTF-8)", $testFile or die "Can't open $testFile: $!\n";
	my $inpCount = 0; # index into @UNK; advances whenever the sentence id changes

	while (<MYFILE>)
	{
		chomp;
		#print "$_\n";
		@words = split(/ /, "$_");

		$sNum = $words[0];

		if ($prev != $sNum){
			$inpCount++;
		}

		# Token 0 is the sentence id, token 1 the first "|||" separator;
		# the candidate transliteration runs up to the next "|||".
		my $i = 2;
		$thisStr = "";
		$features = "";

		while ($words[$i] ne "|||")
		{
			$thisStr = $thisStr . $words[$i];
			$i++;
		}

		$i++;

		# Collect feature values, converting log scores back via exp().
		# Penalty/Distortion/LM feature-name tokens consume an extra $i++
		# so their following value token is skipped as well.
		while ($words[$i] ne "|||")
		{
			if ($words[$i] =~ /Penalty0/ || $words[$i] eq "Distortion0=" || $words[$i] eq "LM0=" ){
				$i++;
			}
			elsif (looks_like_number($words[$i])){
				$features = $features . " " . exp($words[$i]);
			}

			$i++;
		}
		$i++;

		#$features = $features . " " . $words[$i];

		if ($thisStr ne ""){
			print PT "$UNK[$inpCount] ||| $thisStr ||| $features ||| 0-0 ||| 0 0 0\n";
		}
		$prev = $sNum;
	}
	close (MYFILE);
	close (PT);

	`gzip $phraseTable`;
}
|
||||
|
||||
|
@ -533,6 +533,13 @@ build-transliteration-model
|
||||
ignore-unless: transliteration-module
|
||||
rerun-on-change: transliteration-module training-options script giza-settings
|
||||
default-name: model/Transliteration
|
||||
build-translit-table
|
||||
in: transliteration-model
|
||||
out: transliteration-table
|
||||
ignore-unless: in-decoding-transliteration
|
||||
rerun-on-change: in-decoding-transliteration transliteration-module
|
||||
default-name: model/transliteration-phrase-table
|
||||
template: $moses-script-dir/Transliteration/in-decoding-transliteration.pl --moses-src-dir $moses-src-dir --external-bin-dir $external-bin-dir --transliteration-model-dir IN --input-extension $input-extension --output-extension $output-extension --transliteration-file $transliteration-file --out-file OUT
|
||||
extract-phrases
|
||||
in: corpus-mml-postfilter=OR=word-alignment scored-corpus
|
||||
out: extracted-phrases
|
||||
@ -601,7 +608,7 @@ build-sparse
|
||||
default-name: model/sparse-features
|
||||
template: $moses-script-dir/ems/support/build-sparse-features.perl IN $input-extension $output-extension OUT "$sparse-features"
|
||||
create-config
|
||||
in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table transliteration-model generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm
|
||||
in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table transliteration-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm
|
||||
out: config
|
||||
ignore-if: use-hiero
|
||||
rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini
|
||||
@ -863,7 +870,7 @@ split-reference-devtest
|
||||
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
|
||||
template: $output-splitter -model IN1.$output-extension < IN > OUT
|
||||
filter
|
||||
in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains
|
||||
in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains TRAINING:transliteration-table
|
||||
out: filtered-dir
|
||||
default-name: tuning/filtered
|
||||
rerun-on-change: filter-settings ttable-binarizer
|
||||
@ -989,8 +996,8 @@ split-input
|
||||
pass-unless: input-splitter
|
||||
template: $input-splitter -model IN1.$input-extension < IN > OUT
|
||||
filter
|
||||
in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains
|
||||
out: filtered-dir
|
||||
in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains TRAINING:transliteration-table
|
||||
out: filtered-dir
|
||||
default-name: evaluation/filtered
|
||||
rerun-on-change: filter-settings report-precision-by-coverage ttable-binarizer
|
||||
pass-if: TRAINING:binarize-all
|
||||
@ -1027,11 +1034,11 @@ remove-markup
|
||||
pass-unless: report-segmentation
|
||||
template: $moses-script-dir/ems/support/remove-segmentation-markup.perl < IN > OUT
|
||||
post-decoding-transliteration
|
||||
in: cleaned-output system-output TRAINING:transliteration-model LM:binlm
|
||||
in: cleaned-output system-output TRAINING:transliteration-model
|
||||
out: transliterated-output
|
||||
default-name: evaluation/transliterated
|
||||
pass-unless: TRAINING:post-decoding-transliteration
|
||||
template: $moses-script-dir/Transliteration/post-decoding-transliteration.pl --moses-src-dir $moses-src-dir --external-bin-dir $external-bin-dir --transliteration-model-dir IN2 --input-extension $input-extension --output-extension $output-extension --language-model IN3 --output-file IN0 --oov-file IN1.oov
|
||||
template: $moses-script-dir/Transliteration/post-decoding-transliteration.pl --moses-src-dir $moses-src-dir --external-bin-dir $external-bin-dir --transliteration-model-dir IN2 --input-extension $input-extension --output-extension $output-extension --language-model $TRAINING:language-model-file --output-file IN0 --oov-file IN1.oov
|
||||
recase-output
|
||||
in: transliterated-output RECASING:recase-config
|
||||
out: recased-output
|
||||
|
@ -2233,11 +2233,15 @@ sub get_config_tables {
|
||||
sub define_training_create_config {
|
||||
my ($step_id) = @_;
|
||||
|
||||
my ($config,$reordering_table,$phrase_translation_table,$translit_model,$generation_table,$sparse_lexical_features,$domains,$osm, @LM)
|
||||
my ($config,$reordering_table,$phrase_translation_table,$transliteration_pt,$generation_table,$sparse_lexical_features,$domains,$osm, @LM)
|
||||
= &get_output_and_input($step_id);
|
||||
|
||||
my $cmd = &get_config_tables($config,$reordering_table,$phrase_translation_table,$generation_table,$domains);
|
||||
|
||||
if($transliteration_pt){
|
||||
$cmd .= "-transliteration-phrase-table $transliteration_pt ";
|
||||
}
|
||||
|
||||
if($osm){
|
||||
|
||||
my $osm_settings = &get("TRAINING:operation-sequence-model-settings");
|
||||
@ -2623,7 +2627,7 @@ sub define_tuningevaluation_filter {
|
||||
my $tuning_flag = !defined($set);
|
||||
my $hierarchical = &get("TRAINING:hierarchical-rule-set");
|
||||
|
||||
my ($filter_dir,$input,$phrase_translation_table,$reordering_table,$domains) = &get_output_and_input($step_id);
|
||||
my ($filter_dir,$input,$phrase_translation_table,$reordering_table,$domains,$transliteration_table) = &get_output_and_input($step_id);
|
||||
|
||||
my $binarizer;
|
||||
$binarizer = &backoff_and_get("EVALUATION:$set:ttable-binarizer") unless $tuning_flag;
|
||||
@ -2683,7 +2687,14 @@ sub define_tuningevaluation_filter {
|
||||
|
||||
$cmd .= &get_config_tables($config,$reordering_table,$phrase_translation_table,undef,$domains);
|
||||
|
||||
if (&get("TRAINING:in-decoding-transliteration")) {
|
||||
|
||||
$cmd .= "-transliteration-phrase-table $dir/model/transliteration-phrase-table.$VERSION ";
|
||||
}
|
||||
|
||||
|
||||
$cmd .= "-lm 0:3:$config:8\n"; # dummy kenlm 3-gram model on factor 0
|
||||
|
||||
}
|
||||
|
||||
# filter command
|
||||
|
@ -436,7 +436,8 @@ function ngram_summary() {
|
||||
$score_line = "";
|
||||
for($i=0;$i<count($each_score);$i++) {
|
||||
if (preg_match('/([\d\(\)\.\s]+) (BLEU[\-c]*)/',$each_score[$i],$match) ||
|
||||
preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match)) {
|
||||
preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match) ||
|
||||
preg_match('/([\d\(\)\.\s]+) (METEOR[\-c]*)/',$each_score[$i],$match)) {
|
||||
$header .= "<td>$match[2]</td>";
|
||||
$score_line .= "<td>$match[1]</td>";
|
||||
}
|
||||
|
@ -683,7 +683,8 @@ function ngram_summary_diff() {
|
||||
$each_score = explode(" ; ",$experiment[$idx?$id2:$id]->result[$set]);
|
||||
for($i=0;$i<count($each_score);$i++) {
|
||||
if (preg_match('/([\d\(\)\.\s]+) (BLEU[\-c]*)/',$each_score[$i],$match) ||
|
||||
preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match)) {
|
||||
preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match) ||
|
||||
preg_match('/([\d\(\)\.\s]+) (METEOR[\-c]*)/',$each_score[$i],$match)) {
|
||||
$score[$match[2]][$idx] = $match[1];
|
||||
}
|
||||
}
|
||||
|
@ -22,7 +22,8 @@ function head($title) {
|
||||
<body><h2>'.$title."</h2>\n";
|
||||
}
|
||||
|
||||
if (array_key_exists("setup",$_POST) || array_key_exists("setup",$_GET)) {
|
||||
if (array_key_exists("setStepStatus",$_GET)) { set_step_status($_GET["setStepStatus"]); }
|
||||
else if (array_key_exists("setup",$_POST) || array_key_exists("setup",$_GET)) {
|
||||
load_experiment_info();
|
||||
load_comment();
|
||||
|
||||
|
@ -295,7 +295,8 @@ function output_score($id,$info) {
|
||||
$each_score = explode(" ; ",$score);
|
||||
for($i=0;$i<count($each_score);$i++) {
|
||||
if (preg_match('/([\d\(\)\.\s]+) (BLEU[\-c]*)/',$each_score[$i],$match) ||
|
||||
preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match)) {
|
||||
preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match) ||
|
||||
preg_match('/([\d\(\)\.\s]+) (METEOR[\-c]*)/',$each_score[$i],$match)) {
|
||||
if ($i>0) { print "<BR>"; }
|
||||
$opened_a_tag = 0;
|
||||
if ($set != "avg") {
|
||||
|
61
scripts/other/delete-scores.perl
Executable file
61
scripts/other/delete-scores.perl
Executable file
@ -0,0 +1,61 @@
|
||||
#!/usr/bin/perl
#
# Filter the scores column of a Moses phrase table read on STDIN, keeping only
# the score fields whose (0-based) indices are listed in --keep-scores.
# Usage:  delete-scores.perl --keep-scores 0,2 < phrase-table > filtered-table

use strict;
use Getopt::Long "GetOptions";

binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");

sub trim($);
sub DeleteScore;

my $keepScoresStr;
GetOptions(
  "keep-scores=s" => \$keepScoresStr
) or exit(1);

# 0-based indices of the score fields to retain, e.g. "0,2,3".
my @keepScores = split(/,/, $keepScoresStr);

#MAIN LOOP
while (my $line = <STDIN>) {
  chomp($line);
  #print STDERR "line=$line\n";

  # Splitting on a single '|' turns "src ||| tgt ||| scores ..." into fields
  # with two empty strings between columns, so the scores column is $toks[6].
  my @toks = split(/\|/, $line);

  $toks[6] = DeleteScore($toks[6], \@keepScores);

  # output: re-join with single '|' so the "|||" separators are reconstructed
  print $toks[0];
  for (my $i = 1; $i < scalar(@toks); ++$i) {
    print "|" .$toks[$i];
  }
  print "\n";
}

######################
# Perl trim function to remove whitespace from the start and end of the string
sub trim($) {
  my $string = shift;
  $string =~ s/^\s+//;
  $string =~ s/\s+$//;
  return $string;
}

# Return a scores string containing only the fields at the requested indices.
#   $_[0] - original space-separated scores string
#   $_[1] - array ref of 0-based indices to keep
# The result carries a leading space and a trailing space after each score,
# matching the " score1 score2 " layout expected between "|||" separators.
sub DeleteScore
{
  my $string = $_[0];
  my @keepScores = @{$_[1]};

  $string = trim($string);
  my @toks = split(/ /, $string);

  $string = "";
  for (my $i = 0; $i < scalar(@keepScores); ++$i) {
    $string .= $toks[ $keepScores[$i] ] ." ";
  }
  $string = " " .$string;

  return $string;
}
|
@ -31,7 +31,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_
|
||||
$_DECODING_GRAPH_BACKOFF,
|
||||
$_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE,
|
||||
@_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS,
|
||||
$_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM, $_OSM_FACTORS, $_POST_DECODING_TRANSLIT,
|
||||
$_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM, $_OSM_FACTORS, $_POST_DECODING_TRANSLIT, $_TRANSLITERATION_PHRASE_TABLE,
|
||||
$_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_GHKM_TREE_FRAGMENTS,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,
|
||||
$_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2, $_UNKNOWN_WORD_SOFT_MATCHES_FILE,
|
||||
$_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
|
||||
@ -122,7 +122,8 @@ $_HELP = 1
|
||||
'config=s' => \$_CONFIG,
|
||||
'osm-model=s' => \$_OSM,
|
||||
'osm-setting=s' => \$_OSM_FACTORS,
|
||||
'post-decoding-translit=s' => \$_POST_DECODING_TRANSLIT,
|
||||
'post-decoding-translit=s' => \$_POST_DECODING_TRANSLIT,
|
||||
'transliteration-phrase-table=s' => \$_TRANSLITERATION_PHRASE_TABLE,
|
||||
'max-lexical-reordering' => \$_MAX_LEXICAL_REORDERING,
|
||||
'do-steps=s' => \$_DO_STEPS,
|
||||
'memscore:s' => \$_MEMSCORE,
|
||||
@ -1879,6 +1880,8 @@ sub create_ini {
|
||||
$path++;
|
||||
}
|
||||
print INI "1 T 1\n" if $_GLUE_GRAMMAR;
|
||||
|
||||
print INI "1 T 1\n" if $_TRANSLITERATION_PHRASE_TABLE;
|
||||
|
||||
if (defined($_DECODING_GRAPH_BACKOFF)) {
|
||||
$_DECODING_GRAPH_BACKOFF =~ s/\s+/ /g;
|
||||
@ -1962,6 +1965,13 @@ sub create_ini {
|
||||
exit 1 if $i < $stepsused{"T"}; # fatal to define less
|
||||
}
|
||||
|
||||
if ($_TRANSLITERATION_PHRASE_TABLE){
|
||||
|
||||
$feature_spec .= "PhraseDictionaryMemory name=TranslationModel$i table-limit=100 num-features=4 path=$_TRANSLITERATION_PHRASE_TABLE input-factor=0 output-factor=0\n";
|
||||
$weight_spec .= "TranslationModel$i= 0.2 0.2 0.2 0.2\n";
|
||||
$i++;
|
||||
}
|
||||
|
||||
# glue grammar
|
||||
if ($_GLUE_GRAMMAR) {
|
||||
&full_path(\$___GLUE_GRAMMAR_FILE);
|
||||
@ -2069,8 +2079,9 @@ sub create_ini {
|
||||
|
||||
my $lm_oov_prob = 0.1;
|
||||
|
||||
if ($_POST_DECODING_TRANSLIT){
|
||||
if ($_POST_DECODING_TRANSLIT || $_TRANSLITERATION_PHRASE_TABLE){
|
||||
$lm_oov_prob = -100.0;
|
||||
$_LMODEL_OOV_FEATURE = "yes";
|
||||
}
|
||||
|
||||
$feature_spec .= "$type_name name=LM$i factor=$f path=$fn order=$o\n";
|
||||
|
Loading…
Reference in New Issue
Block a user