alignment info in pt

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3358 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
hieuhoang1972 2010-07-17 22:29:06 +00:00
parent fab2e96d2f
commit 31930eb6fc
22 changed files with 200 additions and 186 deletions

View File

@ -265,7 +265,7 @@ class TranslationTask : public Task {
//detailed translation reporting
if (m_detailedTranslationCollector) {
ostringstream out;
ostringstream out;
fix(out);
TranslationAnalysis::PrintTranslationAnalysis(out, manager.GetBestHypothesis());
m_detailedTranslationCollector->Write(m_lineNumber,out.str());

View File

@ -39,6 +39,8 @@ void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
const AlignmentInfo &alignmentInfo = (*tpi)->GetCurrTargetPhrase().GetAlignmentInfo();
// language model backoff stats,
if (doLMStats) {
std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
@ -60,8 +62,9 @@ void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
epsilon = true;
droppedWords.push_back(source);
}
os << " SOURCE: " << swr << " " << source << std::endl
<< " TRANSLATED AS: " << target << std::endl;
os << " SOURCE: " << swr << " " << source << std::endl
<< " TRANSLATED AS: " << target << std::endl
<< " WORD ALIGNED: " << alignmentInfo << std::endl;
size_t twr_i = twr.GetStartPos();
size_t swr_i = swr.GetStartPos();
if (!epsilon) { sms << twr_i; }

View File

@ -7,6 +7,10 @@
objects = {
/* Begin PBXBuildFile section */
1E5D8E0411F25F03000F027F /* PhraseDictionaryNodeSCFG.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E5D8E0211F25F03000F027F /* PhraseDictionaryNodeSCFG.cpp */; };
1E5D8E0511F25F03000F027F /* PhraseDictionaryNodeSCFG.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E5D8E0311F25F03000F027F /* PhraseDictionaryNodeSCFG.h */; };
1E5D8E0811F25F2F000F027F /* PhraseDictionarySCFG.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E5D8E0611F25F2F000F027F /* PhraseDictionarySCFG.cpp */; };
1E5D8E0911F25F2F000F027F /* PhraseDictionarySCFGChart.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E5D8E0711F25F2F000F027F /* PhraseDictionarySCFGChart.cpp */; };
1ED4FD3711BDC0D2004E826A /* AlignmentInfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FC5F11BDC0D2004E826A /* AlignmentInfo.cpp */; };
1ED4FD3811BDC0D2004E826A /* AlignmentInfo.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FC6011BDC0D2004E826A /* AlignmentInfo.h */; };
1ED4FD3911BDC0D2004E826A /* BilingualDynSuffixArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FC6111BDC0D2004E826A /* BilingualDynSuffixArray.cpp */; };
@ -133,13 +137,8 @@
1ED4FDB511BDC0D2004E826A /* PhraseDictionaryDynSuffixArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FCE111BDC0D2004E826A /* PhraseDictionaryDynSuffixArray.h */; };
1ED4FDB611BDC0D2004E826A /* PhraseDictionaryMemory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FCE211BDC0D2004E826A /* PhraseDictionaryMemory.cpp */; };
1ED4FDB711BDC0D2004E826A /* PhraseDictionaryMemory.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FCE311BDC0D2004E826A /* PhraseDictionaryMemory.h */; };
1ED4FDB811BDC0D2004E826A /* PhraseDictionaryNewFormat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FCE411BDC0D2004E826A /* PhraseDictionaryNewFormat.cpp */; };
1ED4FDB911BDC0D2004E826A /* PhraseDictionaryNewFormat.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FCE511BDC0D2004E826A /* PhraseDictionaryNewFormat.h */; };
1ED4FDBA11BDC0D2004E826A /* PhraseDictionaryNewFormatChart.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FCE611BDC0D2004E826A /* PhraseDictionaryNewFormatChart.cpp */; };
1ED4FDBB11BDC0D2004E826A /* PhraseDictionaryNode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FCE711BDC0D2004E826A /* PhraseDictionaryNode.cpp */; };
1ED4FDBC11BDC0D2004E826A /* PhraseDictionaryNode.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FCE811BDC0D2004E826A /* PhraseDictionaryNode.h */; };
1ED4FDBD11BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FCE911BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.cpp */; };
1ED4FDBE11BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FCEA11BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.h */; };
1ED4FDBF11BDC0D2004E826A /* PhraseDictionaryOnDisk.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FCEB11BDC0D2004E826A /* PhraseDictionaryOnDisk.cpp */; };
1ED4FDC011BDC0D2004E826A /* PhraseDictionaryOnDisk.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FCEC11BDC0D2004E826A /* PhraseDictionaryOnDisk.h */; };
1ED4FDC111BDC0D2004E826A /* PhraseDictionaryOnDiskChart.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FCED11BDC0D2004E826A /* PhraseDictionaryOnDiskChart.cpp */; };
@ -218,6 +217,10 @@
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
1E5D8E0211F25F03000F027F /* PhraseDictionaryNodeSCFG.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryNodeSCFG.cpp; path = src/PhraseDictionaryNodeSCFG.cpp; sourceTree = "<group>"; };
1E5D8E0311F25F03000F027F /* PhraseDictionaryNodeSCFG.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryNodeSCFG.h; path = src/PhraseDictionaryNodeSCFG.h; sourceTree = "<group>"; };
1E5D8E0611F25F2F000F027F /* PhraseDictionarySCFG.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionarySCFG.cpp; path = src/PhraseDictionarySCFG.cpp; sourceTree = "<group>"; };
1E5D8E0711F25F2F000F027F /* PhraseDictionarySCFGChart.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionarySCFGChart.cpp; path = src/PhraseDictionarySCFGChart.cpp; sourceTree = "<group>"; };
1ED4FC5F11BDC0D2004E826A /* AlignmentInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = AlignmentInfo.cpp; path = src/AlignmentInfo.cpp; sourceTree = "<group>"; };
1ED4FC6011BDC0D2004E826A /* AlignmentInfo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = AlignmentInfo.h; path = src/AlignmentInfo.h; sourceTree = "<group>"; };
1ED4FC6111BDC0D2004E826A /* BilingualDynSuffixArray.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = BilingualDynSuffixArray.cpp; path = src/BilingualDynSuffixArray.cpp; sourceTree = "<group>"; };
@ -345,13 +348,8 @@
1ED4FCE111BDC0D2004E826A /* PhraseDictionaryDynSuffixArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryDynSuffixArray.h; path = src/PhraseDictionaryDynSuffixArray.h; sourceTree = "<group>"; };
1ED4FCE211BDC0D2004E826A /* PhraseDictionaryMemory.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryMemory.cpp; path = src/PhraseDictionaryMemory.cpp; sourceTree = "<group>"; };
1ED4FCE311BDC0D2004E826A /* PhraseDictionaryMemory.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryMemory.h; path = src/PhraseDictionaryMemory.h; sourceTree = "<group>"; };
1ED4FCE411BDC0D2004E826A /* PhraseDictionaryNewFormat.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryNewFormat.cpp; path = src/PhraseDictionaryNewFormat.cpp; sourceTree = "<group>"; };
1ED4FCE511BDC0D2004E826A /* PhraseDictionaryNewFormat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryNewFormat.h; path = src/PhraseDictionaryNewFormat.h; sourceTree = "<group>"; };
1ED4FCE611BDC0D2004E826A /* PhraseDictionaryNewFormatChart.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryNewFormatChart.cpp; path = src/PhraseDictionaryNewFormatChart.cpp; sourceTree = "<group>"; };
1ED4FCE711BDC0D2004E826A /* PhraseDictionaryNode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryNode.cpp; path = src/PhraseDictionaryNode.cpp; sourceTree = "<group>"; };
1ED4FCE811BDC0D2004E826A /* PhraseDictionaryNode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryNode.h; path = src/PhraseDictionaryNode.h; sourceTree = "<group>"; };
1ED4FCE911BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryNodeNewFormat.cpp; path = src/PhraseDictionaryNodeNewFormat.cpp; sourceTree = "<group>"; };
1ED4FCEA11BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryNodeNewFormat.h; path = src/PhraseDictionaryNodeNewFormat.h; sourceTree = "<group>"; };
1ED4FCEB11BDC0D2004E826A /* PhraseDictionaryOnDisk.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryOnDisk.cpp; path = src/PhraseDictionaryOnDisk.cpp; sourceTree = "<group>"; };
1ED4FCEC11BDC0D2004E826A /* PhraseDictionaryOnDisk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryOnDisk.h; path = src/PhraseDictionaryOnDisk.h; sourceTree = "<group>"; };
1ED4FCED11BDC0D2004E826A /* PhraseDictionaryOnDiskChart.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryOnDiskChart.cpp; path = src/PhraseDictionaryOnDiskChart.cpp; sourceTree = "<group>"; };
@ -576,16 +574,15 @@
1ED4FCE111BDC0D2004E826A /* PhraseDictionaryDynSuffixArray.h */,
1ED4FCE211BDC0D2004E826A /* PhraseDictionaryMemory.cpp */,
1ED4FCE311BDC0D2004E826A /* PhraseDictionaryMemory.h */,
1ED4FCE411BDC0D2004E826A /* PhraseDictionaryNewFormat.cpp */,
1ED4FCE511BDC0D2004E826A /* PhraseDictionaryNewFormat.h */,
1ED4FCE611BDC0D2004E826A /* PhraseDictionaryNewFormatChart.cpp */,
1ED4FCE711BDC0D2004E826A /* PhraseDictionaryNode.cpp */,
1ED4FCE811BDC0D2004E826A /* PhraseDictionaryNode.h */,
1ED4FCE911BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.cpp */,
1ED4FCEA11BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.h */,
1E5D8E0211F25F03000F027F /* PhraseDictionaryNodeSCFG.cpp */,
1E5D8E0311F25F03000F027F /* PhraseDictionaryNodeSCFG.h */,
1ED4FCEB11BDC0D2004E826A /* PhraseDictionaryOnDisk.cpp */,
1ED4FCEC11BDC0D2004E826A /* PhraseDictionaryOnDisk.h */,
1ED4FCED11BDC0D2004E826A /* PhraseDictionaryOnDiskChart.cpp */,
1E5D8E0611F25F2F000F027F /* PhraseDictionarySCFG.cpp */,
1E5D8E0711F25F2F000F027F /* PhraseDictionarySCFGChart.cpp */,
1ED4FCEE11BDC0D2004E826A /* PhraseDictionaryTree.cpp */,
1ED4FCEF11BDC0D2004E826A /* PhraseDictionaryTree.h */,
1ED4FCF011BDC0D2004E826A /* PhraseDictionaryTreeAdaptor.cpp */,
@ -767,9 +764,7 @@
1ED4FDB311BDC0D2004E826A /* PhraseDictionary.h in Headers */,
1ED4FDB511BDC0D2004E826A /* PhraseDictionaryDynSuffixArray.h in Headers */,
1ED4FDB711BDC0D2004E826A /* PhraseDictionaryMemory.h in Headers */,
1ED4FDB911BDC0D2004E826A /* PhraseDictionaryNewFormat.h in Headers */,
1ED4FDBC11BDC0D2004E826A /* PhraseDictionaryNode.h in Headers */,
1ED4FDBE11BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.h in Headers */,
1ED4FDC011BDC0D2004E826A /* PhraseDictionaryOnDisk.h in Headers */,
1ED4FDC311BDC0D2004E826A /* PhraseDictionaryTree.h in Headers */,
1ED4FDC511BDC0D2004E826A /* PhraseDictionaryTreeAdaptor.h in Headers */,
@ -809,6 +804,7 @@
1ED4FE0511BDC0D2004E826A /* WordsBitmap.h in Headers */,
1ED4FE0711BDC0D2004E826A /* WordsRange.h in Headers */,
1ED4FE0911BDC0D2004E826A /* XmlOption.h in Headers */,
1E5D8E0511F25F03000F027F /* PhraseDictionaryNodeSCFG.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -913,10 +909,7 @@
1ED4FDB211BDC0D2004E826A /* PhraseDictionary.cpp in Sources */,
1ED4FDB411BDC0D2004E826A /* PhraseDictionaryDynSuffixArray.cpp in Sources */,
1ED4FDB611BDC0D2004E826A /* PhraseDictionaryMemory.cpp in Sources */,
1ED4FDB811BDC0D2004E826A /* PhraseDictionaryNewFormat.cpp in Sources */,
1ED4FDBA11BDC0D2004E826A /* PhraseDictionaryNewFormatChart.cpp in Sources */,
1ED4FDBB11BDC0D2004E826A /* PhraseDictionaryNode.cpp in Sources */,
1ED4FDBD11BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.cpp in Sources */,
1ED4FDBF11BDC0D2004E826A /* PhraseDictionaryOnDisk.cpp in Sources */,
1ED4FDC111BDC0D2004E826A /* PhraseDictionaryOnDiskChart.cpp in Sources */,
1ED4FDC211BDC0D2004E826A /* PhraseDictionaryTree.cpp in Sources */,
@ -953,6 +946,9 @@
1ED4FE0411BDC0D2004E826A /* WordsBitmap.cpp in Sources */,
1ED4FE0611BDC0D2004E826A /* WordsRange.cpp in Sources */,
1ED4FE0811BDC0D2004E826A /* XmlOption.cpp in Sources */,
1E5D8E0411F25F03000F027F /* PhraseDictionaryNodeSCFG.cpp in Sources */,
1E5D8E0811F25F2F000F027F /* PhraseDictionarySCFG.cpp in Sources */,
1E5D8E0911F25F2F000F027F /* PhraseDictionarySCFGChart.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};

View File

@ -27,7 +27,7 @@ std::ostream& operator<<(std::ostream &out, const AlignmentInfo &alignmentInfo)
AlignmentInfo::const_iterator iter;
for (iter = alignmentInfo.begin(); iter != alignmentInfo.end(); ++iter)
{
out << "(" << iter->first << "," << iter->second << ") ";
out << iter->first << "-" << iter->second << " ";
}
return out;
}

View File

@ -19,7 +19,6 @@
***********************************************************************/
#include "DotChart.h"
#include "Util.h"
#include "PhraseDictionaryNodeNewFormat.h"
using namespace std;
@ -42,7 +41,7 @@ ProcessedRuleStack::~ProcessedRuleStack()
std::ostream& operator<<(std::ostream &out, const ProcessedRule &rule)
{
const PhraseDictionaryNode &node = rule.GetLastNode();
//const PhraseDictionaryNode &node = rule.GetLastNode();
//out << node;
return out;

View File

@ -20,7 +20,6 @@
#include <algorithm>
#include "DotChartOnDisk.h"
#include "Util.h"
#include "PhraseDictionaryNodeNewFormat.h"
#include "../../OnDiskPt/src/PhraseNode.h"
using namespace std;

View File

@ -440,17 +440,24 @@ void Hypothesis::CleanupArcList()
TO_STRING_BODY(Hypothesis)
// friend
ostream& operator<<(ostream& out, const Hypothesis& hypothesis)
ostream& operator<<(ostream& out, const Hypothesis& hypo)
{
hypothesis.ToStream(out);
hypo.ToStream(out);
// words bitmap
out << "[" << hypothesis.m_sourceCompleted << "] ";
out << "[" << hypo.m_sourceCompleted << "] ";
// scores
out << " [total=" << hypothesis.GetTotalScore() << "]";
out << " " << hypothesis.GetScoreBreakdown();
out << " [total=" << hypo.GetTotalScore() << "]";
out << " " << hypo.GetScoreBreakdown();
// alignment
out << " " << hypo.GetCurrTargetPhrase().GetAlignmentInfo();
/*
const Hypothesis *prevHypo = hypo.GetPrevHypo();
if (prevHypo)
out << endl << *prevHypo;
*/
return out;
}

View File

@ -63,9 +63,9 @@ libmoses_la_HEADERS = \
PhraseDictionary.h \
PhraseDictionaryDynSuffixArray.h \
PhraseDictionaryMemory.h \
PhraseDictionaryNewFormat.h \
PhraseDictionarySCFG.h \
PhraseDictionaryNode.h \
PhraseDictionaryNodeNewFormat.h \
PhraseDictionaryNodeSCFG.h \
PhraseDictionaryOnDisk.h \
PhraseDictionaryTree.h \
PhraseDictionaryTreeAdaptor.h \
@ -185,10 +185,10 @@ libmoses_la_SOURCES = \
PhraseDictionary.cpp \
PhraseDictionaryDynSuffixArray.cpp \
PhraseDictionaryMemory.cpp \
PhraseDictionaryNewFormat.cpp \
PhraseDictionaryNewFormatChart.cpp \
PhraseDictionarySCFG.cpp \
PhraseDictionarySCFGChart.cpp \
PhraseDictionaryNode.cpp \
PhraseDictionaryNodeNewFormat.cpp \
PhraseDictionaryNodeSCFG.cpp \
PhraseDictionaryOnDisk.cpp \
PhraseDictionaryOnDiskChart.cpp \
PhraseDictionaryTree.cpp \

View File

@ -22,7 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "PhraseDictionary.h"
#include "PhraseDictionaryTreeAdaptor.h"
#include "PhraseDictionaryNewFormat.h"
#include "PhraseDictionarySCFG.h"
#include "PhraseDictionaryOnDisk.h"
#ifndef WIN32
#include "PhraseDictionaryDynSuffixArray.h"
@ -65,7 +65,7 @@ PhraseDictionaryFeature::PhraseDictionaryFeature
const StaticData& staticData = StaticData::Instance();
const_cast<ScoreIndexManager&>(staticData.GetScoreIndexManager()).AddScoreProducer(this);
//Thread-safe phrase dictionaries get loaded now
if (implementation == Memory || implementation == NewFormat || implementation == OnDisk || implementation == SuffixArray) {
if (implementation == Memory || implementation == SCFG || implementation == OnDisk || implementation == SuffixArray) {
m_threadSafePhraseDictionary.reset(LoadPhraseTable());
m_useThreadSafePhraseDictionary = true;
} else {
@ -113,7 +113,7 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable() {
, staticData.GetWeightWordPenalty()));
return pdta;
}
else if (m_implementation == NewFormat)
else if (m_implementation == SCFG)
{ // memory phrase table
VERBOSE(2,"using New Format phrase tables" << std::endl);
if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
@ -121,7 +121,7 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable() {
VERBOSE(2,"Using gzipped file" << std::endl);
}
PhraseDictionaryNewFormat* pdm = new PhraseDictionaryNewFormat(m_numScoreComponent,this);
PhraseDictionarySCFG* pdm = new PhraseDictionarySCFG(m_numScoreComponent,this);
assert(pdm->Load(m_input
, m_output
, m_filePath

View File

@ -77,6 +77,8 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
{ // init numElement
numElement = tokens.size();
assert(numElement == 3 || numElement == 5);
// Pharoah style: source ||| target ||| scores
// New moses style: source ||| target ||| alignment ||| scores ||| count
}
if (tokens.size() != numElement)
@ -87,19 +89,16 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
abort();
}
string sourcePhraseString, targetPhraseString;
string scoreString;
string sourceAlignString, targetAlignString;
const string *scoreString;
const string &sourcePhraseString=tokens[0]
,&targetPhraseString=tokens[1];
sourcePhraseString=tokens[0];
targetPhraseString=tokens[1];
if (numElement==3){
scoreString=tokens[2];
scoreString = &tokens[2];
}
else{
sourceAlignString=tokens[2];
targetAlignString=tokens[3];
scoreString=tokens[4];
scoreString = &tokens[3];
}
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
@ -112,7 +111,7 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
if (sourcePhraseString != prevSourcePhrase)
phraseVector = Phrase::Parse(sourcePhraseString, input, factorDelimiter);
vector<float> scoreVector = Tokenize<float>(scoreString);
vector<float> scoreVector = Tokenize<float>(*scoreString);
if (scoreVector.size() != m_numScoreComponent)
{
stringstream strme;
@ -120,7 +119,6 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
UserMessage::Add(strme.str());
abort();
}
// assert(scoreVector.size() == m_numScoreComponent);
// source
Phrase sourcePhrase(Input);
@ -129,8 +127,8 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
TargetPhrase targetPhrase(Output);
targetPhrase.SetSourcePhrase(&sourcePhrase);
targetPhrase.CreateFromString( output, targetPhraseString, factorDelimiter);
if (numElement == 5)
targetPhrase.SetAlignmentInfo(tokens[2]);
// component score, for n-best output
std::vector<float> scv(scoreVector.size());

View File

@ -1,4 +1,4 @@
// $Id: PhraseDictionaryNodeNewFormat.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
// $Id: PhraseDictionaryNodeSCFG.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
@ -19,27 +19,27 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "PhraseDictionaryNodeNewFormat.h"
#include "PhraseDictionaryNodeSCFG.h"
#include "TargetPhrase.h"
#include "PhraseDictionaryMemory.h"
namespace Moses
{
size_t PhraseDictionaryNodeNewFormat::s_id = 0;
size_t PhraseDictionaryNodeSCFG::s_id = 0;
PhraseDictionaryNodeNewFormat::~PhraseDictionaryNodeNewFormat()
PhraseDictionaryNodeSCFG::~PhraseDictionaryNodeSCFG()
{
delete m_targetPhraseCollection;
}
void PhraseDictionaryNodeNewFormat::CleanUp()
void PhraseDictionaryNodeSCFG::CleanUp()
{
delete m_targetPhraseCollection;
m_targetPhraseCollection = NULL;
m_map.clear();
}
void PhraseDictionaryNodeNewFormat::Sort(size_t tableLimit)
void PhraseDictionaryNodeSCFG::Sort(size_t tableLimit)
{
// recusively sort
NodeMap::iterator iter;
@ -58,7 +58,7 @@ void PhraseDictionaryNodeNewFormat::Sort(size_t tableLimit)
m_targetPhraseCollection->NthElement(tableLimit);
}
PhraseDictionaryNodeNewFormat *PhraseDictionaryNodeNewFormat::GetOrCreateChild(const Word &word, const Word &sourcelabel)
PhraseDictionaryNodeSCFG *PhraseDictionaryNodeSCFG::GetOrCreateChild(const Word &word, const Word &sourcelabel)
{
InnerNodeMap *innerNodeMap;
innerNodeMap = &m_map[sourcelabel];
@ -69,17 +69,17 @@ PhraseDictionaryNodeNewFormat *PhraseDictionaryNodeNewFormat::GetOrCreateChild(c
// can't find node. create a new 1
std::pair <InnerNodeMap::iterator,bool> insResult;
insResult = innerNodeMap->insert( std::make_pair(word, PhraseDictionaryNodeNewFormat()) );
insResult = innerNodeMap->insert( std::make_pair(word, PhraseDictionaryNodeSCFG()) );
assert(insResult.second);
iter = insResult.first;
PhraseDictionaryNodeNewFormat &ret = iter->second;
PhraseDictionaryNodeSCFG &ret = iter->second;
ret.SetSourceWord(iter->first);
//ret.SetSourceWord(word);
return &ret;
}
const PhraseDictionaryNodeNewFormat *PhraseDictionaryNodeNewFormat::GetChild(const Word &word, const Word &sourcelabel) const
const PhraseDictionaryNodeSCFG *PhraseDictionaryNodeSCFG::GetChild(const Word &word, const Word &sourcelabel) const
{
NodeMap::const_iterator iterOuter = m_map.find(sourcelabel);
if (iterOuter == m_map.end())
@ -95,7 +95,7 @@ const PhraseDictionaryNodeNewFormat *PhraseDictionaryNodeNewFormat::GetChild(con
}
void PhraseDictionaryNodeNewFormat::SetWeightTransModel(const PhraseDictionary *phraseDictionary
void PhraseDictionaryNodeSCFG::SetWeightTransModel(const PhraseDictionary *phraseDictionary
, const std::vector<float> &weightT)
{
// recursively set weights
@ -125,13 +125,13 @@ void PhraseDictionaryNodeNewFormat::SetWeightTransModel(const PhraseDictionary *
}
std::ostream& operator<<(std::ostream &out, const PhraseDictionaryNodeNewFormat &node)
std::ostream& operator<<(std::ostream &out, const PhraseDictionaryNodeSCFG &node)
{
out << node.GetTargetPhraseCollection();
return out;
}
TO_STRING_BODY(PhraseDictionaryNodeNewFormat)
TO_STRING_BODY(PhraseDictionaryNodeSCFG)
}

View File

@ -1,4 +1,4 @@
// $Id: PhraseDictionaryNodeNewFormat.h 3049 2010-04-05 18:34:09Z hieuhoang1972 $
// $Id: PhraseDictionaryNodeSCFG.h 3049 2010-04-05 18:34:09Z hieuhoang1972 $
// vim:tabstop=2
/***********************************************************************
@ -41,18 +41,18 @@ class InputType;
/** One node of the PhraseDictionaryMemory structure
*/
class PhraseDictionaryNodeNewFormat : public PhraseDictionaryNode
class PhraseDictionaryNodeSCFG : public PhraseDictionaryNode
{
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryNodeNewFormat&);
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryNodeSCFG&);
typedef std::map<Word, PhraseDictionaryNodeNewFormat> InnerNodeMap;
typedef std::map<Word, PhraseDictionaryNodeSCFG> InnerNodeMap;
typedef std::map<Word, InnerNodeMap> NodeMap;
// 1st word = source side non-term, or the word if term
// 2nd word = target side non term, or the word if term
// only these classes are allowed to instantiate this class
friend class PhraseDictionaryNewFormat;
friend class std::map<Word, PhraseDictionaryNodeNewFormat>;
friend class PhraseDictionarySCFG;
friend class std::map<Word, PhraseDictionaryNodeSCFG>;
protected:
static size_t s_id;
@ -62,18 +62,18 @@ protected:
const Word *m_sourceWord;
float m_entropy;
PhraseDictionaryNodeNewFormat()
PhraseDictionaryNodeSCFG()
:m_id(s_id++)
,m_targetPhraseCollection(NULL)
,m_sourceWord(NULL)
{}
public:
virtual ~PhraseDictionaryNodeNewFormat();
virtual ~PhraseDictionaryNodeSCFG();
void CleanUp();
void Sort(size_t tableLimit);
PhraseDictionaryNodeNewFormat *GetOrCreateChild(const Word &word, const Word &sourcelabel);
const PhraseDictionaryNodeNewFormat *GetChild(const Word &word, const Word &sourcelabel) const;
PhraseDictionaryNodeSCFG *GetOrCreateChild(const Word &word, const Word &sourcelabel);
const PhraseDictionaryNodeSCFG *GetChild(const Word &word, const Word &sourcelabel) const;
const TargetPhraseCollection *GetTargetPhraseCollection() const
{ return m_targetPhraseCollection; }

View File

@ -1,4 +1,4 @@
// $Id: PhraseDictionaryNewFormat.cpp 3056 2010-04-06 13:40:24Z hieuhoang1972 $
// $Id: PhraseDictionarySCFG.cpp 3056 2010-04-06 13:40:24Z hieuhoang1972 $
// vim:tabstop=2
/***********************************************************************
@ -25,7 +25,7 @@
#include <iterator>
#include <algorithm>
#include <sys/stat.h>
#include "PhraseDictionaryNewFormat.h"
#include "PhraseDictionarySCFG.h"
#include "FactorCollection.h"
#include "Word.h"
#include "Util.h"
@ -55,25 +55,8 @@ inline void TransformString(vector< vector<string>* > &phraseVector)
}
}
}
void CreateAlignmentInfo(list<pair<size_t,size_t> > &alignmentInfo, const string &alignString)
{
vector<string> alignVec = Tokenize(alignString);
vector<string>::const_iterator iter;
for (iter = alignVec.begin(); iter != alignVec.end(); ++iter)
{
const string &align1 = *iter;
vector<size_t> alignPos = Tokenize<size_t>(align1, "-");
assert(alignPos.size() == 2);
size_t &sourcePos = alignPos[0]
,&targetPos = alignPos[1];
alignmentInfo.push_back(pair<size_t,size_t>(sourcePos, targetPos));
}
}
void PhraseDictionaryNewFormat::CreateSourceLabels(vector<Word> &sourceLabels
void PhraseDictionarySCFG::CreateSourceLabels(vector<Word> &sourceLabels
, const vector<string> &sourceLabelsStr) const
{
FactorCollection &factorCollection = FactorCollection::Instance();
@ -90,7 +73,7 @@ void PhraseDictionaryNewFormat::CreateSourceLabels(vector<Word> &sourceLabels
}
}
bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
bool PhraseDictionarySCFG::Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const string &filePath
, const vector<float> &weight
@ -112,7 +95,7 @@ bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
return ret;
}
bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
bool PhraseDictionarySCFG::Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, std::istream &inStream
, const std::vector<float> &weight
@ -125,7 +108,7 @@ bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
const StaticData &staticData = StaticData::Instance();
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
VERBOSE(2,"PhraseDictionaryNewFormat: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);
VERBOSE(2,"PhraseDictionarySCFG: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);
string line;
size_t count = 0;
@ -179,12 +162,8 @@ bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
TargetPhrase *targetPhrase = new TargetPhrase(Output);
targetPhrase->CreateFromStringNewFormat(Output, output, targetPhraseString, factorDelimiter, targetLHS);
// alignment
list<pair<size_t,size_t> > alignmentInfo;
CreateAlignmentInfo(alignmentInfo, alignString);
// rest of target phrase
targetPhrase->SetAlignmentInfo(alignmentInfo);
targetPhrase->SetAlignmentInfo(alignString);
targetPhrase->SetTargetLHS(targetLHS);
//targetPhrase->SetDebugOutput(string("New Format pt ") + line);
@ -212,20 +191,20 @@ bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
return true;
}
TargetPhraseCollection &PhraseDictionaryNewFormat::GetOrCreateTargetPhraseCollection(const Phrase &source, const TargetPhrase &target)
TargetPhraseCollection &PhraseDictionarySCFG::GetOrCreateTargetPhraseCollection(const Phrase &source, const TargetPhrase &target)
{
PhraseDictionaryNodeNewFormat &currNode = GetOrCreateNode(source, target);
PhraseDictionaryNodeSCFG &currNode = GetOrCreateNode(source, target);
return currNode.GetOrCreateTargetPhraseCollection();
}
PhraseDictionaryNodeNewFormat &PhraseDictionaryNewFormat::GetOrCreateNode(const Phrase &source, const TargetPhrase &target)
PhraseDictionaryNodeSCFG &PhraseDictionarySCFG::GetOrCreateNode(const Phrase &source, const TargetPhrase &target)
{
const size_t size = source.GetSize();
const AlignmentInfo &alignmentInfo = target.GetAlignmentInfo();
AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();
PhraseDictionaryNodeNewFormat *currNode = &m_collection;
PhraseDictionaryNodeSCFG *currNode = &m_collection;
for (size_t pos = 0 ; pos < size ; ++pos)
{
const Word& word = source.GetWord(pos);
@ -253,25 +232,25 @@ PhraseDictionaryNodeNewFormat &PhraseDictionaryNewFormat::GetOrCreateNode(const
return *currNode;
}
void PhraseDictionaryNewFormat::AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
void PhraseDictionarySCFG::AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
{
assert(false); // TODO
}
void PhraseDictionaryNewFormat::AddEquivPhrase(TargetPhraseCollection &targetPhraseColl, TargetPhrase *targetPhrase)
void PhraseDictionarySCFG::AddEquivPhrase(TargetPhraseCollection &targetPhraseColl, TargetPhrase *targetPhrase)
{
targetPhraseColl.Add(targetPhrase);
}
const TargetPhraseCollection *PhraseDictionaryNewFormat::GetTargetPhraseCollection(const Phrase &source) const
const TargetPhraseCollection *PhraseDictionarySCFG::GetTargetPhraseCollection(const Phrase &source) const
{ // exactly like CreateTargetPhraseCollection, but don't create
assert(false);
return NULL;
/*
const size_t size = source.GetSize();
const PhraseDictionaryNodeNewFormat *currNode = &m_collection;
const PhraseDictionaryNodeSCFG *currNode = &m_collection;
for (size_t pos = 0 ; pos < size ; ++pos)
{
const Word& word = source.GetWord(pos);
@ -284,7 +263,7 @@ const TargetPhraseCollection *PhraseDictionaryNewFormat::GetTargetPhraseCollecti
*/
}
void PhraseDictionaryNewFormat::InitializeForInput(const InputType& input)
void PhraseDictionarySCFG::InitializeForInput(const InputType& input)
{
assert(m_runningNodesVec.size() == 0);
size_t sourceSize = input.GetSize();
@ -301,29 +280,29 @@ void PhraseDictionaryNewFormat::InitializeForInput(const InputType& input)
}
}
PhraseDictionaryNewFormat::~PhraseDictionaryNewFormat()
PhraseDictionarySCFG::~PhraseDictionarySCFG()
{
CleanUp();
}
void PhraseDictionaryNewFormat::SetWeightTransModel(const vector<float> &weightT)
void PhraseDictionarySCFG::SetWeightTransModel(const vector<float> &weightT)
{
PhraseDictionaryNodeNewFormat::iterator iterDict;
PhraseDictionaryNodeSCFG::iterator iterDict;
for (iterDict = m_collection.begin() ; iterDict != m_collection.end() ; ++iterDict)
{
PhraseDictionaryNodeNewFormat::InnerNodeMap &innerNode = iterDict->second;
PhraseDictionaryNodeNewFormat::InnerNodeMap::iterator iterInner;
PhraseDictionaryNodeSCFG::InnerNodeMap &innerNode = iterDict->second;
PhraseDictionaryNodeSCFG::InnerNodeMap::iterator iterInner;
for (iterInner = innerNode.begin() ; iterInner != innerNode.end() ; ++iterInner)
{
// recursively set weights in nodes
PhraseDictionaryNodeNewFormat &node = iterInner->second;
PhraseDictionaryNodeSCFG &node = iterInner->second;
node.SetWeightTransModel(this, weightT);
}
}
}
void PhraseDictionaryNewFormat::CleanUp()
void PhraseDictionarySCFG::CleanUp()
{
//RemoveAllInColl(m_chartTargetPhraseColl);
std::vector<ChartRuleCollection*>::iterator iter;
@ -337,13 +316,13 @@ void PhraseDictionaryNewFormat::CleanUp()
RemoveAllInColl(m_runningNodesVec);
}
TO_STRING_BODY(PhraseDictionaryNewFormat);
TO_STRING_BODY(PhraseDictionarySCFG);
// friend
ostream& operator<<(ostream& out, const PhraseDictionaryNewFormat& phraseDict)
ostream& operator<<(ostream& out, const PhraseDictionarySCFG& phraseDict)
{
const PhraseDictionaryNodeNewFormat &coll = phraseDict.m_collection;
PhraseDictionaryNodeNewFormat::const_iterator iter;
const PhraseDictionaryNodeSCFG &coll = phraseDict.m_collection;
PhraseDictionaryNodeSCFG::const_iterator iter;
for (iter = coll.begin() ; iter != coll.end() ; ++iter)
{
const Word &word = (*iter).first;

View File

@ -1,4 +1,4 @@
// $Id: PhraseDictionaryNewFormat.h 3045 2010-04-05 13:07:29Z hieuhoang1972 $
// $Id: PhraseDictionarySCFG.h 3045 2010-04-05 13:07:29Z hieuhoang1972 $
// vim:tabstop=2
/***********************************************************************
@ -23,7 +23,7 @@
#pragma once
#include "PhraseDictionary.h"
#include "PhraseDictionaryNodeNewFormat.h"
#include "PhraseDictionaryNodeSCFG.h"
#include "ChartRuleCollection.h"
#include "CellCollection.h"
@ -35,13 +35,13 @@ namespace Moses
/*** Implementation of a phrase table in a trie. Looking up a phrase of
* length n words requires n look-ups to find the TargetPhraseCollection.
*/
class PhraseDictionaryNewFormat : public PhraseDictionary
class PhraseDictionarySCFG : public PhraseDictionary
{
typedef PhraseDictionary MyBase;
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryNewFormat&);
friend std::ostream& operator<<(std::ostream&, const PhraseDictionarySCFG&);
protected:
PhraseDictionaryNodeNewFormat m_collection;
PhraseDictionaryNodeSCFG m_collection;
mutable std::vector<ChartRuleCollection*> m_chartTargetPhraseColl;
mutable std::vector<ProcessedRuleStack*> m_runningNodesVec;
@ -51,7 +51,7 @@ namespace Moses
std::string m_filePath;
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const Phrase &source, const TargetPhrase &target);
PhraseDictionaryNodeNewFormat &GetOrCreateNode(const Phrase &source, const TargetPhrase &target);
PhraseDictionaryNodeSCFG &GetOrCreateNode(const Phrase &source, const TargetPhrase &target);
bool Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
@ -68,13 +68,13 @@ namespace Moses
Word CreateCoveredWord(const Word &origSourceLabel, const InputType &src, const WordsRange &range) const;
public:
PhraseDictionaryNewFormat(size_t numScoreComponent, PhraseDictionaryFeature* feature)
PhraseDictionarySCFG(size_t numScoreComponent, PhraseDictionaryFeature* feature)
: MyBase(numScoreComponent, feature)
, m_prevSource(Input)
, m_prevPhraseColl(NULL)
{
}
virtual ~PhraseDictionaryNewFormat();
virtual ~PhraseDictionarySCFG();
std::string GetScoreProducerDescription() const
{ return "Hieu's Reordering Model"; }

View File

@ -1,4 +1,4 @@
// $Id: PhraseDictionaryNewFormat.h 3045 2010-04-05 13:07:29Z hieuhoang1972 $
// $Id: PhraseDictionarySCFG.h 3045 2010-04-05 13:07:29Z hieuhoang1972 $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
@ -19,7 +19,7 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "PhraseDictionaryNewFormat.h"
#include "PhraseDictionarySCFG.h"
#include "FactorCollection.h"
#include "InputType.h"
#include "ChartRuleCollection.h"
@ -31,7 +31,7 @@
using namespace std;
using namespace Moses;
Word PhraseDictionaryNewFormat::CreateCoveredWord(const Word &origSourceLabel, const InputType &src, const WordsRange &range) const
Word PhraseDictionarySCFG::CreateCoveredWord(const Word &origSourceLabel, const InputType &src, const WordsRange &range) const
{
string coveredWordsString = origSourceLabel.GetFactor(0)->GetString();
@ -51,7 +51,7 @@ Word PhraseDictionaryNewFormat::CreateCoveredWord(const Word &origSourceLabel, c
return ret;
}
const ChartRuleCollection *PhraseDictionaryNewFormat::GetChartRuleCollection(
const ChartRuleCollection *PhraseDictionarySCFG::GetChartRuleCollection(
InputType const& src
,WordsRange const& range
,bool adhereTableLimit
@ -71,7 +71,7 @@ const ChartRuleCollection *PhraseDictionaryNewFormat::GetChartRuleCollection(
{
const SavedNode &savedNode = *savedNodeColl[ind];
const ProcessedRule &prevProcessedRule = savedNode.GetProcessedRule();
const PhraseDictionaryNodeNewFormat &prevNode = static_cast<const PhraseDictionaryNodeNewFormat &>(prevProcessedRule.GetLastNode());
const PhraseDictionaryNodeSCFG &prevNode = static_cast<const PhraseDictionaryNodeSCFG &>(prevProcessedRule.GetLastNode());
const WordConsumed *prevWordConsumed = prevProcessedRule.GetLastWordConsumed();
size_t startPos = (prevWordConsumed == NULL) ? range.GetStartPos() : prevWordConsumed->GetWordsRange().GetEndPos() + 1;
@ -79,7 +79,7 @@ const ChartRuleCollection *PhraseDictionaryNewFormat::GetChartRuleCollection(
if (startPos == absEndPos)
{
const Word &sourceWord = src.GetWord(absEndPos);
const PhraseDictionaryNodeNewFormat *node = prevNode.GetChild(sourceWord, sourceWord);
const PhraseDictionaryNodeSCFG *node = prevNode.GetChild(sourceWord, sourceWord);
if (node != NULL)
{
const Word &sourceWord = node->GetSourceWord();
@ -123,7 +123,7 @@ const ChartRuleCollection *PhraseDictionaryNewFormat::GetChartRuleCollection(
{
const Word &headWord = *iterHeadWords;
const PhraseDictionaryNodeNewFormat *node = prevNode.GetChild(headWord, sourceLabel);
const PhraseDictionaryNodeSCFG *node = prevNode.GetChild(headWord, sourceLabel);
if (node != NULL)
{
//const Word &sourceWord = node->GetSourceWord();
@ -147,7 +147,7 @@ const ChartRuleCollection *PhraseDictionaryNewFormat::GetChartRuleCollection(
for (iterNode = nodes.begin(); iterNode != nodes.end(); ++iterNode)
{
const ProcessedRule &processedRule = **iterNode;
const PhraseDictionaryNodeNewFormat &node = static_cast<const PhraseDictionaryNodeNewFormat &>(processedRule.GetLastNode());
const PhraseDictionaryNodeSCFG &node = static_cast<const PhraseDictionaryNodeSCFG &>(processedRule.GetLastNode());
const WordConsumed *wordConsumed = processedRule.GetLastWordConsumed();
assert(wordConsumed);
@ -163,7 +163,7 @@ const ChartRuleCollection *PhraseDictionaryNewFormat::GetChartRuleCollection(
return ret;
}
void PhraseDictionaryNewFormat::DeleteDuplicates(ProcessedRuleColl &nodes) const
void PhraseDictionarySCFG::DeleteDuplicates(ProcessedRuleColl &nodes) const
{
map<size_t, float> minEntropy;
map<size_t, float>::iterator iterEntropy;
@ -173,7 +173,7 @@ void PhraseDictionaryNewFormat::DeleteDuplicates(ProcessedRuleColl &nodes) const
for (iter = nodes.begin(); iter != nodes.end(); ++iter)
{
const ProcessedRule *processedRule = *iter;
const PhraseDictionaryNodeNewFormat &node = static_cast<const PhraseDictionaryNodeNewFormat&> (processedRule->GetLastNode());
const PhraseDictionaryNodeSCFG &node = static_cast<const PhraseDictionaryNodeSCFG&> (processedRule->GetLastNode());
size_t nodeId = node.GetId();
float entropy = node.GetEntropy();
@ -197,7 +197,7 @@ void PhraseDictionaryNewFormat::DeleteDuplicates(ProcessedRuleColl &nodes) const
while (ind < nodes.GetSize())
{
const ProcessedRule &processedRule = nodes.Get(ind);
const PhraseDictionaryNodeNewFormat &node = static_cast<const PhraseDictionaryNodeNewFormat&> (processedRule.GetLastNode());
const PhraseDictionaryNodeSCFG &node = static_cast<const PhraseDictionaryNodeSCFG&> (processedRule.GetLastNode());
size_t nodeId = node.GetId();
float entropy = node.GetEntropy();
float minEntropy1 = minEntropy[nodeId];

View File

@ -487,19 +487,18 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
abort();
}
std::string sourcePhraseString, targetPhraseString;
std::string scoreString;
std::string *scoreString;
std::string sourceAlignString, targetAlignString;
sourcePhraseString=tokens[0];
targetPhraseString=tokens[1];
const std::string &sourcePhraseString=tokens[0]
,&targetPhraseString=tokens[1];
if (numElement==3){
scoreString=tokens[2];
scoreString = &tokens[2];
}
else{
sourceAlignString=tokens[2];
targetAlignString=tokens[3];
scoreString=tokens[4];
scoreString = &tokens[3];
}
@ -540,13 +539,12 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
// while(is>>w && w!="|||") sc.push_back(atof(w.c_str()));
// Mauro: to handle 0 probs in phrase tables
std::vector<float> scoreVector = Tokenize<float>(scoreString);
std::vector<float> scoreVector = Tokenize<float>(*scoreString);
for (size_t i = 0 ; i < scoreVector.size() ; ++i)
{
float tmp = scoreVector[i];
sc.push_back(((tmp>0.0)?tmp:(float)1.0e-38));
}
if(f.empty())
{

View File

@ -276,6 +276,26 @@ TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
return clone;
}
void TargetPhrase::SetAlignmentInfo(const std::string &alignString)
{
list<pair<size_t,size_t> > alignmentInfo;
vector<string> alignVec = Tokenize(alignString);
vector<string>::const_iterator iter;
for (iter = alignVec.begin(); iter != alignVec.end(); ++iter)
{
const string &align1 = *iter;
vector<size_t> alignPos = Tokenize<size_t>(align1, "-");
assert(alignPos.size() == 2);
size_t &sourcePos = alignPos[0]
,&targetPos = alignPos[1];
alignmentInfo.push_back(pair<size_t,size_t>(sourcePos, targetPos));
}
SetAlignmentInfo(alignmentInfo);
}
void TargetPhrase::SetAlignmentInfo(const std::list<std::pair<size_t,size_t> > &alignmentInfo)
{
m_alignmentInfo.AddAlignment(alignmentInfo);

View File

@ -169,6 +169,7 @@ public:
const Word &GetTargetLHS() const
{ return m_lhsTarget; }
void SetAlignmentInfo(const std::string &alignString);
void SetAlignmentInfo(const std::list<std::pair<size_t,size_t> > &alignmentInfo);
AlignmentInfo &GetAlignmentInfo()

View File

@ -163,7 +163,7 @@ enum PhraseTableImplementation
//,GlueRule = 3
//,Joshua = 4
//,MemorySourceLabel = 5
,NewFormat = 6
,SCFG = 6
//,BerkeleyDb = 7
,SuffixArray = 8
};

View File

@ -112,6 +112,7 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
// indirect: source target probabilities
// consistency checks
/*
size_t expectedSize = (hierarchicalFlag ? 5 : 4);
if (itemDirect.size() != expectedSize)
{
@ -126,7 +127,8 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
<< fileNameIndirect << ", line " << i << endl;
exit(1);
}
*/
if (itemDirect[0].compare( itemIndirect[0] ) != 0)
{
cerr << "ERROR: target phrase does not match in line " << i << ": '"
@ -145,13 +147,9 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
fileConsolidated << itemDirect[0] << " ||| " << itemDirect[1] << " ||| ";
// output alignment and probabilities
if (hierarchicalFlag)
fileConsolidated << itemDirect[2] << " ||| " // alignment
fileConsolidated << itemDirect[2] << " ||| " // alignment
<< itemIndirect[2] // prob indirect
<< " " << itemDirect[3]; // prob direct
else
fileConsolidated << itemIndirect[2] // prob indirect
<< " " << itemDirect[2]; // prob direct
fileConsolidated << " " << (logProbFlag ? 1 : 2.718); // phrase count feature
// counts

View File

@ -67,22 +67,22 @@
1C05B9E91174CC24003585B2 /* statistics.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = statistics.cpp; sourceTree = "<group>"; };
1C05B9EA1174CC24003585B2 /* SyntaxTree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SyntaxTree.cpp; sourceTree = "<group>"; };
1C05B9EB1174CC24003585B2 /* SyntaxTree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SyntaxTree.h; sourceTree = "<group>"; };
1C05B9F71174CE51003585B2 /* extract-rules */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "extract-rules"; sourceTree = BUILT_PRODUCTS_DIR; };
1C05BA1F1174CEE8003585B2 /* XmlTree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = XmlTree.cpp; sourceTree = "<group>"; };
1C05BA201174CEE8003585B2 /* XmlTree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = XmlTree.h; sourceTree = "<group>"; };
1C05BA2E1174CF6C003585B2 /* statistics */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = statistics; sourceTree = BUILT_PRODUCTS_DIR; };
1C47578F102B78AD00AB74DB /* score */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = score; sourceTree = BUILT_PRODUCTS_DIR; };
1C475794102B78DD00AB74DB /* score.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = score.cpp; sourceTree = "<group>"; };
1C4757C4102B7EAA00AB74DB /* consolidate */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = consolidate; sourceTree = BUILT_PRODUCTS_DIR; };
1C5C088A0FFE54F400B00995 /* extract */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = extract; sourceTree = BUILT_PRODUCTS_DIR; };
1C6A83031111F5A300059E7F /* AlignmentPhrase.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = AlignmentPhrase.cpp; sourceTree = "<group>"; };
1C6A83041111F5A300059E7F /* AlignmentPhrase.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AlignmentPhrase.h; sourceTree = "<group>"; };
1CE8CE2C0FC6EA0200924FEA /* extract.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = extract.cpp; sourceTree = "<group>"; };
1CE8CE4B0FC6EAA200924FEA /* tables-core.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "tables-core.cpp"; sourceTree = "<group>"; };
1CE8CE4C0FC6EAA200924FEA /* tables-core.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "tables-core.h"; sourceTree = "<group>"; };
1CF9F71A108C9FE700EABCE5 /* consolidate-direct */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "consolidate-direct"; sourceTree = BUILT_PRODUCTS_DIR; };
1CFE962311762A20006FF13B /* consolidate-direct.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "consolidate-direct.cpp"; sourceTree = "<group>"; };
1CFE962411762A20006FF13B /* consolidate.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = consolidate.cpp; sourceTree = "<group>"; };
1E7C2CF611F1146200213451 /* extract */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = extract; sourceTree = BUILT_PRODUCTS_DIR; };
1E7C2CF811F1146200213451 /* score */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = score; sourceTree = BUILT_PRODUCTS_DIR; };
1E7C2CFA11F1146300213451 /* consolidate */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = consolidate; sourceTree = BUILT_PRODUCTS_DIR; };
1E7C2CFC11F1146300213451 /* consolidate-direct */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "consolidate-direct"; sourceTree = BUILT_PRODUCTS_DIR; };
1E7C2CFE11F1146300213451 /* extract-rules */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "extract-rules"; sourceTree = BUILT_PRODUCTS_DIR; };
1E7C2D0011F1146300213451 /* statistics */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = statistics; sourceTree = BUILT_PRODUCTS_DIR; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@ -176,12 +176,12 @@
1AB674ADFE9D54B511CA2CBB /* Products */ = {
isa = PBXGroup;
children = (
1C5C088A0FFE54F400B00995 /* extract */,
1C47578F102B78AD00AB74DB /* score */,
1C4757C4102B7EAA00AB74DB /* consolidate */,
1CF9F71A108C9FE700EABCE5 /* consolidate-direct */,
1C05B9F71174CE51003585B2 /* extract-rules */,
1C05BA2E1174CF6C003585B2 /* statistics */,
1E7C2CF611F1146200213451 /* extract */,
1E7C2CF811F1146200213451 /* score */,
1E7C2CFA11F1146300213451 /* consolidate */,
1E7C2CFC11F1146300213451 /* consolidate-direct */,
1E7C2CFE11F1146300213451 /* extract-rules */,
1E7C2D0011F1146300213451 /* statistics */,
);
name = Products;
sourceTree = "<group>";
@ -209,7 +209,7 @@
);
name = "extract-rules";
productName = "extract-rules";
productReference = 1C05B9F71174CE51003585B2 /* extract-rules */;
productReference = 1E7C2CFE11F1146300213451 /* extract-rules */;
productType = "com.apple.product-type.tool";
};
1C05BA2D1174CF6C003585B2 /* statistics */ = {
@ -225,7 +225,7 @@
);
name = statistics;
productName = statistics;
productReference = 1C05BA2E1174CF6C003585B2 /* statistics */;
productReference = 1E7C2D0011F1146300213451 /* statistics */;
productType = "com.apple.product-type.tool";
};
1C47578E102B78AD00AB74DB /* score */ = {
@ -241,7 +241,7 @@
);
name = score;
productName = score;
productReference = 1C47578F102B78AD00AB74DB /* score */;
productReference = 1E7C2CF811F1146200213451 /* score */;
productType = "com.apple.product-type.tool";
};
1C4757C3102B7EAA00AB74DB /* consolidate */ = {
@ -257,7 +257,7 @@
);
name = consolidate;
productName = consolidate;
productReference = 1C4757C4102B7EAA00AB74DB /* consolidate */;
productReference = 1E7C2CFA11F1146300213451 /* consolidate */;
productType = "com.apple.product-type.tool";
};
1CF9F719108C9FE700EABCE5 /* consolidate-direct */ = {
@ -273,7 +273,7 @@
);
name = "consolidate-direct";
productName = "consolidate-direct";
productReference = 1CF9F71A108C9FE700EABCE5 /* consolidate-direct */;
productReference = 1E7C2CFC11F1146300213451 /* consolidate-direct */;
productType = "com.apple.product-type.tool";
};
8DD76F620486A84900D96B5E /* extract */ = {
@ -291,7 +291,7 @@
name = extract;
productInstallPath = "$(HOME)/bin";
productName = extract;
productReference = 1C5C088A0FFE54F400B00995 /* extract */;
productReference = 1E7C2CF611F1146200213451 /* extract */;
productType = "com.apple.product-type.tool";
};
/* End PBXNativeTarget section */

View File

@ -444,19 +444,35 @@ void outputPhrasePair( vector< PhraseAlignment* > &phrasePair, float totalCount
}
// alignment info for non-terminals
if (! inverseFlag && hierarchicalFlag)
if (! inverseFlag)
{
assert(phraseT.size() == bestAlignment->alignedToT.size() + 1);
for(int j = 0; j < phraseT.size() - 1; j++)
{
if (isNonTerminal(vcbT.getWord( phraseT[j] )))
if (hierarchicalFlag)
{ // always output alignment if hiero style, but only for non-terms
assert(phraseT.size() == bestAlignment->alignedToT.size() + 1);
for(int j = 0; j < phraseT.size() - 1; j++)
{
assert(bestAlignment->alignedToT[ j ].size() == 1);
int sourcePos = *(bestAlignment->alignedToT[ j ].begin());
phraseTableFile << sourcePos << "-" << j << " ";
if (isNonTerminal(vcbT.getWord( phraseT[j] )))
{
assert(bestAlignment->alignedToT[ j ].size() == 1);
int sourcePos = *(bestAlignment->alignedToT[ j ].begin());
phraseTableFile << sourcePos << "-" << j << " ";
}
}
phraseTableFile << "||| ";
}
else if (wordAlignmentFlag)
{ // alignment info in pb model
for(int j=0;j<bestAlignment->alignedToT.size();j++)
{
const set< size_t > &aligned = bestAlignment->alignedToT[j];
for (set< size_t >::const_iterator p(aligned.begin()); p != aligned.end(); ++p)
{
phraseTableFile << *p << "-" << j << " ";
}
}
phraseTableFile << "||| ";
}
phraseTableFile << "||| ";
}
// phrase translation probability