mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
alignment info in pt
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3358 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
fab2e96d2f
commit
31930eb6fc
@ -265,7 +265,7 @@ class TranslationTask : public Task {
|
||||
|
||||
//detailed translation reporting
|
||||
if (m_detailedTranslationCollector) {
|
||||
ostringstream out;
|
||||
ostringstream out;
|
||||
fix(out);
|
||||
TranslationAnalysis::PrintTranslationAnalysis(out, manager.GetBestHypothesis());
|
||||
m_detailedTranslationCollector->Write(m_lineNumber,out.str());
|
||||
|
@ -39,6 +39,8 @@ void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
|
||||
WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
|
||||
WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
|
||||
|
||||
const AlignmentInfo &alignmentInfo = (*tpi)->GetCurrTargetPhrase().GetAlignmentInfo();
|
||||
|
||||
// language model backoff stats,
|
||||
if (doLMStats) {
|
||||
std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
|
||||
@ -60,8 +62,9 @@ void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
|
||||
epsilon = true;
|
||||
droppedWords.push_back(source);
|
||||
}
|
||||
os << " SOURCE: " << swr << " " << source << std::endl
|
||||
<< " TRANSLATED AS: " << target << std::endl;
|
||||
os << " SOURCE: " << swr << " " << source << std::endl
|
||||
<< " TRANSLATED AS: " << target << std::endl
|
||||
<< " WORD ALIGNED: " << alignmentInfo << std::endl;
|
||||
size_t twr_i = twr.GetStartPos();
|
||||
size_t swr_i = swr.GetStartPos();
|
||||
if (!epsilon) { sms << twr_i; }
|
||||
|
@ -7,6 +7,10 @@
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
1E5D8E0411F25F03000F027F /* PhraseDictionaryNodeSCFG.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E5D8E0211F25F03000F027F /* PhraseDictionaryNodeSCFG.cpp */; };
|
||||
1E5D8E0511F25F03000F027F /* PhraseDictionaryNodeSCFG.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E5D8E0311F25F03000F027F /* PhraseDictionaryNodeSCFG.h */; };
|
||||
1E5D8E0811F25F2F000F027F /* PhraseDictionarySCFG.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E5D8E0611F25F2F000F027F /* PhraseDictionarySCFG.cpp */; };
|
||||
1E5D8E0911F25F2F000F027F /* PhraseDictionarySCFGChart.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E5D8E0711F25F2F000F027F /* PhraseDictionarySCFGChart.cpp */; };
|
||||
1ED4FD3711BDC0D2004E826A /* AlignmentInfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FC5F11BDC0D2004E826A /* AlignmentInfo.cpp */; };
|
||||
1ED4FD3811BDC0D2004E826A /* AlignmentInfo.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FC6011BDC0D2004E826A /* AlignmentInfo.h */; };
|
||||
1ED4FD3911BDC0D2004E826A /* BilingualDynSuffixArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FC6111BDC0D2004E826A /* BilingualDynSuffixArray.cpp */; };
|
||||
@ -133,13 +137,8 @@
|
||||
1ED4FDB511BDC0D2004E826A /* PhraseDictionaryDynSuffixArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FCE111BDC0D2004E826A /* PhraseDictionaryDynSuffixArray.h */; };
|
||||
1ED4FDB611BDC0D2004E826A /* PhraseDictionaryMemory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FCE211BDC0D2004E826A /* PhraseDictionaryMemory.cpp */; };
|
||||
1ED4FDB711BDC0D2004E826A /* PhraseDictionaryMemory.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FCE311BDC0D2004E826A /* PhraseDictionaryMemory.h */; };
|
||||
1ED4FDB811BDC0D2004E826A /* PhraseDictionaryNewFormat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FCE411BDC0D2004E826A /* PhraseDictionaryNewFormat.cpp */; };
|
||||
1ED4FDB911BDC0D2004E826A /* PhraseDictionaryNewFormat.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FCE511BDC0D2004E826A /* PhraseDictionaryNewFormat.h */; };
|
||||
1ED4FDBA11BDC0D2004E826A /* PhraseDictionaryNewFormatChart.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FCE611BDC0D2004E826A /* PhraseDictionaryNewFormatChart.cpp */; };
|
||||
1ED4FDBB11BDC0D2004E826A /* PhraseDictionaryNode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FCE711BDC0D2004E826A /* PhraseDictionaryNode.cpp */; };
|
||||
1ED4FDBC11BDC0D2004E826A /* PhraseDictionaryNode.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FCE811BDC0D2004E826A /* PhraseDictionaryNode.h */; };
|
||||
1ED4FDBD11BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FCE911BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.cpp */; };
|
||||
1ED4FDBE11BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FCEA11BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.h */; };
|
||||
1ED4FDBF11BDC0D2004E826A /* PhraseDictionaryOnDisk.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FCEB11BDC0D2004E826A /* PhraseDictionaryOnDisk.cpp */; };
|
||||
1ED4FDC011BDC0D2004E826A /* PhraseDictionaryOnDisk.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FCEC11BDC0D2004E826A /* PhraseDictionaryOnDisk.h */; };
|
||||
1ED4FDC111BDC0D2004E826A /* PhraseDictionaryOnDiskChart.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FCED11BDC0D2004E826A /* PhraseDictionaryOnDiskChart.cpp */; };
|
||||
@ -218,6 +217,10 @@
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
1E5D8E0211F25F03000F027F /* PhraseDictionaryNodeSCFG.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryNodeSCFG.cpp; path = src/PhraseDictionaryNodeSCFG.cpp; sourceTree = "<group>"; };
|
||||
1E5D8E0311F25F03000F027F /* PhraseDictionaryNodeSCFG.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryNodeSCFG.h; path = src/PhraseDictionaryNodeSCFG.h; sourceTree = "<group>"; };
|
||||
1E5D8E0611F25F2F000F027F /* PhraseDictionarySCFG.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionarySCFG.cpp; path = src/PhraseDictionarySCFG.cpp; sourceTree = "<group>"; };
|
||||
1E5D8E0711F25F2F000F027F /* PhraseDictionarySCFGChart.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionarySCFGChart.cpp; path = src/PhraseDictionarySCFGChart.cpp; sourceTree = "<group>"; };
|
||||
1ED4FC5F11BDC0D2004E826A /* AlignmentInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = AlignmentInfo.cpp; path = src/AlignmentInfo.cpp; sourceTree = "<group>"; };
|
||||
1ED4FC6011BDC0D2004E826A /* AlignmentInfo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = AlignmentInfo.h; path = src/AlignmentInfo.h; sourceTree = "<group>"; };
|
||||
1ED4FC6111BDC0D2004E826A /* BilingualDynSuffixArray.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = BilingualDynSuffixArray.cpp; path = src/BilingualDynSuffixArray.cpp; sourceTree = "<group>"; };
|
||||
@ -345,13 +348,8 @@
|
||||
1ED4FCE111BDC0D2004E826A /* PhraseDictionaryDynSuffixArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryDynSuffixArray.h; path = src/PhraseDictionaryDynSuffixArray.h; sourceTree = "<group>"; };
|
||||
1ED4FCE211BDC0D2004E826A /* PhraseDictionaryMemory.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryMemory.cpp; path = src/PhraseDictionaryMemory.cpp; sourceTree = "<group>"; };
|
||||
1ED4FCE311BDC0D2004E826A /* PhraseDictionaryMemory.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryMemory.h; path = src/PhraseDictionaryMemory.h; sourceTree = "<group>"; };
|
||||
1ED4FCE411BDC0D2004E826A /* PhraseDictionaryNewFormat.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryNewFormat.cpp; path = src/PhraseDictionaryNewFormat.cpp; sourceTree = "<group>"; };
|
||||
1ED4FCE511BDC0D2004E826A /* PhraseDictionaryNewFormat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryNewFormat.h; path = src/PhraseDictionaryNewFormat.h; sourceTree = "<group>"; };
|
||||
1ED4FCE611BDC0D2004E826A /* PhraseDictionaryNewFormatChart.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryNewFormatChart.cpp; path = src/PhraseDictionaryNewFormatChart.cpp; sourceTree = "<group>"; };
|
||||
1ED4FCE711BDC0D2004E826A /* PhraseDictionaryNode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryNode.cpp; path = src/PhraseDictionaryNode.cpp; sourceTree = "<group>"; };
|
||||
1ED4FCE811BDC0D2004E826A /* PhraseDictionaryNode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryNode.h; path = src/PhraseDictionaryNode.h; sourceTree = "<group>"; };
|
||||
1ED4FCE911BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryNodeNewFormat.cpp; path = src/PhraseDictionaryNodeNewFormat.cpp; sourceTree = "<group>"; };
|
||||
1ED4FCEA11BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryNodeNewFormat.h; path = src/PhraseDictionaryNodeNewFormat.h; sourceTree = "<group>"; };
|
||||
1ED4FCEB11BDC0D2004E826A /* PhraseDictionaryOnDisk.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryOnDisk.cpp; path = src/PhraseDictionaryOnDisk.cpp; sourceTree = "<group>"; };
|
||||
1ED4FCEC11BDC0D2004E826A /* PhraseDictionaryOnDisk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryOnDisk.h; path = src/PhraseDictionaryOnDisk.h; sourceTree = "<group>"; };
|
||||
1ED4FCED11BDC0D2004E826A /* PhraseDictionaryOnDiskChart.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryOnDiskChart.cpp; path = src/PhraseDictionaryOnDiskChart.cpp; sourceTree = "<group>"; };
|
||||
@ -576,16 +574,15 @@
|
||||
1ED4FCE111BDC0D2004E826A /* PhraseDictionaryDynSuffixArray.h */,
|
||||
1ED4FCE211BDC0D2004E826A /* PhraseDictionaryMemory.cpp */,
|
||||
1ED4FCE311BDC0D2004E826A /* PhraseDictionaryMemory.h */,
|
||||
1ED4FCE411BDC0D2004E826A /* PhraseDictionaryNewFormat.cpp */,
|
||||
1ED4FCE511BDC0D2004E826A /* PhraseDictionaryNewFormat.h */,
|
||||
1ED4FCE611BDC0D2004E826A /* PhraseDictionaryNewFormatChart.cpp */,
|
||||
1ED4FCE711BDC0D2004E826A /* PhraseDictionaryNode.cpp */,
|
||||
1ED4FCE811BDC0D2004E826A /* PhraseDictionaryNode.h */,
|
||||
1ED4FCE911BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.cpp */,
|
||||
1ED4FCEA11BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.h */,
|
||||
1E5D8E0211F25F03000F027F /* PhraseDictionaryNodeSCFG.cpp */,
|
||||
1E5D8E0311F25F03000F027F /* PhraseDictionaryNodeSCFG.h */,
|
||||
1ED4FCEB11BDC0D2004E826A /* PhraseDictionaryOnDisk.cpp */,
|
||||
1ED4FCEC11BDC0D2004E826A /* PhraseDictionaryOnDisk.h */,
|
||||
1ED4FCED11BDC0D2004E826A /* PhraseDictionaryOnDiskChart.cpp */,
|
||||
1E5D8E0611F25F2F000F027F /* PhraseDictionarySCFG.cpp */,
|
||||
1E5D8E0711F25F2F000F027F /* PhraseDictionarySCFGChart.cpp */,
|
||||
1ED4FCEE11BDC0D2004E826A /* PhraseDictionaryTree.cpp */,
|
||||
1ED4FCEF11BDC0D2004E826A /* PhraseDictionaryTree.h */,
|
||||
1ED4FCF011BDC0D2004E826A /* PhraseDictionaryTreeAdaptor.cpp */,
|
||||
@ -767,9 +764,7 @@
|
||||
1ED4FDB311BDC0D2004E826A /* PhraseDictionary.h in Headers */,
|
||||
1ED4FDB511BDC0D2004E826A /* PhraseDictionaryDynSuffixArray.h in Headers */,
|
||||
1ED4FDB711BDC0D2004E826A /* PhraseDictionaryMemory.h in Headers */,
|
||||
1ED4FDB911BDC0D2004E826A /* PhraseDictionaryNewFormat.h in Headers */,
|
||||
1ED4FDBC11BDC0D2004E826A /* PhraseDictionaryNode.h in Headers */,
|
||||
1ED4FDBE11BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.h in Headers */,
|
||||
1ED4FDC011BDC0D2004E826A /* PhraseDictionaryOnDisk.h in Headers */,
|
||||
1ED4FDC311BDC0D2004E826A /* PhraseDictionaryTree.h in Headers */,
|
||||
1ED4FDC511BDC0D2004E826A /* PhraseDictionaryTreeAdaptor.h in Headers */,
|
||||
@ -809,6 +804,7 @@
|
||||
1ED4FE0511BDC0D2004E826A /* WordsBitmap.h in Headers */,
|
||||
1ED4FE0711BDC0D2004E826A /* WordsRange.h in Headers */,
|
||||
1ED4FE0911BDC0D2004E826A /* XmlOption.h in Headers */,
|
||||
1E5D8E0511F25F03000F027F /* PhraseDictionaryNodeSCFG.h in Headers */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
@ -913,10 +909,7 @@
|
||||
1ED4FDB211BDC0D2004E826A /* PhraseDictionary.cpp in Sources */,
|
||||
1ED4FDB411BDC0D2004E826A /* PhraseDictionaryDynSuffixArray.cpp in Sources */,
|
||||
1ED4FDB611BDC0D2004E826A /* PhraseDictionaryMemory.cpp in Sources */,
|
||||
1ED4FDB811BDC0D2004E826A /* PhraseDictionaryNewFormat.cpp in Sources */,
|
||||
1ED4FDBA11BDC0D2004E826A /* PhraseDictionaryNewFormatChart.cpp in Sources */,
|
||||
1ED4FDBB11BDC0D2004E826A /* PhraseDictionaryNode.cpp in Sources */,
|
||||
1ED4FDBD11BDC0D2004E826A /* PhraseDictionaryNodeNewFormat.cpp in Sources */,
|
||||
1ED4FDBF11BDC0D2004E826A /* PhraseDictionaryOnDisk.cpp in Sources */,
|
||||
1ED4FDC111BDC0D2004E826A /* PhraseDictionaryOnDiskChart.cpp in Sources */,
|
||||
1ED4FDC211BDC0D2004E826A /* PhraseDictionaryTree.cpp in Sources */,
|
||||
@ -953,6 +946,9 @@
|
||||
1ED4FE0411BDC0D2004E826A /* WordsBitmap.cpp in Sources */,
|
||||
1ED4FE0611BDC0D2004E826A /* WordsRange.cpp in Sources */,
|
||||
1ED4FE0811BDC0D2004E826A /* XmlOption.cpp in Sources */,
|
||||
1E5D8E0411F25F03000F027F /* PhraseDictionaryNodeSCFG.cpp in Sources */,
|
||||
1E5D8E0811F25F2F000F027F /* PhraseDictionarySCFG.cpp in Sources */,
|
||||
1E5D8E0911F25F2F000F027F /* PhraseDictionarySCFGChart.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -27,7 +27,7 @@ std::ostream& operator<<(std::ostream &out, const AlignmentInfo &alignmentInfo)
|
||||
AlignmentInfo::const_iterator iter;
|
||||
for (iter = alignmentInfo.begin(); iter != alignmentInfo.end(); ++iter)
|
||||
{
|
||||
out << "(" << iter->first << "," << iter->second << ") ";
|
||||
out << iter->first << "-" << iter->second << " ";
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
@ -19,7 +19,6 @@
|
||||
***********************************************************************/
|
||||
#include "DotChart.h"
|
||||
#include "Util.h"
|
||||
#include "PhraseDictionaryNodeNewFormat.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -42,7 +41,7 @@ ProcessedRuleStack::~ProcessedRuleStack()
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const ProcessedRule &rule)
|
||||
{
|
||||
const PhraseDictionaryNode &node = rule.GetLastNode();
|
||||
//const PhraseDictionaryNode &node = rule.GetLastNode();
|
||||
//out << node;
|
||||
|
||||
return out;
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include <algorithm>
|
||||
#include "DotChartOnDisk.h"
|
||||
#include "Util.h"
|
||||
#include "PhraseDictionaryNodeNewFormat.h"
|
||||
#include "../../OnDiskPt/src/PhraseNode.h"
|
||||
|
||||
using namespace std;
|
||||
|
@ -440,17 +440,24 @@ void Hypothesis::CleanupArcList()
|
||||
TO_STRING_BODY(Hypothesis)
|
||||
|
||||
// friend
|
||||
ostream& operator<<(ostream& out, const Hypothesis& hypothesis)
|
||||
ostream& operator<<(ostream& out, const Hypothesis& hypo)
|
||||
{
|
||||
hypothesis.ToStream(out);
|
||||
hypo.ToStream(out);
|
||||
// words bitmap
|
||||
out << "[" << hypothesis.m_sourceCompleted << "] ";
|
||||
out << "[" << hypo.m_sourceCompleted << "] ";
|
||||
|
||||
// scores
|
||||
out << " [total=" << hypothesis.GetTotalScore() << "]";
|
||||
out << " " << hypothesis.GetScoreBreakdown();
|
||||
out << " [total=" << hypo.GetTotalScore() << "]";
|
||||
out << " " << hypo.GetScoreBreakdown();
|
||||
|
||||
// alignment
|
||||
out << " " << hypo.GetCurrTargetPhrase().GetAlignmentInfo();
|
||||
|
||||
/*
|
||||
const Hypothesis *prevHypo = hypo.GetPrevHypo();
|
||||
if (prevHypo)
|
||||
out << endl << *prevHypo;
|
||||
*/
|
||||
|
||||
return out;
|
||||
}
|
||||
|
@ -63,9 +63,9 @@ libmoses_la_HEADERS = \
|
||||
PhraseDictionary.h \
|
||||
PhraseDictionaryDynSuffixArray.h \
|
||||
PhraseDictionaryMemory.h \
|
||||
PhraseDictionaryNewFormat.h \
|
||||
PhraseDictionarySCFG.h \
|
||||
PhraseDictionaryNode.h \
|
||||
PhraseDictionaryNodeNewFormat.h \
|
||||
PhraseDictionaryNodeSCFG.h \
|
||||
PhraseDictionaryOnDisk.h \
|
||||
PhraseDictionaryTree.h \
|
||||
PhraseDictionaryTreeAdaptor.h \
|
||||
@ -185,10 +185,10 @@ libmoses_la_SOURCES = \
|
||||
PhraseDictionary.cpp \
|
||||
PhraseDictionaryDynSuffixArray.cpp \
|
||||
PhraseDictionaryMemory.cpp \
|
||||
PhraseDictionaryNewFormat.cpp \
|
||||
PhraseDictionaryNewFormatChart.cpp \
|
||||
PhraseDictionarySCFG.cpp \
|
||||
PhraseDictionarySCFGChart.cpp \
|
||||
PhraseDictionaryNode.cpp \
|
||||
PhraseDictionaryNodeNewFormat.cpp \
|
||||
PhraseDictionaryNodeSCFG.cpp \
|
||||
PhraseDictionaryOnDisk.cpp \
|
||||
PhraseDictionaryOnDiskChart.cpp \
|
||||
PhraseDictionaryTree.cpp \
|
||||
|
@ -22,7 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
#include "PhraseDictionary.h"
|
||||
#include "PhraseDictionaryTreeAdaptor.h"
|
||||
#include "PhraseDictionaryNewFormat.h"
|
||||
#include "PhraseDictionarySCFG.h"
|
||||
#include "PhraseDictionaryOnDisk.h"
|
||||
#ifndef WIN32
|
||||
#include "PhraseDictionaryDynSuffixArray.h"
|
||||
@ -65,7 +65,7 @@ PhraseDictionaryFeature::PhraseDictionaryFeature
|
||||
const StaticData& staticData = StaticData::Instance();
|
||||
const_cast<ScoreIndexManager&>(staticData.GetScoreIndexManager()).AddScoreProducer(this);
|
||||
//Thread-safe phrase dictionaries get loaded now
|
||||
if (implementation == Memory || implementation == NewFormat || implementation == OnDisk || implementation == SuffixArray) {
|
||||
if (implementation == Memory || implementation == SCFG || implementation == OnDisk || implementation == SuffixArray) {
|
||||
m_threadSafePhraseDictionary.reset(LoadPhraseTable());
|
||||
m_useThreadSafePhraseDictionary = true;
|
||||
} else {
|
||||
@ -113,7 +113,7 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable() {
|
||||
, staticData.GetWeightWordPenalty()));
|
||||
return pdta;
|
||||
}
|
||||
else if (m_implementation == NewFormat)
|
||||
else if (m_implementation == SCFG)
|
||||
{ // memory phrase table
|
||||
VERBOSE(2,"using New Format phrase tables" << std::endl);
|
||||
if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
|
||||
@ -121,7 +121,7 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable() {
|
||||
VERBOSE(2,"Using gzipped file" << std::endl);
|
||||
}
|
||||
|
||||
PhraseDictionaryNewFormat* pdm = new PhraseDictionaryNewFormat(m_numScoreComponent,this);
|
||||
PhraseDictionarySCFG* pdm = new PhraseDictionarySCFG(m_numScoreComponent,this);
|
||||
assert(pdm->Load(m_input
|
||||
, m_output
|
||||
, m_filePath
|
||||
|
@ -77,6 +77,8 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
|
||||
{ // init numElement
|
||||
numElement = tokens.size();
|
||||
assert(numElement == 3 || numElement == 5);
|
||||
// Pharaoh style: source ||| target ||| scores
|
||||
// New moses style: source ||| target ||| alignment ||| scores ||| count
|
||||
}
|
||||
|
||||
if (tokens.size() != numElement)
|
||||
@ -87,19 +89,16 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
|
||||
abort();
|
||||
}
|
||||
|
||||
string sourcePhraseString, targetPhraseString;
|
||||
string scoreString;
|
||||
string sourceAlignString, targetAlignString;
|
||||
const string *scoreString;
|
||||
|
||||
const string &sourcePhraseString=tokens[0]
|
||||
,&targetPhraseString=tokens[1];
|
||||
|
||||
sourcePhraseString=tokens[0];
|
||||
targetPhraseString=tokens[1];
|
||||
if (numElement==3){
|
||||
scoreString=tokens[2];
|
||||
scoreString = &tokens[2];
|
||||
}
|
||||
else{
|
||||
sourceAlignString=tokens[2];
|
||||
targetAlignString=tokens[3];
|
||||
scoreString=tokens[4];
|
||||
scoreString = &tokens[3];
|
||||
}
|
||||
|
||||
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
|
||||
@ -112,7 +111,7 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
|
||||
if (sourcePhraseString != prevSourcePhrase)
|
||||
phraseVector = Phrase::Parse(sourcePhraseString, input, factorDelimiter);
|
||||
|
||||
vector<float> scoreVector = Tokenize<float>(scoreString);
|
||||
vector<float> scoreVector = Tokenize<float>(*scoreString);
|
||||
if (scoreVector.size() != m_numScoreComponent)
|
||||
{
|
||||
stringstream strme;
|
||||
@ -120,7 +119,6 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
|
||||
UserMessage::Add(strme.str());
|
||||
abort();
|
||||
}
|
||||
// assert(scoreVector.size() == m_numScoreComponent);
|
||||
|
||||
// source
|
||||
Phrase sourcePhrase(Input);
|
||||
@ -129,8 +127,8 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
|
||||
TargetPhrase targetPhrase(Output);
|
||||
targetPhrase.SetSourcePhrase(&sourcePhrase);
|
||||
targetPhrase.CreateFromString( output, targetPhraseString, factorDelimiter);
|
||||
|
||||
|
||||
if (numElement == 5)
|
||||
targetPhrase.SetAlignmentInfo(tokens[2]);
|
||||
|
||||
// component score, for n-best output
|
||||
std::vector<float> scv(scoreVector.size());
|
||||
|
@ -1,4 +1,4 @@
|
||||
// $Id: PhraseDictionaryNodeNewFormat.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
|
||||
// $Id: PhraseDictionaryNodeSCFG.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
@ -19,27 +19,27 @@ License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "PhraseDictionaryNodeNewFormat.h"
|
||||
#include "PhraseDictionaryNodeSCFG.h"
|
||||
#include "TargetPhrase.h"
|
||||
#include "PhraseDictionaryMemory.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
size_t PhraseDictionaryNodeNewFormat::s_id = 0;
|
||||
size_t PhraseDictionaryNodeSCFG::s_id = 0;
|
||||
|
||||
PhraseDictionaryNodeNewFormat::~PhraseDictionaryNodeNewFormat()
|
||||
PhraseDictionaryNodeSCFG::~PhraseDictionaryNodeSCFG()
|
||||
{
|
||||
delete m_targetPhraseCollection;
|
||||
}
|
||||
|
||||
void PhraseDictionaryNodeNewFormat::CleanUp()
|
||||
void PhraseDictionaryNodeSCFG::CleanUp()
|
||||
{
|
||||
delete m_targetPhraseCollection;
|
||||
m_targetPhraseCollection = NULL;
|
||||
m_map.clear();
|
||||
}
|
||||
|
||||
void PhraseDictionaryNodeNewFormat::Sort(size_t tableLimit)
|
||||
void PhraseDictionaryNodeSCFG::Sort(size_t tableLimit)
|
||||
{
|
||||
// recursively sort
|
||||
NodeMap::iterator iter;
|
||||
@ -58,7 +58,7 @@ void PhraseDictionaryNodeNewFormat::Sort(size_t tableLimit)
|
||||
m_targetPhraseCollection->NthElement(tableLimit);
|
||||
}
|
||||
|
||||
PhraseDictionaryNodeNewFormat *PhraseDictionaryNodeNewFormat::GetOrCreateChild(const Word &word, const Word &sourcelabel)
|
||||
PhraseDictionaryNodeSCFG *PhraseDictionaryNodeSCFG::GetOrCreateChild(const Word &word, const Word &sourcelabel)
|
||||
{
|
||||
InnerNodeMap *innerNodeMap;
|
||||
innerNodeMap = &m_map[sourcelabel];
|
||||
@ -69,17 +69,17 @@ PhraseDictionaryNodeNewFormat *PhraseDictionaryNodeNewFormat::GetOrCreateChild(c
|
||||
|
||||
// can't find node; create a new one
|
||||
std::pair <InnerNodeMap::iterator,bool> insResult;
|
||||
insResult = innerNodeMap->insert( std::make_pair(word, PhraseDictionaryNodeNewFormat()) );
|
||||
insResult = innerNodeMap->insert( std::make_pair(word, PhraseDictionaryNodeSCFG()) );
|
||||
assert(insResult.second);
|
||||
|
||||
iter = insResult.first;
|
||||
PhraseDictionaryNodeNewFormat &ret = iter->second;
|
||||
PhraseDictionaryNodeSCFG &ret = iter->second;
|
||||
ret.SetSourceWord(iter->first);
|
||||
//ret.SetSourceWord(word);
|
||||
return &ret;
|
||||
}
|
||||
|
||||
const PhraseDictionaryNodeNewFormat *PhraseDictionaryNodeNewFormat::GetChild(const Word &word, const Word &sourcelabel) const
|
||||
const PhraseDictionaryNodeSCFG *PhraseDictionaryNodeSCFG::GetChild(const Word &word, const Word &sourcelabel) const
|
||||
{
|
||||
NodeMap::const_iterator iterOuter = m_map.find(sourcelabel);
|
||||
if (iterOuter == m_map.end())
|
||||
@ -95,7 +95,7 @@ const PhraseDictionaryNodeNewFormat *PhraseDictionaryNodeNewFormat::GetChild(con
|
||||
}
|
||||
|
||||
|
||||
void PhraseDictionaryNodeNewFormat::SetWeightTransModel(const PhraseDictionary *phraseDictionary
|
||||
void PhraseDictionaryNodeSCFG::SetWeightTransModel(const PhraseDictionary *phraseDictionary
|
||||
, const std::vector<float> &weightT)
|
||||
{
|
||||
// recursively set weights
|
||||
@ -125,13 +125,13 @@ void PhraseDictionaryNodeNewFormat::SetWeightTransModel(const PhraseDictionary *
|
||||
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const PhraseDictionaryNodeNewFormat &node)
|
||||
std::ostream& operator<<(std::ostream &out, const PhraseDictionaryNodeSCFG &node)
|
||||
{
|
||||
out << node.GetTargetPhraseCollection();
|
||||
return out;
|
||||
}
|
||||
|
||||
TO_STRING_BODY(PhraseDictionaryNodeNewFormat)
|
||||
TO_STRING_BODY(PhraseDictionaryNodeSCFG)
|
||||
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
// $Id: PhraseDictionaryNodeNewFormat.h 3049 2010-04-05 18:34:09Z hieuhoang1972 $
|
||||
// $Id: PhraseDictionaryNodeSCFG.h 3049 2010-04-05 18:34:09Z hieuhoang1972 $
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
@ -41,18 +41,18 @@ class InputType;
|
||||
|
||||
/** One node of the PhraseDictionaryMemory structure
|
||||
*/
|
||||
class PhraseDictionaryNodeNewFormat : public PhraseDictionaryNode
|
||||
class PhraseDictionaryNodeSCFG : public PhraseDictionaryNode
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryNodeNewFormat&);
|
||||
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryNodeSCFG&);
|
||||
|
||||
typedef std::map<Word, PhraseDictionaryNodeNewFormat> InnerNodeMap;
|
||||
typedef std::map<Word, PhraseDictionaryNodeSCFG> InnerNodeMap;
|
||||
typedef std::map<Word, InnerNodeMap> NodeMap;
|
||||
// 1st word = source side non-term, or the word if term
|
||||
// 2nd word = target side non term, or the word if term
|
||||
|
||||
// only these classes are allowed to instantiate this class
|
||||
friend class PhraseDictionaryNewFormat;
|
||||
friend class std::map<Word, PhraseDictionaryNodeNewFormat>;
|
||||
friend class PhraseDictionarySCFG;
|
||||
friend class std::map<Word, PhraseDictionaryNodeSCFG>;
|
||||
|
||||
protected:
|
||||
static size_t s_id;
|
||||
@ -62,18 +62,18 @@ protected:
|
||||
const Word *m_sourceWord;
|
||||
float m_entropy;
|
||||
|
||||
PhraseDictionaryNodeNewFormat()
|
||||
PhraseDictionaryNodeSCFG()
|
||||
:m_id(s_id++)
|
||||
,m_targetPhraseCollection(NULL)
|
||||
,m_sourceWord(NULL)
|
||||
{}
|
||||
public:
|
||||
virtual ~PhraseDictionaryNodeNewFormat();
|
||||
virtual ~PhraseDictionaryNodeSCFG();
|
||||
|
||||
void CleanUp();
|
||||
void Sort(size_t tableLimit);
|
||||
PhraseDictionaryNodeNewFormat *GetOrCreateChild(const Word &word, const Word &sourcelabel);
|
||||
const PhraseDictionaryNodeNewFormat *GetChild(const Word &word, const Word &sourcelabel) const;
|
||||
PhraseDictionaryNodeSCFG *GetOrCreateChild(const Word &word, const Word &sourcelabel);
|
||||
const PhraseDictionaryNodeSCFG *GetChild(const Word &word, const Word &sourcelabel) const;
|
||||
|
||||
const TargetPhraseCollection *GetTargetPhraseCollection() const
|
||||
{ return m_targetPhraseCollection; }
|
@ -1,4 +1,4 @@
|
||||
// $Id: PhraseDictionaryNewFormat.cpp 3056 2010-04-06 13:40:24Z hieuhoang1972 $
|
||||
// $Id: PhraseDictionarySCFG.cpp 3056 2010-04-06 13:40:24Z hieuhoang1972 $
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
@ -25,7 +25,7 @@
|
||||
#include <iterator>
|
||||
#include <algorithm>
|
||||
#include <sys/stat.h>
|
||||
#include "PhraseDictionaryNewFormat.h"
|
||||
#include "PhraseDictionarySCFG.h"
|
||||
#include "FactorCollection.h"
|
||||
#include "Word.h"
|
||||
#include "Util.h"
|
||||
@ -55,25 +55,8 @@ inline void TransformString(vector< vector<string>* > &phraseVector)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CreateAlignmentInfo(list<pair<size_t,size_t> > &alignmentInfo, const string &alignString)
|
||||
{
|
||||
vector<string> alignVec = Tokenize(alignString);
|
||||
|
||||
vector<string>::const_iterator iter;
|
||||
for (iter = alignVec.begin(); iter != alignVec.end(); ++iter)
|
||||
{
|
||||
const string &align1 = *iter;
|
||||
vector<size_t> alignPos = Tokenize<size_t>(align1, "-");
|
||||
assert(alignPos.size() == 2);
|
||||
size_t &sourcePos = alignPos[0]
|
||||
,&targetPos = alignPos[1];
|
||||
|
||||
alignmentInfo.push_back(pair<size_t,size_t>(sourcePos, targetPos));
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionaryNewFormat::CreateSourceLabels(vector<Word> &sourceLabels
|
||||
void PhraseDictionarySCFG::CreateSourceLabels(vector<Word> &sourceLabels
|
||||
, const vector<string> &sourceLabelsStr) const
|
||||
{
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
@ -90,7 +73,7 @@ void PhraseDictionaryNewFormat::CreateSourceLabels(vector<Word> &sourceLabels
|
||||
}
|
||||
}
|
||||
|
||||
bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
|
||||
bool PhraseDictionarySCFG::Load(const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
, const string &filePath
|
||||
, const vector<float> &weight
|
||||
@ -112,7 +95,7 @@ bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
|
||||
bool PhraseDictionarySCFG::Load(const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
, std::istream &inStream
|
||||
, const std::vector<float> &weight
|
||||
@ -125,7 +108,7 @@ bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
|
||||
|
||||
VERBOSE(2,"PhraseDictionaryNewFormat: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);
|
||||
VERBOSE(2,"PhraseDictionarySCFG: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);
|
||||
|
||||
string line;
|
||||
size_t count = 0;
|
||||
@ -179,12 +162,8 @@ bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
|
||||
TargetPhrase *targetPhrase = new TargetPhrase(Output);
|
||||
targetPhrase->CreateFromStringNewFormat(Output, output, targetPhraseString, factorDelimiter, targetLHS);
|
||||
|
||||
// alignment
|
||||
list<pair<size_t,size_t> > alignmentInfo;
|
||||
CreateAlignmentInfo(alignmentInfo, alignString);
|
||||
|
||||
// rest of target phrase
|
||||
targetPhrase->SetAlignmentInfo(alignmentInfo);
|
||||
targetPhrase->SetAlignmentInfo(alignString);
|
||||
targetPhrase->SetTargetLHS(targetLHS);
|
||||
//targetPhrase->SetDebugOutput(string("New Format pt ") + line);
|
||||
|
||||
@ -212,20 +191,20 @@ bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
|
||||
return true;
|
||||
}
|
||||
|
||||
TargetPhraseCollection &PhraseDictionaryNewFormat::GetOrCreateTargetPhraseCollection(const Phrase &source, const TargetPhrase &target)
|
||||
TargetPhraseCollection &PhraseDictionarySCFG::GetOrCreateTargetPhraseCollection(const Phrase &source, const TargetPhrase &target)
|
||||
{
|
||||
PhraseDictionaryNodeNewFormat &currNode = GetOrCreateNode(source, target);
|
||||
PhraseDictionaryNodeSCFG &currNode = GetOrCreateNode(source, target);
|
||||
return currNode.GetOrCreateTargetPhraseCollection();
|
||||
}
|
||||
|
||||
PhraseDictionaryNodeNewFormat &PhraseDictionaryNewFormat::GetOrCreateNode(const Phrase &source, const TargetPhrase &target)
|
||||
PhraseDictionaryNodeSCFG &PhraseDictionarySCFG::GetOrCreateNode(const Phrase &source, const TargetPhrase &target)
|
||||
{
|
||||
const size_t size = source.GetSize();
|
||||
|
||||
const AlignmentInfo &alignmentInfo = target.GetAlignmentInfo();
|
||||
AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();
|
||||
|
||||
PhraseDictionaryNodeNewFormat *currNode = &m_collection;
|
||||
PhraseDictionaryNodeSCFG *currNode = &m_collection;
|
||||
for (size_t pos = 0 ; pos < size ; ++pos)
|
||||
{
|
||||
const Word& word = source.GetWord(pos);
|
||||
@ -253,25 +232,25 @@ PhraseDictionaryNodeNewFormat &PhraseDictionaryNewFormat::GetOrCreateNode(const
|
||||
return *currNode;
|
||||
}
|
||||
|
||||
void PhraseDictionaryNewFormat::AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
|
||||
void PhraseDictionarySCFG::AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
|
||||
{
|
||||
assert(false); // TODO
|
||||
}
|
||||
|
||||
void PhraseDictionaryNewFormat::AddEquivPhrase(TargetPhraseCollection &targetPhraseColl, TargetPhrase *targetPhrase)
|
||||
void PhraseDictionarySCFG::AddEquivPhrase(TargetPhraseCollection &targetPhraseColl, TargetPhrase *targetPhrase)
|
||||
{
|
||||
targetPhraseColl.Add(targetPhrase);
|
||||
}
|
||||
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionaryNewFormat::GetTargetPhraseCollection(const Phrase &source) const
|
||||
const TargetPhraseCollection *PhraseDictionarySCFG::GetTargetPhraseCollection(const Phrase &source) const
|
||||
{ // exactly like CreateTargetPhraseCollection, but don't create
|
||||
assert(false);
|
||||
return NULL;
|
||||
/*
|
||||
const size_t size = source.GetSize();
|
||||
|
||||
const PhraseDictionaryNodeNewFormat *currNode = &m_collection;
|
||||
const PhraseDictionaryNodeSCFG *currNode = &m_collection;
|
||||
for (size_t pos = 0 ; pos < size ; ++pos)
|
||||
{
|
||||
const Word& word = source.GetWord(pos);
|
||||
@ -284,7 +263,7 @@ const TargetPhraseCollection *PhraseDictionaryNewFormat::GetTargetPhraseCollecti
|
||||
*/
|
||||
}
|
||||
|
||||
void PhraseDictionaryNewFormat::InitializeForInput(const InputType& input)
|
||||
void PhraseDictionarySCFG::InitializeForInput(const InputType& input)
|
||||
{
|
||||
assert(m_runningNodesVec.size() == 0);
|
||||
size_t sourceSize = input.GetSize();
|
||||
@ -301,29 +280,29 @@ void PhraseDictionaryNewFormat::InitializeForInput(const InputType& input)
|
||||
}
|
||||
}
|
||||
|
||||
PhraseDictionaryNewFormat::~PhraseDictionaryNewFormat()
|
||||
PhraseDictionarySCFG::~PhraseDictionarySCFG()
|
||||
{
|
||||
CleanUp();
|
||||
}
|
||||
|
||||
void PhraseDictionaryNewFormat::SetWeightTransModel(const vector<float> &weightT)
|
||||
void PhraseDictionarySCFG::SetWeightTransModel(const vector<float> &weightT)
|
||||
{
|
||||
PhraseDictionaryNodeNewFormat::iterator iterDict;
|
||||
PhraseDictionaryNodeSCFG::iterator iterDict;
|
||||
for (iterDict = m_collection.begin() ; iterDict != m_collection.end() ; ++iterDict)
|
||||
{
|
||||
PhraseDictionaryNodeNewFormat::InnerNodeMap &innerNode = iterDict->second;
|
||||
PhraseDictionaryNodeNewFormat::InnerNodeMap::iterator iterInner;
|
||||
PhraseDictionaryNodeSCFG::InnerNodeMap &innerNode = iterDict->second;
|
||||
PhraseDictionaryNodeSCFG::InnerNodeMap::iterator iterInner;
|
||||
for (iterInner = innerNode.begin() ; iterInner != innerNode.end() ; ++iterInner)
|
||||
{
|
||||
// recursively set weights in nodes
|
||||
PhraseDictionaryNodeNewFormat &node = iterInner->second;
|
||||
PhraseDictionaryNodeSCFG &node = iterInner->second;
|
||||
node.SetWeightTransModel(this, weightT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void PhraseDictionaryNewFormat::CleanUp()
|
||||
void PhraseDictionarySCFG::CleanUp()
|
||||
{
|
||||
//RemoveAllInColl(m_chartTargetPhraseColl);
|
||||
std::vector<ChartRuleCollection*>::iterator iter;
|
||||
@ -337,13 +316,13 @@ void PhraseDictionaryNewFormat::CleanUp()
|
||||
RemoveAllInColl(m_runningNodesVec);
|
||||
}
|
||||
|
||||
TO_STRING_BODY(PhraseDictionaryNewFormat);
|
||||
TO_STRING_BODY(PhraseDictionarySCFG);
|
||||
|
||||
// friend
|
||||
ostream& operator<<(ostream& out, const PhraseDictionaryNewFormat& phraseDict)
|
||||
ostream& operator<<(ostream& out, const PhraseDictionarySCFG& phraseDict)
|
||||
{
|
||||
const PhraseDictionaryNodeNewFormat &coll = phraseDict.m_collection;
|
||||
PhraseDictionaryNodeNewFormat::const_iterator iter;
|
||||
const PhraseDictionaryNodeSCFG &coll = phraseDict.m_collection;
|
||||
PhraseDictionaryNodeSCFG::const_iterator iter;
|
||||
for (iter = coll.begin() ; iter != coll.end() ; ++iter)
|
||||
{
|
||||
const Word &word = (*iter).first;
|
@ -1,4 +1,4 @@
|
||||
// $Id: PhraseDictionaryNewFormat.h 3045 2010-04-05 13:07:29Z hieuhoang1972 $
|
||||
// $Id: PhraseDictionarySCFG.h 3045 2010-04-05 13:07:29Z hieuhoang1972 $
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
@ -23,7 +23,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "PhraseDictionary.h"
|
||||
#include "PhraseDictionaryNodeNewFormat.h"
|
||||
#include "PhraseDictionaryNodeSCFG.h"
|
||||
#include "ChartRuleCollection.h"
|
||||
#include "CellCollection.h"
|
||||
|
||||
@ -35,13 +35,13 @@ namespace Moses
|
||||
/*** Implementation of a phrase table in a trie. Looking up a phrase of
|
||||
* length n words requires n look-ups to find the TargetPhraseCollection.
|
||||
*/
|
||||
class PhraseDictionaryNewFormat : public PhraseDictionary
|
||||
class PhraseDictionarySCFG : public PhraseDictionary
|
||||
{
|
||||
typedef PhraseDictionary MyBase;
|
||||
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryNewFormat&);
|
||||
friend std::ostream& operator<<(std::ostream&, const PhraseDictionarySCFG&);
|
||||
|
||||
protected:
|
||||
PhraseDictionaryNodeNewFormat m_collection;
|
||||
PhraseDictionaryNodeSCFG m_collection;
|
||||
mutable std::vector<ChartRuleCollection*> m_chartTargetPhraseColl;
|
||||
mutable std::vector<ProcessedRuleStack*> m_runningNodesVec;
|
||||
|
||||
@ -51,7 +51,7 @@ namespace Moses
|
||||
std::string m_filePath;
|
||||
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const Phrase &source, const TargetPhrase &target);
|
||||
PhraseDictionaryNodeNewFormat &GetOrCreateNode(const Phrase &source, const TargetPhrase &target);
|
||||
PhraseDictionaryNodeSCFG &GetOrCreateNode(const Phrase &source, const TargetPhrase &target);
|
||||
|
||||
bool Load(const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
@ -68,13 +68,13 @@ namespace Moses
|
||||
Word CreateCoveredWord(const Word &origSourceLabel, const InputType &src, const WordsRange &range) const;
|
||||
|
||||
public:
|
||||
PhraseDictionaryNewFormat(size_t numScoreComponent, PhraseDictionaryFeature* feature)
|
||||
PhraseDictionarySCFG(size_t numScoreComponent, PhraseDictionaryFeature* feature)
|
||||
: MyBase(numScoreComponent, feature)
|
||||
, m_prevSource(Input)
|
||||
, m_prevPhraseColl(NULL)
|
||||
{
|
||||
}
|
||||
virtual ~PhraseDictionaryNewFormat();
|
||||
virtual ~PhraseDictionarySCFG();
|
||||
|
||||
std::string GetScoreProducerDescription() const
|
||||
{ return "Hieu's Reordering Model"; }
|
@ -1,4 +1,4 @@
|
||||
// $Id: PhraseDictionaryNewFormat.h 3045 2010-04-05 13:07:29Z hieuhoang1972 $
|
||||
// $Id: PhraseDictionarySCFG.h 3045 2010-04-05 13:07:29Z hieuhoang1972 $
|
||||
// vim:tabstop=2
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
@ -19,7 +19,7 @@
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "PhraseDictionaryNewFormat.h"
|
||||
#include "PhraseDictionarySCFG.h"
|
||||
#include "FactorCollection.h"
|
||||
#include "InputType.h"
|
||||
#include "ChartRuleCollection.h"
|
||||
@ -31,7 +31,7 @@
|
||||
using namespace std;
|
||||
using namespace Moses;
|
||||
|
||||
Word PhraseDictionaryNewFormat::CreateCoveredWord(const Word &origSourceLabel, const InputType &src, const WordsRange &range) const
|
||||
Word PhraseDictionarySCFG::CreateCoveredWord(const Word &origSourceLabel, const InputType &src, const WordsRange &range) const
|
||||
{
|
||||
string coveredWordsString = origSourceLabel.GetFactor(0)->GetString();
|
||||
|
||||
@ -51,7 +51,7 @@ Word PhraseDictionaryNewFormat::CreateCoveredWord(const Word &origSourceLabel, c
|
||||
return ret;
|
||||
}
|
||||
|
||||
const ChartRuleCollection *PhraseDictionaryNewFormat::GetChartRuleCollection(
|
||||
const ChartRuleCollection *PhraseDictionarySCFG::GetChartRuleCollection(
|
||||
InputType const& src
|
||||
,WordsRange const& range
|
||||
,bool adhereTableLimit
|
||||
@ -71,7 +71,7 @@ const ChartRuleCollection *PhraseDictionaryNewFormat::GetChartRuleCollection(
|
||||
{
|
||||
const SavedNode &savedNode = *savedNodeColl[ind];
|
||||
const ProcessedRule &prevProcessedRule = savedNode.GetProcessedRule();
|
||||
const PhraseDictionaryNodeNewFormat &prevNode = static_cast<const PhraseDictionaryNodeNewFormat &>(prevProcessedRule.GetLastNode());
|
||||
const PhraseDictionaryNodeSCFG &prevNode = static_cast<const PhraseDictionaryNodeSCFG &>(prevProcessedRule.GetLastNode());
|
||||
const WordConsumed *prevWordConsumed = prevProcessedRule.GetLastWordConsumed();
|
||||
size_t startPos = (prevWordConsumed == NULL) ? range.GetStartPos() : prevWordConsumed->GetWordsRange().GetEndPos() + 1;
|
||||
|
||||
@ -79,7 +79,7 @@ const ChartRuleCollection *PhraseDictionaryNewFormat::GetChartRuleCollection(
|
||||
if (startPos == absEndPos)
|
||||
{
|
||||
const Word &sourceWord = src.GetWord(absEndPos);
|
||||
const PhraseDictionaryNodeNewFormat *node = prevNode.GetChild(sourceWord, sourceWord);
|
||||
const PhraseDictionaryNodeSCFG *node = prevNode.GetChild(sourceWord, sourceWord);
|
||||
if (node != NULL)
|
||||
{
|
||||
const Word &sourceWord = node->GetSourceWord();
|
||||
@ -123,7 +123,7 @@ const ChartRuleCollection *PhraseDictionaryNewFormat::GetChartRuleCollection(
|
||||
{
|
||||
const Word &headWord = *iterHeadWords;
|
||||
|
||||
const PhraseDictionaryNodeNewFormat *node = prevNode.GetChild(headWord, sourceLabel);
|
||||
const PhraseDictionaryNodeSCFG *node = prevNode.GetChild(headWord, sourceLabel);
|
||||
if (node != NULL)
|
||||
{
|
||||
//const Word &sourceWord = node->GetSourceWord();
|
||||
@ -147,7 +147,7 @@ const ChartRuleCollection *PhraseDictionaryNewFormat::GetChartRuleCollection(
|
||||
for (iterNode = nodes.begin(); iterNode != nodes.end(); ++iterNode)
|
||||
{
|
||||
const ProcessedRule &processedRule = **iterNode;
|
||||
const PhraseDictionaryNodeNewFormat &node = static_cast<const PhraseDictionaryNodeNewFormat &>(processedRule.GetLastNode());
|
||||
const PhraseDictionaryNodeSCFG &node = static_cast<const PhraseDictionaryNodeSCFG &>(processedRule.GetLastNode());
|
||||
const WordConsumed *wordConsumed = processedRule.GetLastWordConsumed();
|
||||
assert(wordConsumed);
|
||||
|
||||
@ -163,7 +163,7 @@ const ChartRuleCollection *PhraseDictionaryNewFormat::GetChartRuleCollection(
|
||||
return ret;
|
||||
}
|
||||
|
||||
void PhraseDictionaryNewFormat::DeleteDuplicates(ProcessedRuleColl &nodes) const
|
||||
void PhraseDictionarySCFG::DeleteDuplicates(ProcessedRuleColl &nodes) const
|
||||
{
|
||||
map<size_t, float> minEntropy;
|
||||
map<size_t, float>::iterator iterEntropy;
|
||||
@ -173,7 +173,7 @@ void PhraseDictionaryNewFormat::DeleteDuplicates(ProcessedRuleColl &nodes) const
|
||||
for (iter = nodes.begin(); iter != nodes.end(); ++iter)
|
||||
{
|
||||
const ProcessedRule *processedRule = *iter;
|
||||
const PhraseDictionaryNodeNewFormat &node = static_cast<const PhraseDictionaryNodeNewFormat&> (processedRule->GetLastNode());
|
||||
const PhraseDictionaryNodeSCFG &node = static_cast<const PhraseDictionaryNodeSCFG&> (processedRule->GetLastNode());
|
||||
size_t nodeId = node.GetId();
|
||||
float entropy = node.GetEntropy();
|
||||
|
||||
@ -197,7 +197,7 @@ void PhraseDictionaryNewFormat::DeleteDuplicates(ProcessedRuleColl &nodes) const
|
||||
while (ind < nodes.GetSize())
|
||||
{
|
||||
const ProcessedRule &processedRule = nodes.Get(ind);
|
||||
const PhraseDictionaryNodeNewFormat &node = static_cast<const PhraseDictionaryNodeNewFormat&> (processedRule.GetLastNode());
|
||||
const PhraseDictionaryNodeSCFG &node = static_cast<const PhraseDictionaryNodeSCFG&> (processedRule.GetLastNode());
|
||||
size_t nodeId = node.GetId();
|
||||
float entropy = node.GetEntropy();
|
||||
float minEntropy1 = minEntropy[nodeId];
|
@ -487,19 +487,18 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
|
||||
abort();
|
||||
}
|
||||
|
||||
std::string sourcePhraseString, targetPhraseString;
|
||||
std::string scoreString;
|
||||
std::string *scoreString;
|
||||
std::string sourceAlignString, targetAlignString;
|
||||
|
||||
sourcePhraseString=tokens[0];
|
||||
targetPhraseString=tokens[1];
|
||||
const std::string &sourcePhraseString=tokens[0]
|
||||
,&targetPhraseString=tokens[1];
|
||||
if (numElement==3){
|
||||
scoreString=tokens[2];
|
||||
scoreString = &tokens[2];
|
||||
}
|
||||
else{
|
||||
sourceAlignString=tokens[2];
|
||||
targetAlignString=tokens[3];
|
||||
scoreString=tokens[4];
|
||||
scoreString = &tokens[3];
|
||||
}
|
||||
|
||||
|
||||
@ -540,13 +539,12 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
|
||||
|
||||
// while(is>>w && w!="|||") sc.push_back(atof(w.c_str()));
|
||||
// Mauro: to handle 0 probs in phrase tables
|
||||
std::vector<float> scoreVector = Tokenize<float>(scoreString);
|
||||
std::vector<float> scoreVector = Tokenize<float>(*scoreString);
|
||||
for (size_t i = 0 ; i < scoreVector.size() ; ++i)
|
||||
{
|
||||
float tmp = scoreVector[i];
|
||||
sc.push_back(((tmp>0.0)?tmp:(float)1.0e-38));
|
||||
}
|
||||
|
||||
|
||||
if(f.empty())
|
||||
{
|
||||
|
@ -276,6 +276,26 @@ TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
|
||||
return clone;
|
||||
}
|
||||
|
||||
void TargetPhrase::SetAlignmentInfo(const std::string &alignString)
|
||||
{
|
||||
list<pair<size_t,size_t> > alignmentInfo;
|
||||
vector<string> alignVec = Tokenize(alignString);
|
||||
|
||||
vector<string>::const_iterator iter;
|
||||
for (iter = alignVec.begin(); iter != alignVec.end(); ++iter)
|
||||
{
|
||||
const string &align1 = *iter;
|
||||
vector<size_t> alignPos = Tokenize<size_t>(align1, "-");
|
||||
assert(alignPos.size() == 2);
|
||||
size_t &sourcePos = alignPos[0]
|
||||
,&targetPos = alignPos[1];
|
||||
|
||||
alignmentInfo.push_back(pair<size_t,size_t>(sourcePos, targetPos));
|
||||
}
|
||||
|
||||
SetAlignmentInfo(alignmentInfo);
|
||||
}
|
||||
|
||||
void TargetPhrase::SetAlignmentInfo(const std::list<std::pair<size_t,size_t> > &alignmentInfo)
|
||||
{
|
||||
m_alignmentInfo.AddAlignment(alignmentInfo);
|
||||
|
@ -169,6 +169,7 @@ public:
|
||||
const Word &GetTargetLHS() const
|
||||
{ return m_lhsTarget; }
|
||||
|
||||
void SetAlignmentInfo(const std::string &alignString);
|
||||
void SetAlignmentInfo(const std::list<std::pair<size_t,size_t> > &alignmentInfo);
|
||||
|
||||
AlignmentInfo &GetAlignmentInfo()
|
||||
|
@ -163,7 +163,7 @@ enum PhraseTableImplementation
|
||||
//,GlueRule = 3
|
||||
//,Joshua = 4
|
||||
//,MemorySourceLabel = 5
|
||||
,NewFormat = 6
|
||||
,SCFG = 6
|
||||
//,BerkeleyDb = 7
|
||||
,SuffixArray = 8
|
||||
};
|
||||
|
@ -112,6 +112,7 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
|
||||
// indirect: source target probabilities
|
||||
|
||||
// consistency checks
|
||||
/*
|
||||
size_t expectedSize = (hierarchicalFlag ? 5 : 4);
|
||||
if (itemDirect.size() != expectedSize)
|
||||
{
|
||||
@ -126,7 +127,8 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
|
||||
<< fileNameIndirect << ", line " << i << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
if (itemDirect[0].compare( itemIndirect[0] ) != 0)
|
||||
{
|
||||
cerr << "ERROR: target phrase does not match in line " << i << ": '"
|
||||
@ -145,13 +147,9 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
|
||||
fileConsolidated << itemDirect[0] << " ||| " << itemDirect[1] << " ||| ";
|
||||
|
||||
// output alignment and probabilities
|
||||
if (hierarchicalFlag)
|
||||
fileConsolidated << itemDirect[2] << " ||| " // alignment
|
||||
fileConsolidated << itemDirect[2] << " ||| " // alignment
|
||||
<< itemIndirect[2] // prob indirect
|
||||
<< " " << itemDirect[3]; // prob direct
|
||||
else
|
||||
fileConsolidated << itemIndirect[2] // prob indirect
|
||||
<< " " << itemDirect[2]; // prob direct
|
||||
fileConsolidated << " " << (logProbFlag ? 1 : 2.718); // phrase count feature
|
||||
|
||||
// counts
|
||||
|
@ -67,22 +67,22 @@
|
||||
1C05B9E91174CC24003585B2 /* statistics.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = statistics.cpp; sourceTree = "<group>"; };
|
||||
1C05B9EA1174CC24003585B2 /* SyntaxTree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SyntaxTree.cpp; sourceTree = "<group>"; };
|
||||
1C05B9EB1174CC24003585B2 /* SyntaxTree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SyntaxTree.h; sourceTree = "<group>"; };
|
||||
1C05B9F71174CE51003585B2 /* extract-rules */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "extract-rules"; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1C05BA1F1174CEE8003585B2 /* XmlTree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = XmlTree.cpp; sourceTree = "<group>"; };
|
||||
1C05BA201174CEE8003585B2 /* XmlTree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = XmlTree.h; sourceTree = "<group>"; };
|
||||
1C05BA2E1174CF6C003585B2 /* statistics */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = statistics; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1C47578F102B78AD00AB74DB /* score */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = score; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1C475794102B78DD00AB74DB /* score.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = score.cpp; sourceTree = "<group>"; };
|
||||
1C4757C4102B7EAA00AB74DB /* consolidate */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = consolidate; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1C5C088A0FFE54F400B00995 /* extract */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = extract; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1C6A83031111F5A300059E7F /* AlignmentPhrase.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = AlignmentPhrase.cpp; sourceTree = "<group>"; };
|
||||
1C6A83041111F5A300059E7F /* AlignmentPhrase.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AlignmentPhrase.h; sourceTree = "<group>"; };
|
||||
1CE8CE2C0FC6EA0200924FEA /* extract.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = extract.cpp; sourceTree = "<group>"; };
|
||||
1CE8CE4B0FC6EAA200924FEA /* tables-core.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "tables-core.cpp"; sourceTree = "<group>"; };
|
||||
1CE8CE4C0FC6EAA200924FEA /* tables-core.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "tables-core.h"; sourceTree = "<group>"; };
|
||||
1CF9F71A108C9FE700EABCE5 /* consolidate-direct */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "consolidate-direct"; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1CFE962311762A20006FF13B /* consolidate-direct.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "consolidate-direct.cpp"; sourceTree = "<group>"; };
|
||||
1CFE962411762A20006FF13B /* consolidate.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = consolidate.cpp; sourceTree = "<group>"; };
|
||||
1E7C2CF611F1146200213451 /* extract */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = extract; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1E7C2CF811F1146200213451 /* score */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = score; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1E7C2CFA11F1146300213451 /* consolidate */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = consolidate; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1E7C2CFC11F1146300213451 /* consolidate-direct */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "consolidate-direct"; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1E7C2CFE11F1146300213451 /* extract-rules */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "extract-rules"; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1E7C2D0011F1146300213451 /* statistics */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = statistics; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
@ -176,12 +176,12 @@
|
||||
1AB674ADFE9D54B511CA2CBB /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1C5C088A0FFE54F400B00995 /* extract */,
|
||||
1C47578F102B78AD00AB74DB /* score */,
|
||||
1C4757C4102B7EAA00AB74DB /* consolidate */,
|
||||
1CF9F71A108C9FE700EABCE5 /* consolidate-direct */,
|
||||
1C05B9F71174CE51003585B2 /* extract-rules */,
|
||||
1C05BA2E1174CF6C003585B2 /* statistics */,
|
||||
1E7C2CF611F1146200213451 /* extract */,
|
||||
1E7C2CF811F1146200213451 /* score */,
|
||||
1E7C2CFA11F1146300213451 /* consolidate */,
|
||||
1E7C2CFC11F1146300213451 /* consolidate-direct */,
|
||||
1E7C2CFE11F1146300213451 /* extract-rules */,
|
||||
1E7C2D0011F1146300213451 /* statistics */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
@ -209,7 +209,7 @@
|
||||
);
|
||||
name = "extract-rules";
|
||||
productName = "extract-rules";
|
||||
productReference = 1C05B9F71174CE51003585B2 /* extract-rules */;
|
||||
productReference = 1E7C2CFE11F1146300213451 /* extract-rules */;
|
||||
productType = "com.apple.product-type.tool";
|
||||
};
|
||||
1C05BA2D1174CF6C003585B2 /* statistics */ = {
|
||||
@ -225,7 +225,7 @@
|
||||
);
|
||||
name = statistics;
|
||||
productName = statistics;
|
||||
productReference = 1C05BA2E1174CF6C003585B2 /* statistics */;
|
||||
productReference = 1E7C2D0011F1146300213451 /* statistics */;
|
||||
productType = "com.apple.product-type.tool";
|
||||
};
|
||||
1C47578E102B78AD00AB74DB /* score */ = {
|
||||
@ -241,7 +241,7 @@
|
||||
);
|
||||
name = score;
|
||||
productName = score;
|
||||
productReference = 1C47578F102B78AD00AB74DB /* score */;
|
||||
productReference = 1E7C2CF811F1146200213451 /* score */;
|
||||
productType = "com.apple.product-type.tool";
|
||||
};
|
||||
1C4757C3102B7EAA00AB74DB /* consolidate */ = {
|
||||
@ -257,7 +257,7 @@
|
||||
);
|
||||
name = consolidate;
|
||||
productName = consolidate;
|
||||
productReference = 1C4757C4102B7EAA00AB74DB /* consolidate */;
|
||||
productReference = 1E7C2CFA11F1146300213451 /* consolidate */;
|
||||
productType = "com.apple.product-type.tool";
|
||||
};
|
||||
1CF9F719108C9FE700EABCE5 /* consolidate-direct */ = {
|
||||
@ -273,7 +273,7 @@
|
||||
);
|
||||
name = "consolidate-direct";
|
||||
productName = "consolidate-direct";
|
||||
productReference = 1CF9F71A108C9FE700EABCE5 /* consolidate-direct */;
|
||||
productReference = 1E7C2CFC11F1146300213451 /* consolidate-direct */;
|
||||
productType = "com.apple.product-type.tool";
|
||||
};
|
||||
8DD76F620486A84900D96B5E /* extract */ = {
|
||||
@ -291,7 +291,7 @@
|
||||
name = extract;
|
||||
productInstallPath = "$(HOME)/bin";
|
||||
productName = extract;
|
||||
productReference = 1C5C088A0FFE54F400B00995 /* extract */;
|
||||
productReference = 1E7C2CF611F1146200213451 /* extract */;
|
||||
productType = "com.apple.product-type.tool";
|
||||
};
|
||||
/* End PBXNativeTarget section */
|
||||
|
@ -444,19 +444,35 @@ void outputPhrasePair( vector< PhraseAlignment* > &phrasePair, float totalCount
|
||||
}
|
||||
|
||||
// alignment info for non-terminals
|
||||
if (! inverseFlag && hierarchicalFlag)
|
||||
if (! inverseFlag)
|
||||
{
|
||||
assert(phraseT.size() == bestAlignment->alignedToT.size() + 1);
|
||||
for(int j = 0; j < phraseT.size() - 1; j++)
|
||||
{
|
||||
if (isNonTerminal(vcbT.getWord( phraseT[j] )))
|
||||
if (hierarchicalFlag)
|
||||
{ // always output alignment if hiero style, but only for non-terms
|
||||
assert(phraseT.size() == bestAlignment->alignedToT.size() + 1);
|
||||
for(int j = 0; j < phraseT.size() - 1; j++)
|
||||
{
|
||||
assert(bestAlignment->alignedToT[ j ].size() == 1);
|
||||
int sourcePos = *(bestAlignment->alignedToT[ j ].begin());
|
||||
phraseTableFile << sourcePos << "-" << j << " ";
|
||||
if (isNonTerminal(vcbT.getWord( phraseT[j] )))
|
||||
{
|
||||
assert(bestAlignment->alignedToT[ j ].size() == 1);
|
||||
int sourcePos = *(bestAlignment->alignedToT[ j ].begin());
|
||||
phraseTableFile << sourcePos << "-" << j << " ";
|
||||
}
|
||||
}
|
||||
phraseTableFile << "||| ";
|
||||
}
|
||||
else if (wordAlignmentFlag)
|
||||
{ // alignment info in pb model
|
||||
for(int j=0;j<bestAlignment->alignedToT.size();j++)
|
||||
{
|
||||
const set< size_t > &aligned = bestAlignment->alignedToT[j];
|
||||
for (set< size_t >::const_iterator p(aligned.begin()); p != aligned.end(); ++p)
|
||||
{
|
||||
phraseTableFile << *p << "-" << j << " ";
|
||||
}
|
||||
}
|
||||
phraseTableFile << "||| ";
|
||||
|
||||
}
|
||||
phraseTableFile << "||| ";
|
||||
}
|
||||
|
||||
// phrase translation probability
|
||||
|
Loading…
Reference in New Issue
Block a user