mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-11 19:27:11 +03:00
Tree fragments in GHKM glue rules;
output of LHS tag in tree fragments for UNKs; GHKMParse info is now denoted as Tree info
This commit is contained in:
parent
bff123635e
commit
c39bed60c0
@ -71,7 +71,7 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
|
||||
,m_alignmentInfoStream(NULL)
|
||||
,m_inputFilePath(inputFilePath)
|
||||
,m_detailOutputCollector(NULL)
|
||||
,m_detailGhkmOutputCollector(NULL)
|
||||
,m_detailTreeFragmentsOutputCollector(NULL)
|
||||
,m_nBestOutputCollector(NULL)
|
||||
,m_searchGraphOutputCollector(NULL)
|
||||
,m_singleBestOutputCollector(NULL)
|
||||
@ -117,10 +117,10 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
|
||||
m_detailOutputCollector = new Moses::OutputCollector(m_detailedTranslationReportingStream);
|
||||
}
|
||||
|
||||
if (staticData.IsDetailedGhkmTranslationReportingEnabled()) {
|
||||
const std::string &path = staticData.GetDetailedGhkmTranslationReportingFilePath();
|
||||
m_detailedGhkmTranslationReportingStream = new std::ofstream(path.c_str());
|
||||
m_detailGhkmOutputCollector = new Moses::OutputCollector(m_detailedGhkmTranslationReportingStream);
|
||||
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
|
||||
const std::string &path = staticData.GetDetailedTreeFragmentsTranslationReportingFilePath();
|
||||
m_detailedTreeFragmentsTranslationReportingStream = new std::ofstream(path.c_str());
|
||||
m_detailTreeFragmentsOutputCollector = new Moses::OutputCollector(m_detailedTreeFragmentsTranslationReportingStream);
|
||||
}
|
||||
|
||||
if (!staticData.GetAlignmentOutputFile().empty()) {
|
||||
@ -137,7 +137,7 @@ IOWrapper::~IOWrapper()
|
||||
}
|
||||
delete m_outputSearchGraphStream;
|
||||
delete m_detailedTranslationReportingStream;
|
||||
delete m_detailedGhkmTranslationReportingStream;
|
||||
delete m_detailedTreeFragmentsTranslationReportingStream;
|
||||
delete m_alignmentInfoStream;
|
||||
delete m_detailOutputCollector;
|
||||
delete m_nBestOutputCollector;
|
||||
@ -321,11 +321,11 @@ void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &
|
||||
}
|
||||
}
|
||||
|
||||
void IOWrapper::OutputGhkmTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
|
||||
void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
|
||||
{
|
||||
// recursive
|
||||
if (hypo != NULL) {
|
||||
const std::string key = "GHKMParse";
|
||||
const std::string key = "Tree";
|
||||
std::string value;
|
||||
bool hasprop;
|
||||
const TargetPhrase &currTarPhr = hypo->GetCurrTargetPhrase();
|
||||
@ -340,11 +340,11 @@ void IOWrapper::OutputGhkmTranslationOptions(std::ostream &out, ApplicationConte
|
||||
<< "-> " << hypo->GetCurrTargetPhrase()
|
||||
<< " " << hypo->GetTotalScore() << hypo->GetScoreBreakdown();
|
||||
|
||||
out << std::endl;
|
||||
out << " ||| ";
|
||||
if (hasprop)
|
||||
out << " " << value;
|
||||
else
|
||||
out << " " << "noGHKMParseInfo";
|
||||
out << " " << "noTreeInfo";
|
||||
|
||||
out << std::endl;
|
||||
}
|
||||
@ -353,7 +353,7 @@ void IOWrapper::OutputGhkmTranslationOptions(std::ostream &out, ApplicationConte
|
||||
std::vector<const ChartHypothesis*>::const_iterator iter;
|
||||
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
|
||||
const ChartHypothesis *prevHypo = *iter;
|
||||
OutputGhkmTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
|
||||
OutputTreeFragmentsTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
|
||||
}
|
||||
}
|
||||
|
||||
@ -373,7 +373,7 @@ void IOWrapper::OutputDetailedTranslationReport(
|
||||
m_detailOutputCollector->Write(translationId, out.str());
|
||||
}
|
||||
|
||||
void IOWrapper::OutputDetailedGhkmTranslationReport(
|
||||
void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId)
|
||||
@ -384,9 +384,9 @@ void IOWrapper::OutputDetailedGhkmTranslationReport(
|
||||
std::ostringstream out;
|
||||
ApplicationContext applicationContext;
|
||||
|
||||
OutputGhkmTranslationOptions(out, applicationContext, hypo, sentence, translationId);
|
||||
CHECK(m_detailGhkmOutputCollector);
|
||||
m_detailGhkmOutputCollector->Write(translationId, out.str());
|
||||
OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId);
|
||||
CHECK(m_detailTreeFragmentsOutputCollector);
|
||||
m_detailTreeFragmentsOutputCollector->Write(translationId, out.str());
|
||||
}
|
||||
|
||||
void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId)
|
||||
|
@ -70,12 +70,12 @@ protected:
|
||||
const Moses::FactorMask &m_inputFactorUsed;
|
||||
std::ostream *m_outputSearchGraphStream;
|
||||
std::ostream *m_detailedTranslationReportingStream;
|
||||
std::ostream *m_detailedGhkmTranslationReportingStream;
|
||||
std::ostream *m_detailedTreeFragmentsTranslationReportingStream;
|
||||
std::ostream *m_alignmentInfoStream;
|
||||
std::string m_inputFilePath;
|
||||
std::istream *m_inputStream;
|
||||
Moses::OutputCollector *m_detailOutputCollector;
|
||||
Moses::OutputCollector *m_detailGhkmOutputCollector;
|
||||
Moses::OutputCollector *m_detailTreeFragmentsOutputCollector;
|
||||
Moses::OutputCollector *m_nBestOutputCollector;
|
||||
Moses::OutputCollector *m_searchGraphOutputCollector;
|
||||
Moses::OutputCollector *m_singleBestOutputCollector;
|
||||
@ -86,7 +86,7 @@ protected:
|
||||
size_t OutputAlignment(Alignments &retAlign, const Moses::ChartHypothesis *hypo, size_t startTarget);
|
||||
void OutputAlignment(std::vector< std::set<size_t> > &retAlignmentsS2T, const Moses::AlignmentInfo &ai);
|
||||
void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputGhkmTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
|
||||
void ReconstructApplicationContext(const Moses::ChartHypothesis &hypo,
|
||||
const Moses::Sentence &sentence,
|
||||
ApplicationContext &context);
|
||||
@ -117,7 +117,7 @@ public:
|
||||
void OutputNBestList(const Moses::ChartTrellisPathList &nBestList, long translationId);
|
||||
void OutputNBestList(const std::vector<search::Applied> &nbest, long translationId);
|
||||
void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputDetailedGhkmTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputDetailedTreeFragmentsTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
|
||||
void Backtrack(const Moses::ChartHypothesis *hypo);
|
||||
|
||||
void ResetTranslationId();
|
||||
|
@ -127,9 +127,9 @@ public:
|
||||
const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
|
||||
m_ioWrapper.OutputDetailedTranslationReport(bestHypo, sentence, translationId);
|
||||
}
|
||||
if (staticData.IsDetailedGhkmTranslationReportingEnabled()) {
|
||||
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
|
||||
const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
|
||||
m_ioWrapper.OutputDetailedGhkmTranslationReport(bestHypo, sentence, translationId);
|
||||
m_ioWrapper.OutputDetailedTreeFragmentsTranslationReport(bestHypo, sentence, translationId);
|
||||
}
|
||||
|
||||
// n-best
|
||||
|
@ -96,8 +96,8 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
|
||||
|
||||
targetPhrase->SetTargetLHS(targetLHS);
|
||||
targetPhrase->SetAlignmentInfo("0-0");
|
||||
if (staticData.IsDetailedGhkmTranslationReportingEnabled()) {
|
||||
targetPhrase->SetProperty("GHKMParse","( UNK "+sourceWord[0]->GetString().as_string()+" )");
|
||||
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
|
||||
targetPhrase->SetProperty("Tree","( " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" )");
|
||||
}
|
||||
|
||||
// chart rule
|
||||
|
@ -67,7 +67,7 @@ Parameter::Parameter()
|
||||
AddParam("stack-diversity", "sd", "minimum number of hypothesis of each coverage in stack (default 0)");
|
||||
AddParam("threads","th", "number of threads to use in decoding (defaults to single-threaded)");
|
||||
AddParam("translation-details", "T", "for each best hypothesis, report translation details to the given file");
|
||||
AddParam("ghkm-translation-details", "Tghkm", "for each hypothesis, report removed internal nodes to given file");
|
||||
AddParam("tree-translation-details", "Ttree", "for each hypothesis, report translation details with tree fragment info to given file");
|
||||
AddParam("translation-option-threshold", "tot", "threshold for translation options relative to best for input phrase");
|
||||
AddParam("early-discarding-threshold", "edt", "threshold for constructing hypotheses based on estimate cost");
|
||||
AddParam("verbose", "v", "verbosity level of the logging");
|
||||
|
@ -60,7 +60,7 @@ StaticData::StaticData()
|
||||
,m_unknownWordPenaltyProducer(NULL)
|
||||
,m_inputFeature(NULL)
|
||||
,m_detailedTranslationReportingFilePath()
|
||||
,m_detailedGhkmTranslationReportingFilePath()
|
||||
,m_detailedTreeFragmentsTranslationReportingFilePath()
|
||||
,m_onlyDistinctNBest(false)
|
||||
,m_needAlignmentInfo(false)
|
||||
,m_factorDelimiter("|") // default delimiter between factors
|
||||
@ -308,12 +308,12 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (m_parameter->isParamSpecified("ghkm-translation-details")) {
|
||||
const vector<string> &args = m_parameter->GetParam("ghkm-translation-details");
|
||||
if (m_parameter->isParamSpecified("tree-translation-details")) {
|
||||
const vector<string> &args = m_parameter->GetParam("tree-translation-details");
|
||||
if (args.size() == 1) {
|
||||
m_detailedGhkmTranslationReportingFilePath = args[0];
|
||||
m_detailedTreeFragmentsTranslationReportingFilePath = args[0];
|
||||
} else {
|
||||
UserMessage::Add(string("the ghkm-translation-details option requires exactly one filename argument"));
|
||||
UserMessage::Add(string("the tree-translation-details option requires exactly one filename argument"));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -137,7 +137,7 @@ protected:
|
||||
bool m_reportAllFactors;
|
||||
bool m_reportAllFactorsNBest;
|
||||
std::string m_detailedTranslationReportingFilePath;
|
||||
std::string m_detailedGhkmTranslationReportingFilePath;
|
||||
std::string m_detailedTreeFragmentsTranslationReportingFilePath;
|
||||
bool m_onlyDistinctNBest;
|
||||
bool m_PrintAlignmentInfo;
|
||||
bool m_needAlignmentInfo;
|
||||
@ -368,11 +368,11 @@ public:
|
||||
const std::string &GetDetailedTranslationReportingFilePath() const {
|
||||
return m_detailedTranslationReportingFilePath;
|
||||
}
|
||||
bool IsDetailedGhkmTranslationReportingEnabled() const {
|
||||
return !m_detailedGhkmTranslationReportingFilePath.empty();
|
||||
bool IsDetailedTreeFragmentsTranslationReportingEnabled() const {
|
||||
return !m_detailedTreeFragmentsTranslationReportingFilePath.empty();
|
||||
}
|
||||
const std::string &GetDetailedGhkmTranslationReportingFilePath() const {
|
||||
return m_detailedGhkmTranslationReportingFilePath;
|
||||
const std::string &GetDetailedTreeFragmentsTranslationReportingFilePath() const {
|
||||
return m_detailedTreeFragmentsTranslationReportingFilePath;
|
||||
}
|
||||
bool IsLabeledNBestList() const {
|
||||
return m_labeledNBestList;
|
||||
|
@ -26,7 +26,7 @@ void InternalStructFeature::add(const ScoreFeatureContext& context,
|
||||
std::vector<float>& denseValues,
|
||||
std::map<std::string,float>& sparseValues) const{
|
||||
for(size_t i=0; i<context.phrasePair.size(); i++) {
|
||||
add(&context.phrasePair[i]->ghkmParse, denseValues, sparseValues);
|
||||
add(&context.phrasePair[i]->treeFragment, denseValues, sparseValues);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -82,7 +82,7 @@ void PhraseAlignment::create( char line[], int lineID, bool includeSentenceIdFla
|
||||
{
|
||||
assert(phraseS.empty());
|
||||
assert(phraseT.empty());
|
||||
ghkmParse.clear();
|
||||
treeFragment.clear();
|
||||
|
||||
vector< string > token = tokenize( line );
|
||||
int item = 1;
|
||||
@ -109,11 +109,11 @@ void PhraseAlignment::create( char line[], int lineID, bool includeSentenceIdFla
|
||||
alignedToT[t].insert( s );
|
||||
alignedToS[s].insert( t );
|
||||
}
|
||||
} else if ( (item >= 4) && (token[j] == "GHKMParse") ) { // check for information with a key field
|
||||
} else if ( (item >= 4) && (token[j] == "Tree") ) { // check for information with a key field
|
||||
++j;
|
||||
while ( (j < token.size() ) && (token[j] != "|||") ) {
|
||||
ghkmParse.append(" ");
|
||||
ghkmParse.append(token[j]);
|
||||
treeFragment.append(" ");
|
||||
treeFragment.append(token[j]);
|
||||
++j;
|
||||
}
|
||||
--j;
|
||||
|
@ -32,7 +32,7 @@ public:
|
||||
float count;
|
||||
int sentenceId;
|
||||
std::string domain;
|
||||
std::string ghkmParse;
|
||||
std::string treeFragment;
|
||||
|
||||
std::vector< std::set<size_t> > alignedToT;
|
||||
std::vector< std::set<size_t> > alignedToS;
|
||||
|
@ -446,22 +446,22 @@ void ExtractGHKM::WriteGlueGrammar(
|
||||
}
|
||||
|
||||
// basic rules
|
||||
out << "<s> [X] ||| <s> [" << topLabel << "] ||| 1 ||| " << std::endl;
|
||||
out << "[X][" << topLabel << "] </s> [X] ||| [X][" << topLabel << "] </s> [" << topLabel << "] ||| 1 ||| 0-0 " << std::endl;
|
||||
out << "<s> [X] ||| <s> [" << topLabel << "] ||| 1 ||| ||| ||| ||| {{Tree ( " << topLabel << " ( SSTART <s> ) )}}" << std::endl;
|
||||
out << "[X][" << topLabel << "] </s> [X] ||| [X][" << topLabel << "] </s> [" << topLabel << "] ||| 1 ||| 0-0 ||| ||| ||| {{Tree ( " << topLabel << " ( SEND </s> ) )}}" << std::endl;
|
||||
|
||||
// top rules
|
||||
for (std::map<std::string, int>::const_iterator i = topLabelSet.begin();
|
||||
i != topLabelSet.end(); ++i) {
|
||||
out << "<s> [X][" << i->first << "] </s> [X] ||| <s> [X][" << i->first << "] </s> [" << topLabel << "] ||| 1 ||| 1-1" << std::endl;
|
||||
out << "<s> [X][" << i->first << "] </s> [X] ||| <s> [X][" << i->first << "] </s> [" << topLabel << "] ||| 1 ||| 1-1 ||| ||| ||| {{Tree ( " << topLabel << " ( SSTART <s> ) ( " << i->first << " ) ( SEND </s> ) )}}" << std::endl;
|
||||
}
|
||||
|
||||
// glue rules
|
||||
for(std::set<std::string>::const_iterator i = labelSet.begin();
|
||||
i != labelSet.end(); i++ ) {
|
||||
out << "[X][" << topLabel << "] [X][" << *i << "] [X] ||| [X][" << topLabel << "] [X][" << *i << "] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1" << std::endl;
|
||||
out << "[X][" << topLabel << "] [X][" << *i << "] [X] ||| [X][" << topLabel << "] [X][" << *i << "] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1 ||| ||| ||| {{Tree ( " << topLabel << " ( "<< topLabel << " ) ( " << *i << " ) )}}" << std::endl;
|
||||
}
|
||||
// glue rule for unknown word...
|
||||
out << "[X][" << topLabel << "] [X][X] [X] ||| [X][" << topLabel << "] [X][X] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1 " << std::endl;
|
||||
out << "[X][" << topLabel << "] [X][X] [X] ||| [X][" << topLabel << "] [X][X] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1 ||| ||| ||| {{Tree ( " << topLabel << " ( X ) )}}" << std::endl;
|
||||
}
|
||||
|
||||
void ExtractGHKM::CollectWordLabelCounts(
|
||||
|
@ -168,7 +168,7 @@ void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out)
|
||||
void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g)
|
||||
{
|
||||
Write(rule,false);
|
||||
m_fwd << " GHKMParse ";
|
||||
m_fwd << " Tree ";
|
||||
g.PrintTree(m_fwd);
|
||||
m_fwd << std::endl;
|
||||
m_inv << std::endl;
|
||||
|
@ -49,7 +49,7 @@ LexicalTable lexTable;
|
||||
bool inverseFlag = false;
|
||||
bool hierarchicalFlag = false;
|
||||
bool pcfgFlag = false;
|
||||
bool ghkmParseFlag = false;
|
||||
bool treeFragmentsFlag = false;
|
||||
bool unpairedExtractFormatFlag = false;
|
||||
bool conditionOnTargetLhsFlag = false;
|
||||
bool wordAlignmentFlag = true;
|
||||
@ -78,7 +78,7 @@ vector<string> tokenize( const char [] );
|
||||
void writeCountOfCounts( const string &fileNameCountOfCounts );
|
||||
void processPhrasePairs( vector< PhraseAlignment > & , ostream &phraseTableFile, bool isSingleton, const ScoreFeatureManager& featureManager, const MaybeLog& maybeLog);
|
||||
const PhraseAlignment &findBestAlignment(const PhraseAlignmentCollection &phrasePair );
|
||||
const std::string &findBestGHKMParse(const PhraseAlignmentCollection &phrasePair );
|
||||
const std::string &findBestTreeFragment(const PhraseAlignmentCollection &phrasePair );
|
||||
void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float, int, ostream &phraseTableFile, bool isSingleton, const ScoreFeatureManager& featureManager, const MaybeLog& maybeLog );
|
||||
double computeLexicalTranslation( const PHRASE &, const PHRASE &, const PhraseAlignment & );
|
||||
double computeUnalignedPenalty( const PHRASE &, const PHRASE &, const PhraseAlignment & );
|
||||
@ -98,7 +98,7 @@ int main(int argc, char* argv[])
|
||||
|
||||
ScoreFeatureManager featureManager;
|
||||
if (argc < 4) {
|
||||
cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--NoWordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--OutputNTLengths] [--PCFG] [--GHKMParse] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--Singleton] [--CrossedNonTerm] \n";
|
||||
cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--NoWordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--OutputNTLengths] [--PCFG] [--TreeFragments] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--Singleton] [--CrossedNonTerm] \n";
|
||||
cerr << featureManager.usage() << endl;
|
||||
exit(1);
|
||||
}
|
||||
@ -119,9 +119,9 @@ int main(int argc, char* argv[])
|
||||
} else if (strcmp(argv[i],"--PCFG") == 0) {
|
||||
pcfgFlag = true;
|
||||
cerr << "including PCFG scores\n";
|
||||
} else if (strcmp(argv[i],"--GHKMParse") == 0) {
|
||||
ghkmParseFlag = true;
|
||||
cerr << "including GHKM parse\n";
|
||||
} else if (strcmp(argv[i],"--TreeFragments") == 0) {
|
||||
treeFragmentsFlag = true;
|
||||
cerr << "including tree fragments from syntactic parse\n";
|
||||
} else if (strcmp(argv[i],"--UnpairedExtractFormat") == 0) {
|
||||
unpairedExtractFormatFlag = true;
|
||||
cerr << "processing unpaired extract format\n";
|
||||
@ -381,27 +381,27 @@ const PhraseAlignment &findBestAlignment(const PhraseAlignmentCollection &phrase
|
||||
return *bestAlignment;
|
||||
}
|
||||
|
||||
const std::string &findBestGHKMParse(const PhraseAlignmentCollection &phrasePair )
|
||||
const std::string &findBestTreeFragment(const PhraseAlignmentCollection &phrasePair )
|
||||
{
|
||||
float bestGHKMParseCount = -1;
|
||||
PhraseAlignment *bestGHKMParse = NULL;
|
||||
float bestTreeFragmentCount = -1;
|
||||
PhraseAlignment *bestTreeFragment = NULL;
|
||||
|
||||
for(size_t i=0; i<phrasePair.size(); i++) {
|
||||
size_t ghkmParseInd;
|
||||
size_t treeFragmentInd;
|
||||
if (inverseFlag) {
|
||||
// count backwards, so that alignments for ties will be the same for both normal & inverse scores
|
||||
ghkmParseInd = phrasePair.size() - i - 1;
|
||||
treeFragmentInd = phrasePair.size() - i - 1;
|
||||
} else {
|
||||
ghkmParseInd = i;
|
||||
treeFragmentInd = i;
|
||||
}
|
||||
|
||||
if (phrasePair[ghkmParseInd]->count > bestGHKMParseCount) {
|
||||
bestGHKMParseCount = phrasePair[ghkmParseInd]->count;
|
||||
bestGHKMParse = phrasePair[ghkmParseInd];
|
||||
if (phrasePair[treeFragmentInd]->count > bestTreeFragmentCount) {
|
||||
bestTreeFragmentCount = phrasePair[treeFragmentInd]->count;
|
||||
bestTreeFragment = phrasePair[treeFragmentInd];
|
||||
}
|
||||
}
|
||||
|
||||
return bestGHKMParse->ghkmParse;
|
||||
return bestTreeFragment->treeFragment;
|
||||
}
|
||||
|
||||
|
||||
@ -708,12 +708,11 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// GHKM parse
|
||||
if (ghkmParseFlag && !inverseFlag) {
|
||||
const std::string &bestGHKMParse = findBestGHKMParse( phrasePair );
|
||||
if ( !bestGHKMParse.empty() )
|
||||
phraseTableFile << " ||| {{GHKMParse" << bestGHKMParse << "}}";
|
||||
// tree fragments
|
||||
if (treeFragmentsFlag && !inverseFlag) {
|
||||
const std::string &bestTreeFragment = findBestTreeFragment( phrasePair );
|
||||
if ( !bestTreeFragment.empty() )
|
||||
phraseTableFile << " ||| {{Tree " << bestTreeFragment << "}}";
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user