mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-28 14:32:38 +03:00
Add --PhrasePairCount option to score (the flag is named --RuleCount in the diff below).
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4032 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
0484d43a22
commit
108dc4d12e
@ -16,7 +16,7 @@ extract-rules: tables-core.o SentenceAlignment.o SentenceAlignmentWithSyntax.o S
|
||||
score: tables-core.o AlignmentPhrase.o score.o PhraseAlignment.o InputFileStream.o
|
||||
$(CXX) $^ -lz -o score
|
||||
|
||||
consolidate: consolidate.o
|
||||
consolidate: consolidate.o tables-core.o
|
||||
$(CXX) $^ -o consolidate
|
||||
|
||||
consolidate-direct: consolidate-direct.o InputFileStream.o
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "tables-core.h"
|
||||
#include "SafeGetline.h"
|
||||
|
||||
#define LINE_MAX_LENGTH 10000
|
||||
@ -118,23 +119,6 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
|
||||
// indirect: source target probabilities
|
||||
|
||||
// consistency checks
|
||||
/*
|
||||
size_t expectedSize = (hierarchicalFlag ? 5 : 4);
|
||||
if (itemDirect.size() != expectedSize)
|
||||
{
|
||||
cerr << "ERROR: expected " << expectedSize << " items in file "
|
||||
<< fileNameDirect << ", line " << i << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (itemIndirect.size() != 4)
|
||||
{
|
||||
cerr << "ERROR: expected 4 items in file "
|
||||
<< fileNameIndirect << ", line " << i << endl;
|
||||
exit(1);
|
||||
}
|
||||
*/
|
||||
|
||||
if (itemDirect[0].compare( itemIndirect[0] ) != 0) {
|
||||
cerr << "ERROR: target phrase does not match in line " << i << ": '"
|
||||
<< itemDirect[0] << "' != '" << itemIndirect[0] << "'" << endl;
|
||||
@ -164,8 +148,15 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
|
||||
fileConsolidated << " ||| " << itemDirect[3];
|
||||
|
||||
// counts, for debugging
|
||||
fileConsolidated << "||| " << itemIndirect[4] << " " // indirect
|
||||
<< itemDirect[4]; // direct
|
||||
vector<string> directCounts = tokenize(itemDirect[4].c_str());
|
||||
vector<string> indirectCounts = tokenize(itemIndirect[4].c_str());
|
||||
fileConsolidated << "||| " << indirectCounts[0] << " " << directCounts[0];
|
||||
// output rule count if present in either file
|
||||
if (directCounts.size() > 1) {
|
||||
fileConsolidated << " " << directCounts[1];
|
||||
} else if (indirectCounts.size() > 1) {
|
||||
fileConsolidated << " " << indirectCounts[1];
|
||||
}
|
||||
|
||||
fileConsolidated << endl;
|
||||
}
|
||||
|
@ -74,6 +74,7 @@ bool lexFlag = true;
|
||||
int countOfCounts[GT_MAX+1];
|
||||
float discountFactor[GT_MAX+1];
|
||||
int maxLinesGTDiscount = -1;
|
||||
bool ruleCountFlag = false;
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
@ -81,7 +82,7 @@ int main(int argc, char* argv[])
|
||||
<< "scoring methods for extracted rules\n";
|
||||
|
||||
if (argc < 4) {
|
||||
cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--WordAlignment [--MaxLinesGTDiscount num] file]\n";
|
||||
cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--WordAlignment] [--MaxLinesGTDiscount num] [--RuleCount]\n";
|
||||
exit(1);
|
||||
}
|
||||
char* fileNameExtract = argv[1];
|
||||
@ -115,6 +116,9 @@ int main(int argc, char* argv[])
|
||||
++i;
|
||||
maxLinesGTDiscount = atoi(argv[i]);
|
||||
cerr << "maxLinesGTDiscount=" << maxLinesGTDiscount << endl;
|
||||
} else if (strcmp(argv[i],"--RuleCount") == 0) {
|
||||
ruleCountFlag = true;
|
||||
cerr << "outputting rule counts" << endl;
|
||||
} else {
|
||||
cerr << "ERROR: unknown option " << argv[i] << endl;
|
||||
exit(1);
|
||||
@ -345,6 +349,7 @@ void outputPhrasePair( vector< PhraseAlignment* > &phrasePair, float totalCount,
|
||||
for(size_t i=0; i<phrasePair.size(); i++) {
|
||||
count += phrasePair[i]->count;
|
||||
}
|
||||
const float originalCount = count;
|
||||
|
||||
const PHRASE &phraseS = phrasePair[0]->GetSource();
|
||||
const PHRASE &phraseT = phrasePair[0]->GetTarget();
|
||||
@ -418,6 +423,9 @@ void outputPhrasePair( vector< PhraseAlignment* > &phrasePair, float totalCount,
|
||||
}
|
||||
|
||||
phraseTableFile << " ||| " << totalCount;
|
||||
if (ruleCountFlag) {
|
||||
phraseTableFile << " " << originalCount;
|
||||
}
|
||||
phraseTableFile << endl;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user