mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-28 22:45:50 +03:00
Merge branch 'master' into weight-new
This commit is contained in:
commit
b9c76342e9
@ -13,8 +13,8 @@ void PhraseLengthFeature::Evaluate(
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
// get length of source and target phrase
|
||||
size_t sourceLength = context.GetTargetPhrase().GetSize();
|
||||
size_t targetLength = context.GetTranslationOption().GetSourcePhrase()->GetSize();
|
||||
size_t targetLength = context.GetTargetPhrase().GetSize();
|
||||
size_t sourceLength = context.GetTranslationOption().GetSourceWordsRange().GetNumWordsCovered();
|
||||
|
||||
// create feature names
|
||||
stringstream nameSource;
|
||||
|
102
moses/PhraseLengthFeatureTest.cpp
Normal file
102
moses/PhraseLengthFeatureTest.cpp
Normal file
@ -0,0 +1,102 @@
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2010 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
||||
#include "FactorCollection.h"
|
||||
#include "PhraseLengthFeature.h"
|
||||
#include "Sentence.h"
|
||||
#include "TargetPhrase.h"
|
||||
#include "TranslationOption.h"
|
||||
|
||||
using namespace Moses;
|
||||
using namespace std;
|
||||
|
||||
BOOST_AUTO_TEST_SUITE(phrase_length_feature)
|
||||
|
||||
//TODO: Factor out setup code so that it can be reused
|
||||
|
||||
// Test helper: adds `text` to the FactorCollection instance as an Input
// factor of type 0 and returns a Word whose factor 0 is set to it.
static Word MakeWord(string text) {
  const Factor* const factor =
    FactorCollection::Instance().AddFactor(Input, 0, text);

  Word word;
  word.SetFactor(0, factor);
  return word;
}
|
||||
|
||||
|
||||
// Exercises PhraseLengthFeature::Evaluate on three translation options.
// For each option the checks expect the features "s<N>", "t<M>" and "<N>,<M>"
// to score 1, where N is the number of source words covered by the option's
// WordsRange and M is the number of words in its target phrase.
BOOST_AUTO_TEST_CASE(evaluate) {
  // Vocabulary used to assemble the target phrases.
  Word wordW1 = MakeWord("w1");
  Word wordY2 = MakeWord("y2");
  Word wordX3 = MakeWord("x3");
  Word wordW4 = MakeWord("w4");

  // Target phrase with 3 words.
  Phrase threeWords;
  threeWords.AddWord(wordW1);
  threeWords.AddWord(wordX3);
  threeWords.AddWord(wordW4);

  // Target phrase with 2 words.
  Phrase twoWords;
  twoWords.AddWord(wordW1);
  twoWords.AddWord(wordY2);

  // Target phrase with 4 words.
  Phrase fourWords;
  fourWords.AddWord(wordY2);
  fourWords.AddWord(wordW1);
  fourWords.AddWord(wordW4);
  fourWords.AddWord(wordW4);

  TargetPhrase targetOf3(threeWords);
  TargetPhrase targetOf2(twoWords);
  TargetPhrase targetOf4(fourWords);

  // Source sentence that the translation options refer to.
  Sentence sentence;
  vector<FactorType> factorOrder;
  factorOrder.push_back(0);
  stringstream inputText("the input sentence has 6 words");
  sentence.Read(inputText, factorOrder);

  // Source spans of 1, 3 and 2 words paired with the targets above.
  TranslationOption optSpan1(WordsRange(0,0), targetOf3, sentence);
  TranslationOption optSpan3(WordsRange(1,3), targetOf2, sentence);
  TranslationOption optSpan2(WordsRange(2,3), targetOf4, sentence);

  PhraseBasedFeatureContext ctxSpan1(optSpan1, sentence);
  PhraseBasedFeatureContext ctxSpan3(optSpan3, sentence);
  PhraseBasedFeatureContext ctxSpan2(optSpan2, sentence);

  PhraseLengthFeature plf;

  // One accumulator per option so that scores do not mix between checks.
  ScoreComponentCollection scoresSpan1;
  ScoreComponentCollection scoresSpan3;
  ScoreComponentCollection scoresSpan2;

  // 1 source word -> 3 target words.
  plf.Evaluate(ctxSpan1, &scoresSpan1);
  BOOST_CHECK_EQUAL(scoresSpan1.GetScoreForProducer(&plf, "s1"),1);
  BOOST_CHECK_EQUAL(scoresSpan1.GetScoreForProducer(&plf, "t3"),1);
  BOOST_CHECK_EQUAL(scoresSpan1.GetScoreForProducer(&plf, "1,3"),1);

  // 3 source words -> 2 target words.
  plf.Evaluate(ctxSpan3, &scoresSpan3);
  BOOST_CHECK_EQUAL(scoresSpan3.GetScoreForProducer(&plf, "s3"),1);
  BOOST_CHECK_EQUAL(scoresSpan3.GetScoreForProducer(&plf, "t2"),1);
  BOOST_CHECK_EQUAL(scoresSpan3.GetScoreForProducer(&plf, "3,2"),1);

  // 2 source words -> 4 target words.
  plf.Evaluate(ctxSpan2, &scoresSpan2);
  BOOST_CHECK_EQUAL(scoresSpan2.GetScoreForProducer(&plf, "s2"),1);
  BOOST_CHECK_EQUAL(scoresSpan2.GetScoreForProducer(&plf, "t4"),1);
  BOOST_CHECK_EQUAL(scoresSpan2.GetScoreForProducer(&plf, "2,4"),1);
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
@ -46,6 +46,7 @@ class PhraseExtractionOptions {
|
||||
bool includeSentenceIdFlag; //include sentence id in extract file
|
||||
bool onlyOutputSpanInfo;
|
||||
bool gzOutput;
|
||||
std::string instanceWeightsFile; //weights for each sentence
|
||||
|
||||
public:
|
||||
PhraseExtractionOptions(const int initmaxPhraseLength):
|
||||
@ -99,7 +100,11 @@ public:
|
||||
}
|
||||
void initGzOutput (const bool initgzOutput){
|
||||
gzOutput= initgzOutput;
|
||||
}
|
||||
}
|
||||
// Stores the name of the instance-weights file (weights for each sentence).
// A null pointer is treated as "no file" and stores the empty string, because
// building a std::string from a null pointer is undefined behaviour.
void initInstanceWeightsFile(const char* initInstanceWeightsFile) {
  if (!initInstanceWeightsFile) {
    instanceWeightsFile.clear();
    return;
  }
  // assign() copies straight into the member without a temporary std::string.
  instanceWeightsFile.assign(initInstanceWeightsFile);
}
|
||||
|
||||
// functions for getting values
|
||||
bool isAllModelsOutputFlag() const {
|
||||
return allModelsOutputFlag;
|
||||
@ -136,7 +141,10 @@ public:
|
||||
}
|
||||
bool isGzOutput () const {
|
||||
return gzOutput;
|
||||
}
|
||||
}
|
||||
std::string getInstanceWeightsFile() const {
|
||||
return instanceWeightsFile;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -54,10 +54,11 @@ bool SentenceAlignment::processSourceSentence(const char * sourceString, int, bo
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SentenceAlignment::create( char targetString[], char sourceString[], char alignmentString[], int sentenceID, bool boundaryRules)
|
||||
bool SentenceAlignment::create( char targetString[], char sourceString[], char alignmentString[], char weightString[], int sentenceID, bool boundaryRules)
|
||||
{
|
||||
using namespace std;
|
||||
this->sentenceID = sentenceID;
|
||||
this->weightString = std::string(weightString);
|
||||
|
||||
// process sentence strings and store in target and source members.
|
||||
if (!processTargetSentence(targetString, sentenceID, boundaryRules)) {
|
||||
|
@ -35,6 +35,7 @@ public:
|
||||
std::vector<int> alignedCountS;
|
||||
std::vector<std::vector<int> > alignedToT;
|
||||
int sentenceID;
|
||||
std::string weightString;
|
||||
|
||||
virtual ~SentenceAlignment();
|
||||
|
||||
@ -43,7 +44,7 @@ public:
|
||||
virtual bool processSourceSentence(const char *, int, bool boundaryRules);
|
||||
|
||||
bool create(char targetString[], char sourceString[],
|
||||
char alignmentString[], int sentenceID, bool boundaryRules);
|
||||
char alignmentString[], char weightString[], int sentenceID, bool boundaryRules);
|
||||
|
||||
};
|
||||
|
||||
|
@ -114,7 +114,7 @@ int main(int argc, char* argv[])
|
||||
|
||||
if (argc < 6) {
|
||||
cerr << "syntax: extract en de align extract max-length [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ] ";
|
||||
cerr<<"| --OnlyOutputSpanInfo | --NoTTable | --GZOutput | --IncludeSentenceId | --SentenceOffset n ]\n";
|
||||
cerr<<"| --OnlyOutputSpanInfo | --NoTTable | --GZOutput | --IncludeSentenceId | --SentenceOffset n | --InstanceWeights filename ]\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@ -144,6 +144,12 @@ int main(int argc, char* argv[])
|
||||
sentenceOffset = atoi(argv[++i]);
|
||||
} else if (strcmp(argv[i], "--GZOutput") == 0) {
|
||||
options.initGzOutput(true);
|
||||
} else if (strcmp(argv[i], "--InstanceWeights") == 0) {
|
||||
if (i+1 >= argc) {
|
||||
cerr << "extract: syntax error, used switch --InstanceWeights without file name" << endl;
|
||||
exit(1);
|
||||
}
|
||||
options.initInstanceWeightsFile(argv[++i]);
|
||||
} else if(strcmp(argv[i],"--model") == 0) {
|
||||
if (i+1 >= argc) {
|
||||
cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
|
||||
@ -220,6 +226,13 @@ int main(int argc, char* argv[])
|
||||
istream *fFileP = &fFile;
|
||||
istream *aFileP = &aFile;
|
||||
|
||||
istream *iwFileP = NULL;
|
||||
auto_ptr<Moses::InputFileStream> instanceWeightsFile;
|
||||
if (options.getInstanceWeightsFile().length()) {
|
||||
instanceWeightsFile.reset(new Moses::InputFileStream(options.getInstanceWeightsFile()));
|
||||
iwFileP = instanceWeightsFile.get();
|
||||
}
|
||||
|
||||
// open output files
|
||||
if (options.isTranslationFlag()) {
|
||||
string fileNameExtractInv = fileNameExtract + ".inv" + (options.isGzOutput()?".gz":"");
|
||||
@ -238,10 +251,14 @@ int main(int argc, char* argv[])
|
||||
char englishString[LINE_MAX_LENGTH];
|
||||
char foreignString[LINE_MAX_LENGTH];
|
||||
char alignmentString[LINE_MAX_LENGTH];
|
||||
char weightString[LINE_MAX_LENGTH];
|
||||
SAFE_GETLINE((*eFileP), englishString, LINE_MAX_LENGTH, '\n', __FILE__);
|
||||
if (eFileP->eof()) break;
|
||||
SAFE_GETLINE((*fFileP), foreignString, LINE_MAX_LENGTH, '\n', __FILE__);
|
||||
SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n', __FILE__);
|
||||
if (iwFileP) {
|
||||
SAFE_GETLINE((*iwFileP), weightString, LINE_MAX_LENGTH, '\n', __FILE__);
|
||||
}
|
||||
SentenceAlignment sentence;
|
||||
// cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl;
|
||||
//az: output src, tgt, and alignment line
|
||||
@ -251,7 +268,7 @@ int main(int argc, char* argv[])
|
||||
cout << "LOG: ALT: " << alignmentString << endl;
|
||||
cout << "LOG: PHRASES_BEGIN:" << endl;
|
||||
}
|
||||
if (sentence.create( englishString, foreignString, alignmentString, i, false)) {
|
||||
if (sentence.create( englishString, foreignString, alignmentString, weightString, i, false)) {
|
||||
ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFile , extractFileInv, extractFileOrientation);
|
||||
task->Run();
|
||||
delete task;
|
||||
@ -695,6 +712,16 @@ for(int fi=startF; fi<=endF; fi++) {
|
||||
if (m_options.isOrientationFlag())
|
||||
outextractstrOrientation << orientationInfo;
|
||||
|
||||
if (m_options.getInstanceWeightsFile().length()) {
|
||||
if (m_options.isTranslationFlag()) {
|
||||
outextractstr << " ||| " << sentence.weightString;
|
||||
outextractstrInv << " ||| " << sentence.weightString;
|
||||
}
|
||||
if (m_options.isOrientationFlag()) {
|
||||
outextractstrOrientation << " ||| " << sentence.weightString;
|
||||
}
|
||||
}
|
||||
|
||||
if (m_options.isIncludeSentenceIdFlag()) {
|
||||
outextractstr << " ||| " << sentence.sentenceID;
|
||||
}
|
||||
|
@ -337,7 +337,7 @@ int main(int argc, char* argv[])
|
||||
cout << "LOG: PHRASES_BEGIN:" << endl;
|
||||
}
|
||||
|
||||
if (sentence.create(targetString, sourceString, alignmentString, i, options.boundaryRules)) {
|
||||
if (sentence.create(targetString, sourceString, alignmentString,"", i, options.boundaryRules)) {
|
||||
if (options.unknownWordLabelFlag) {
|
||||
collectWordLabelCounts(sentence);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user