Merge branch 'master' into weight-new

This commit is contained in:
Hieu Hoang 2012-12-24 23:28:48 +00:00
commit b9c76342e9
7 changed files with 148 additions and 9 deletions

View File

@ -13,8 +13,8 @@ void PhraseLengthFeature::Evaluate(
ScoreComponentCollection* accumulator) const
{
// get length of source and target phrase
size_t sourceLength = context.GetTargetPhrase().GetSize();
size_t targetLength = context.GetTranslationOption().GetSourcePhrase()->GetSize();
size_t targetLength = context.GetTargetPhrase().GetSize();
size_t sourceLength = context.GetTranslationOption().GetSourceWordsRange().GetNumWordsCovered();
// create feature names
stringstream nameSource;

View File

@ -0,0 +1,102 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <boost/test/unit_test.hpp>
#include "FactorCollection.h"
#include "PhraseLengthFeature.h"
#include "Sentence.h"
#include "TargetPhrase.h"
#include "TranslationOption.h"
using namespace Moses;
using namespace std;
BOOST_AUTO_TEST_SUITE(phrase_length_feature)
//TODO: Factor out setup code so that it can be reused
// Test helper: adds `text` to the FactorCollection singleton as an
// Input-direction factor of type 0 and returns a Word whose factor 0 is the
// Factor obtained from that call.
static Word MakeWord(string text) {
  Word word;
  word.SetFactor(0, FactorCollection::Instance().AddFactor(Input, 0, text));
  return word;
}
// Exercises PhraseLengthFeature::Evaluate on three translation options and
// checks, for each one, that the features named "s<N>", "t<M>" and "<N>,<M>"
// score 1, where N is the number of source words covered by the option's
// WordsRange and M is the number of words in its target phrase.
BOOST_AUTO_TEST_CASE(evaluate) {
  // Words used to assemble the target phrases.
  Word first = MakeWord("w1");
  Word second = MakeWord("y2");
  Word third = MakeWord("x3");
  Word fourth = MakeWord("w4");

  // Target phrase with three words.
  Phrase phraseOfThree;
  phraseOfThree.AddWord(first);
  phraseOfThree.AddWord(third);
  phraseOfThree.AddWord(fourth);
  TargetPhrase targetOfThree(phraseOfThree);

  // Target phrase with two words.
  Phrase phraseOfTwo;
  phraseOfTwo.AddWord(first);
  phraseOfTwo.AddWord(second);
  TargetPhrase targetOfTwo(phraseOfTwo);

  // Target phrase with four words.
  Phrase phraseOfFour;
  phraseOfFour.AddWord(second);
  phraseOfFour.AddWord(first);
  phraseOfFour.AddWord(fourth);
  phraseOfFour.AddWord(fourth);
  TargetPhrase targetOfFour(phraseOfFour);

  // Input sentence the translation options refer to (read with factor 0 only).
  Sentence sentence;
  vector<FactorType> factorOrder;
  factorOrder.push_back(0);
  stringstream input("the input sentence has 6 words");
  sentence.Read(input, factorOrder);

  // Source span [0,0] (1 word) paired with the three-word target.
  TranslationOption optionOneToThree(WordsRange(0,0), targetOfThree, sentence);
  PhraseBasedFeatureContext contextOneToThree(optionOneToThree, sentence);

  // Source span [1,3] (3 words) paired with the two-word target.
  TranslationOption optionThreeToTwo(WordsRange(1,3), targetOfTwo, sentence);
  PhraseBasedFeatureContext contextThreeToTwo(optionThreeToTwo, sentence);

  // Source span [2,3] (2 words) paired with the four-word target.
  TranslationOption optionTwoToFour(WordsRange(2,3), targetOfFour, sentence);
  PhraseBasedFeatureContext contextTwoToFour(optionTwoToFour, sentence);

  // Feature under test, followed by one score accumulator per option.
  PhraseLengthFeature plf;
  ScoreComponentCollection acc1;
  ScoreComponentCollection acc2;
  ScoreComponentCollection acc3;

  plf.Evaluate(contextOneToThree, &acc1);
  BOOST_CHECK_EQUAL(acc1.GetScoreForProducer(&plf, "s1"),1);
  BOOST_CHECK_EQUAL(acc1.GetScoreForProducer(&plf, "t3"),1);
  BOOST_CHECK_EQUAL(acc1.GetScoreForProducer(&plf, "1,3"),1);

  plf.Evaluate(contextThreeToTwo, &acc2);
  BOOST_CHECK_EQUAL(acc2.GetScoreForProducer(&plf, "s3"),1);
  BOOST_CHECK_EQUAL(acc2.GetScoreForProducer(&plf, "t2"),1);
  BOOST_CHECK_EQUAL(acc2.GetScoreForProducer(&plf, "3,2"),1);

  plf.Evaluate(contextTwoToFour, &acc3);
  BOOST_CHECK_EQUAL(acc3.GetScoreForProducer(&plf, "s2"),1);
  BOOST_CHECK_EQUAL(acc3.GetScoreForProducer(&plf, "t4"),1);
  BOOST_CHECK_EQUAL(acc3.GetScoreForProducer(&plf, "2,4"),1);
}
BOOST_AUTO_TEST_SUITE_END()

View File

@ -46,6 +46,7 @@ class PhraseExtractionOptions {
bool includeSentenceIdFlag; //include sentence id in extract file
bool onlyOutputSpanInfo;
bool gzOutput;
std::string instanceWeightsFile; //weights for each sentence
public:
PhraseExtractionOptions(const int initmaxPhraseLength):
@ -99,7 +100,11 @@ public:
}
void initGzOutput (const bool initgzOutput){
gzOutput= initgzOutput;
}
}
// Stores the name of the file holding the per-sentence instance weights.
// initInstanceWeightsFile must point to a NUL-terminated C string (not null).
void initInstanceWeightsFile(const char* initInstanceWeightsFile) {
  instanceWeightsFile.assign(initInstanceWeightsFile);
}
// functions for getting values
bool isAllModelsOutputFlag() const {
return allModelsOutputFlag;
@ -136,7 +141,10 @@ public:
}
bool isGzOutput () const {
return gzOutput;
}
}
std::string getInstanceWeightsFile() const {
return instanceWeightsFile;
}
};
}

View File

@ -54,10 +54,11 @@ bool SentenceAlignment::processSourceSentence(const char * sourceString, int, bo
return true;
}
bool SentenceAlignment::create( char targetString[], char sourceString[], char alignmentString[], int sentenceID, bool boundaryRules)
bool SentenceAlignment::create( char targetString[], char sourceString[], char alignmentString[], char weightString[], int sentenceID, bool boundaryRules)
{
using namespace std;
this->sentenceID = sentenceID;
this->weightString = std::string(weightString);
// process sentence strings and store in target and source members.
if (!processTargetSentence(targetString, sentenceID, boundaryRules)) {

View File

@ -35,6 +35,7 @@ public:
std::vector<int> alignedCountS;
std::vector<std::vector<int> > alignedToT;
int sentenceID;
std::string weightString;
virtual ~SentenceAlignment();
@ -43,7 +44,7 @@ public:
virtual bool processSourceSentence(const char *, int, bool boundaryRules);
bool create(char targetString[], char sourceString[],
char alignmentString[], int sentenceID, bool boundaryRules);
char alignmentString[], char weightString[], int sentenceID, bool boundaryRules);
};

View File

@ -114,7 +114,7 @@ int main(int argc, char* argv[])
if (argc < 6) {
cerr << "syntax: extract en de align extract max-length [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ] ";
cerr<<"| --OnlyOutputSpanInfo | --NoTTable | --GZOutput | --IncludeSentenceId | --SentenceOffset n ]\n";
cerr<<"| --OnlyOutputSpanInfo | --NoTTable | --GZOutput | --IncludeSentenceId | --SentenceOffset n | --InstanceWeights filename ]\n";
exit(1);
}
@ -144,6 +144,12 @@ int main(int argc, char* argv[])
sentenceOffset = atoi(argv[++i]);
} else if (strcmp(argv[i], "--GZOutput") == 0) {
options.initGzOutput(true);
} else if (strcmp(argv[i], "--InstanceWeights") == 0) {
if (i+1 >= argc) {
cerr << "extract: syntax error, used switch --InstanceWeights without file name" << endl;
exit(1);
}
options.initInstanceWeightsFile(argv[++i]);
} else if(strcmp(argv[i],"--model") == 0) {
if (i+1 >= argc) {
cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
@ -220,6 +226,13 @@ int main(int argc, char* argv[])
istream *fFileP = &fFile;
istream *aFileP = &aFile;
istream *iwFileP = NULL;
auto_ptr<Moses::InputFileStream> instanceWeightsFile;
if (options.getInstanceWeightsFile().length()) {
instanceWeightsFile.reset(new Moses::InputFileStream(options.getInstanceWeightsFile()));
iwFileP = instanceWeightsFile.get();
}
// open output files
if (options.isTranslationFlag()) {
string fileNameExtractInv = fileNameExtract + ".inv" + (options.isGzOutput()?".gz":"");
@ -238,10 +251,14 @@ int main(int argc, char* argv[])
char englishString[LINE_MAX_LENGTH];
char foreignString[LINE_MAX_LENGTH];
char alignmentString[LINE_MAX_LENGTH];
char weightString[LINE_MAX_LENGTH];
SAFE_GETLINE((*eFileP), englishString, LINE_MAX_LENGTH, '\n', __FILE__);
if (eFileP->eof()) break;
SAFE_GETLINE((*fFileP), foreignString, LINE_MAX_LENGTH, '\n', __FILE__);
SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n', __FILE__);
if (iwFileP) {
SAFE_GETLINE((*iwFileP), weightString, LINE_MAX_LENGTH, '\n', __FILE__);
}
SentenceAlignment sentence;
// cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl;
//az: output src, tgt, and alignment line
@ -251,7 +268,7 @@ int main(int argc, char* argv[])
cout << "LOG: ALT: " << alignmentString << endl;
cout << "LOG: PHRASES_BEGIN:" << endl;
}
if (sentence.create( englishString, foreignString, alignmentString, i, false)) {
if (sentence.create( englishString, foreignString, alignmentString, weightString, i, false)) {
ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFile , extractFileInv, extractFileOrientation);
task->Run();
delete task;
@ -695,6 +712,16 @@ for(int fi=startF; fi<=endF; fi++) {
if (m_options.isOrientationFlag())
outextractstrOrientation << orientationInfo;
if (m_options.getInstanceWeightsFile().length()) {
if (m_options.isTranslationFlag()) {
outextractstr << " ||| " << sentence.weightString;
outextractstrInv << " ||| " << sentence.weightString;
}
if (m_options.isOrientationFlag()) {
outextractstrOrientation << " ||| " << sentence.weightString;
}
}
if (m_options.isIncludeSentenceIdFlag()) {
outextractstr << " ||| " << sentence.sentenceID;
}

View File

@ -337,7 +337,7 @@ int main(int argc, char* argv[])
cout << "LOG: PHRASES_BEGIN:" << endl;
}
if (sentence.create(targetString, sourceString, alignmentString, i, options.boundaryRules)) {
if (sentence.create(targetString, sourceString, alignmentString,"", i, options.boundaryRules)) {
if (options.unknownWordLabelFlag) {
collectWordLabelCounts(sentence);
}