This commit is contained in:
Nicola Bertoldi 2014-05-19 15:35:08 +02:00
parent 2f3cd5e2fe
commit 20b3e8929e
17 changed files with 240 additions and 234 deletions

View File

@ -221,7 +221,7 @@ OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase
phrase.AddWord(word);
if (retSourceTarget == 1) {
out = word;
out = word;
}
}
@ -232,7 +232,7 @@ OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase
phrase.AddWord(word);
if (retSourceTarget == 2) {
out = word;
out = word;
}
}

View File

@ -34,7 +34,8 @@ namespace MosesTuning
#define CHILD_STDOUT_WRITE pipefds_output[1]
MeteorScorer::MeteorScorer(const string& config)
: StatisticsBasedScorer("METEOR",config) {
: StatisticsBasedScorer("METEOR",config)
{
meteor_jar = getConfig("jar", "");
meteor_lang = getConfig("lang", "en");
meteor_task = getConfig("task", "tune");
@ -88,7 +89,8 @@ MeteorScorer::MeteorScorer(const string& config)
m_from_meteor = new ifdstream(CHILD_STDOUT_READ);
}
MeteorScorer::~MeteorScorer() {
MeteorScorer::~MeteorScorer()
{
// Cleanup IO
delete m_to_meteor;
delete m_from_meteor;
@ -171,7 +173,8 @@ float MeteorScorer::calculateScore(const vector<int>& comps) const
// Meteor unsupported, throw error if used
/**
 * Stub constructor for builds in which METEOR is unsupported.
 *
 * The configuration is forwarded to the StatisticsBasedScorer base under the
 * name "METEOR", after which the constructor throws unconditionally, so
 * construction never completes.
 *
 * @param config scorer configuration string handed to the base class
 * @throws runtime_error always
 */
MeteorScorer::MeteorScorer(const string& config)
  : StatisticsBasedScorer("METEOR",config)
{
  throw runtime_error("Meteor unsupported, requires GLIBCXX");
}

View File

@ -35,7 +35,7 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand)
m_fromFilter(NULL)
{
#if defined __MINGW32__
//TODO(jie): replace this function with boost implementation
//TODO(jie): replace this function with boost implementation
#else
// Child error signal install
// sigaction is the replacement for the traditional signal() method

View File

@ -132,7 +132,7 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
m_alignmentInfoStream = new std::ofstream(staticData.GetAlignmentOutputFile().c_str());
m_alignmentInfoCollector = new Moses::OutputCollector(m_alignmentInfoStream);
UTIL_THROW_IF2(!m_alignmentInfoStream->good(),
"File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile());
"File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile());
}
if (!staticData.GetOutputUnknownsFile().empty()) {
@ -140,7 +140,7 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
m_unknownsCollector = new Moses::OutputCollector(m_unknownsStream);
UTIL_THROW_IF2(!m_unknownsStream->good(),
"File for unknowns words could not be opened: " <<
staticData.GetOutputUnknownsFile());
staticData.GetOutputUnknownsFile());
}
}
@ -188,7 +188,7 @@ InputType*IOWrapper::GetInput(InputType* inputType)
void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Cannot be empty phrase");
"Cannot be empty phrase");
if (reportAllFactors == true) {
out << phrase;
} else {
@ -197,12 +197,12 @@ void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<Fa
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
out << *factor;
UTIL_THROW_IF2(factor == NULL,
"Empty factor 0 at position " << pos);
"Empty factor 0 at position " << pos);
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL,
"Empty factor " << i << " at position " << pos);
"Empty factor " << i << " at position " << pos);
out << "|" << *factor;
}
@ -246,7 +246,7 @@ void IOWrapper::OutputBestHypo(const std::vector<const Factor*>& mbrBestHypo, l
for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
const Factor *factor = mbrBestHypo[i];
UTIL_THROW_IF(factor == NULL, util::Exception,
"No factor at position " << i );
"No factor at position " << i );
cout << *factor << " ";
}
@ -403,7 +403,7 @@ void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &
// recursive
const search::Applied *child = applied->Children();
for (size_t i = 0; i < applied->GetArity(); i++) {
OutputTranslationOptions(out, applicationContext, child++, sentence, translationId);
OutputTranslationOptions(out, applicationContext, child++, sentence, translationId);
}
}
@ -459,7 +459,7 @@ void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, Applica
// recursive
const search::Applied *child = applied->Children();
for (size_t i = 0; i < applied->GetArity(); i++) {
OutputTreeFragmentsTranslationOptions(out, applicationContext, child++, sentence, translationId);
OutputTreeFragmentsTranslationOptions(out, applicationContext, child++, sentence, translationId);
}
}
@ -476,7 +476,7 @@ void IOWrapper::OutputDetailedTranslationReport(
OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
UTIL_THROW_IF2(m_detailOutputCollector == NULL,
"No ouput file for detailed reports specified");
"No ouput file for detailed reports specified");
m_detailOutputCollector->Write(translationId, out.str());
}
@ -493,7 +493,7 @@ void IOWrapper::OutputDetailedTranslationReport(
OutputTranslationOptions(out, applicationContext, applied, sentence, translationId);
UTIL_THROW_IF2(m_detailOutputCollector == NULL,
"No ouput file for detailed reports specified");
"No ouput file for detailed reports specified");
m_detailOutputCollector->Write(translationId, out.str());
}
@ -510,18 +510,18 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId);
UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL,
"No output file for tree fragments specified");
"No output file for tree fragments specified");
//Tree of full sentence
const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure();
if (treeStructure != NULL) {
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ ) {
if (sff[i] == treeStructure) {
if (sff[i] == treeStructure) {
const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
break;
}
}
}
}
@ -542,7 +542,7 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
OutputTreeFragmentsTranslationOptions(out, applicationContext, applied, sentence, translationId);
UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL,
"No output file for tree fragments specified");
"No output file for tree fragments specified");
//Tree of full sentence
//TODO: incremental search doesn't support stateful features
@ -581,7 +581,7 @@ void IOWrapper::OutputDetailedAllTranslationReport(
}
}
UTIL_THROW_IF2(m_detailAllOutputCollector == NULL,
"No output file for details specified");
"No output file for details specified");
m_detailAllOutputCollector->Write(translationId, out.str());
}
@ -609,7 +609,7 @@ void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId)
// delete 1st & last
UTIL_THROW_IF2(outPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outPhrase.RemoveWord(0);
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
@ -641,7 +641,7 @@ void IOWrapper::OutputBestHypo(search::Applied applied, long translationId)
Incremental::ToPhrase(applied, outPhrase);
// delete 1st & last
UTIL_THROW_IF2(outPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outPhrase.RemoveWord(0);
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
out << outPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
@ -730,7 +730,7 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, long tran
// delete 1st & last
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
@ -805,7 +805,7 @@ void IOWrapper::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList,
}
bool includeWordAlignment =
StaticData::Instance().PrintAlignmentInfoInNbest();
StaticData::Instance().PrintAlignmentInfoInNbest();
for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
p != nBestList.end(); ++p) {
@ -816,7 +816,7 @@ void IOWrapper::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList,
// delete <s> and </s>
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
@ -858,7 +858,7 @@ void IOWrapper::OutputNBestList(const std::vector<search::Applied> &nbest, long
Incremental::PhraseAndFeatures(*i, outputPhrase, features);
// <s> and </s>
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
@ -980,9 +980,9 @@ size_t IOWrapper::OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartT
}
size_t IOWrapper::OutputAlignmentNBest(
Alignments &retAlign,
const Moses::ChartKBestExtractor::Derivation &derivation,
size_t startTarget)
Alignments &retAlign,
const Moses::ChartKBestExtractor::Derivation &derivation,
size_t startTarget)
{
const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
@ -1023,7 +1023,7 @@ size_t IOWrapper::OutputAlignmentNBest(
// Recursively look thru child hypos
size_t currStartTarget = startTarget + totalTargetSize;
size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
currStartTarget);
currStartTarget);
targetOffsets[targetPos] = targetSize;
totalTargetSize += targetSize;
@ -1114,7 +1114,7 @@ size_t IOWrapper::OutputAlignment(Alignments &retAlign, const Moses::ChartHypoth
size_t targetInd = 0;
for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
if (tp.GetWord(targetPos).IsNonTerminal()) {
UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
size_t sourceInd = targetPos2SourceInd[targetPos];
size_t sourcePos = sourceInd2pos[sourceInd];

View File

@ -234,8 +234,7 @@ static void ShowWeights()
const StatefulFeatureFunction *ff = sff[i];
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
}
else {
} else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
@ -243,8 +242,7 @@ static void ShowWeights()
const StatelessFeatureFunction *ff = slf[i];
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
}
else {
} else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}

View File

@ -254,7 +254,7 @@ public:
fileName << "." << compression;
}
boost::iostreams::filtering_ostream *file
= new boost::iostreams::filtering_ostream;
= new boost::iostreams::filtering_ostream;
if ( compression == "gz" ) {
file->push( boost::iostreams::gzip_compressor() );
@ -262,8 +262,8 @@ public:
file->push( boost::iostreams::bzip2_compressor() );
} else if ( compression != "txt" ) {
TRACE_ERR("Unrecognized hypergraph compression format ("
<< compression
<< ") - using uncompressed plain txt" << std::endl);
<< compression
<< ") - using uncompressed plain txt" << std::endl);
compression = "txt";
}
@ -275,9 +275,9 @@ public:
file -> flush();
} else {
TRACE_ERR("Cannot output hypergraph for line " << m_lineNumber
<< " because the output file " << fileName.str()
<< " is not open or not ready for writing"
<< std::endl);
<< " because the output file " << fileName.str()
<< " is not open or not ready for writing"
<< std::endl);
}
file -> pop();
delete file;
@ -504,8 +504,7 @@ static void ShowWeights()
const StatefulFeatureFunction *ff = sff[i];
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
}
else {
} else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
@ -513,8 +512,7 @@ static void ShowWeights()
const StatelessFeatureFunction *ff = slf[i];
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
}
else {
} else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
@ -604,15 +602,15 @@ int main(int argc, char** argv)
exit(1);
}
std::cerr <<"Before StaticData::LoadDataStatic" << std::endl;
std::cerr <<"Before StaticData::LoadDataStatic" << std::endl;
// initialize all "global" variables, which are stored in StaticData
// note: this also loads models such as the language model, etc.
if (!StaticData::LoadDataStatic(&params, argv[0])) {
exit(1);
}
std::cerr <<"After StaticData::LoadDataStatic" << std::endl;
std::cerr <<"After StaticData::LoadDataStatic" << std::endl;
std::cerr <<"Before ShowWeights" << std::endl;
std::cerr <<"Before ShowWeights" << std::endl;
// setting "-show-weights" -> just dump out weights and exit
if (params.isParamSpecified("show-weights")) {
ShowWeights();

View File

@ -56,8 +56,8 @@ DomainFeature::DomainFeature(const string& domainFile) : m_propertyKey("domain")
}
void DomainFeature::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
float count,
int sentenceId) const
float count,
int sentenceId) const
{
std::string value = m_domain.getDomainOfSentence(sentenceId);
phrasePair.AddProperty(m_propertyKey, value, count);

View File

@ -29,7 +29,8 @@
using namespace std;
namespace MosesTraining {
namespace MosesTraining
{
extern Vocabulary vcbT;
@ -39,13 +40,13 @@ extern bool hierarchicalFlag;
ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource,
const PHRASE *phraseTarget,
ALIGNMENT *targetToSourceAlignment,
float count, float pcfgSum ) :
m_phraseSource(phraseSource),
m_phraseTarget(phraseTarget),
m_count(count),
m_pcfgSum(pcfgSum)
const PHRASE *phraseTarget,
ALIGNMENT *targetToSourceAlignment,
float count, float pcfgSum ) :
m_phraseSource(phraseSource),
m_phraseTarget(phraseTarget),
m_count(count),
m_pcfgSum(pcfgSum)
{
assert(phraseSource->empty());
assert(phraseTarget->empty());
@ -54,7 +55,7 @@ ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource,
m_pcfgSum = pcfgSum;
std::pair< std::map<ALIGNMENT*,float>::iterator, bool > insertedAlignment =
m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
m_lastTargetToSourceAlignment = insertedAlignment.first;
m_lastCount = m_count;
@ -64,7 +65,8 @@ ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource,
}
/** Destructor: all cleanup is delegated to Clear(). */
ExtractionPhrasePair::~ExtractionPhrasePair( )
{
  Clear();
}
@ -86,7 +88,7 @@ bool ExtractionPhrasePair::Add( ALIGNMENT *targetToSourceAlignment,
return false;
} else {
std::pair< std::map<ALIGNMENT*,float>::iterator, bool > insertedAlignment =
m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
if ( !insertedAlignment.second ) {
// the alignment already exists: increment count
insertedAlignment.first->second += count;

View File

@ -24,13 +24,15 @@
#include <set>
#include <map>
namespace MosesTraining {
namespace MosesTraining
{
typedef std::vector< std::set<size_t> > ALIGNMENT;
class ExtractionPhrasePair {
class ExtractionPhrasePair
{
protected:
@ -48,7 +50,7 @@ protected:
std::map<ALIGNMENT*,float> m_targetToSourceAlignments;
std::map<std::string,
std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > > m_properties;
std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > > m_properties;
float m_lastCount;
float m_lastPcfgSum;
@ -126,10 +128,9 @@ public:
void AddProperties( const std::string &str, float count );
void AddProperty( const std::string &key, const std::string &value, float count )
{
void AddProperty( const std::string &key, const std::string &value, float count ) {
std::map<std::string,
std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
if ( iter == m_properties.end() ) {
// key not found: insert property key and value
PROPERTY_VALUES *propertyValues = new PROPERTY_VALUES();

View File

@ -8,7 +8,8 @@ namespace MosesTraining
void InternalStructFeature::add(const ScoreFeatureContext& context,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const {
std::map<std::string,float>& sparseValues) const
{
const std::map<std::string,float> *allTrees = context.phrasePair.GetProperty("Tree"); // our would we rather want to take the most frequent one only?
for ( std::map<std::string,float>::const_iterator iter=allTrees->begin();
iter!=allTrees->end(); ++iter ) {
@ -19,24 +20,26 @@ void InternalStructFeature::add(const ScoreFeatureContext& context,
/**
 * Dense variant: appends one value computed from the occurrences of the
 * substring "NP" in the tree fragment.
 *
 * @param treeFragment tree fragment text to scan; dereferenced without a null check
 * @param count        amount added to the running total for each "NP" found
 * @param denseValues  receives exp(total) as a single appended entry
 * @param sparseValues not touched by this variant
 */
void InternalStructFeatureDense::add(const std::string *treeFragment,
                                     float count,
                                     std::vector<float>& denseValues,
                                     std::map<std::string,float>& sparseValues) const
{
  //cout<<"Dense: "<<*internalStruct<<endl;
  size_t start=0;
  int countNP=0;
  // Scan for non-overlapping occurrences of "NP", resuming right after each hit.
  while((start = treeFragment->find("NP", start)) != string::npos) {
    // NOTE(review): countNP is an int, so the float sum is truncated back to an
    // integer on every addition (a fractional count < 1 never accumulates) — confirm this is intended.
    countNP += count;
    start+=2; //length of "NP"
  }
  //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln?
  //should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 )
  denseValues.push_back(exp(countNP));
}
void InternalStructFeatureSparse::add(const std::string *treeFragment,
float count,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const {
std::map<std::string,float>& sparseValues) const
{
//cout<<"Sparse: "<<*internalStruct<<endl;
if(treeFragment->find("VBZ")!=std::string::npos)
sparseValues["NTVBZ"] += count;

View File

@ -21,20 +21,20 @@ namespace MosesTraining
/**
 * Base class of the tree-fragment score features.
 *
 * The public add() is the ScoreFeature-style entry point taking a
 * ScoreFeatureContext; the protected pure-virtual add() overload receives a
 * single tree fragment with its count and is what subclasses implement.
 */
class InternalStructFeature : public ScoreFeature
{
public:
  /** Initialises the type tag to 0; subclasses assign their own value. */
  InternalStructFeature() : m_type(0) {}

  /** Add the values for this feature function. */
  void add(const ScoreFeatureContext& context,
           std::vector<float>& denseValues,
           std::map<std::string,float>& sparseValues) const;

protected:
  /** Overridden in subclass */
  virtual void add(const std::string *treeFragment,
                   float count,
                   std::vector<float>& denseValues,
                   std::map<std::string,float>& sparseValues) const = 0;

  // Type tag of the concrete feature: 0 here in the base class.
  int m_type;
};
class InternalStructFeatureDense : public InternalStructFeature
@ -45,10 +45,10 @@ public:
m_type=1;
} //std::cout<<"InternalStructFeatureDense: Construct "<<m_type<<"\n";}
protected:
virtual void add(const std::string *treeFragment,
float count,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const;
virtual void add(const std::string *treeFragment,
float count,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const;
};
class InternalStructFeatureSparse : public InternalStructFeature
@ -59,10 +59,10 @@ public:
m_type=2;
}// std::cout<<"InternalStructFeatureSparse: Construct "<<m_type<<"\n";}
protected:
virtual void add(const std::string *treeFragment,
float count,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const;
virtual void add(const std::string *treeFragment,
float count,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const;
};
}

View File

@ -77,12 +77,12 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args)
}
sparseDomainAdded = true;
m_includeSentenceId = true;
} else if(args[i] == "--TreeFeatureSparse"){
//MARIA
m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
} else if(args[i] == "--TreeFeatureDense"){
//MARIA
m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense()));
} else if(args[i] == "--TreeFeatureSparse") {
//MARIA
m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
} else if(args[i] == "--TreeFeatureDense") {
//MARIA
m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense()));
} else {
UTIL_THROW(ScoreFeatureArgumentException,"Unknown score argument " << args[i]);
}
@ -92,8 +92,8 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args)
}
void ScoreFeatureManager::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
float count,
int sentenceId) const
float count,
int sentenceId) const
{
for (size_t i = 0; i < m_features.size(); ++i) {
m_features[i]->addPropertiesToPhrasePair(phrasePair, count, sentenceId);

View File

@ -92,9 +92,9 @@ class ExtractTask
public:
/**
 * Sets up a task for one sentence pair.
 *
 * Initialises m_sentence, m_options and m_extractFileOrientation from the
 * corresponding arguments. The id argument is accepted but not used here.
 */
ExtractTask(size_t id, SentenceAlignment &sentence,PhraseExtractionOptions &initoptions, Moses::OutputFileStream &extractFileOrientation)
  :m_sentence(sentence),
   m_options(initoptions),
   m_extractFileOrientation(extractFileOrientation)
{}
void Run();
private:
void extract(SentenceAlignment &);
@ -151,11 +151,11 @@ int main(int argc, char* argv[])
}
options.initInstanceWeightsFile(argv[++i]);
} else if (strcmp(argv[i], "--Debug") == 0) {
options.debug = true;
options.debug = true;
} else if (strcmp(argv[i], "--MinPhraseLength") == 0) {
options.minPhraseLength = atoi(argv[++i]);
options.minPhraseLength = atoi(argv[++i]);
} else if (strcmp(argv[i], "--Separator") == 0) {
options.separator = argv[++i];
options.separator = argv[++i];
} else if(strcmp(argv[i],"--model") == 0) {
if (i+1 >= argc) {
cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
@ -605,16 +605,14 @@ string getOrientString(REO_POS orient, REO_MODEL_TYPE modelType)
/**
 * Classifies a string by the position of the first occurrence of "swap".
 *
 * @param str text to inspect
 * @return 0 if "swap" does not occur in str,
 *         1 if str begins with "swap",
 *         2 if "swap" first occurs at any later position
 */
int getClass(const std::string &str)
{
  const std::string::size_type pos = str.find("swap");
  if (pos == std::string::npos) {
    return 0;
  }
  return (pos == 0) ? 1 : 2;
}
void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE, int startF, int endF , string &orientationInfo)
@ -635,19 +633,19 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
// start
m_extractFileOrientation << "<s> ";
for(int fi=0; fi<startF; fi++) {
m_extractFileOrientation << sentence.source[fi] << " ";
m_extractFileOrientation << sentence.source[fi] << " ";
}
m_extractFileOrientation << sep << " ";
// middle
for(int fi=startF; fi<=endF; fi++) {
m_extractFileOrientation << sentence.source[fi] << " ";
m_extractFileOrientation << sentence.source[fi] << " ";
}
m_extractFileOrientation << sep << " ";
// end
for(int fi=endF+1; fi<sentence.source.size(); fi++) {
m_extractFileOrientation << sentence.source[fi] << " ";
m_extractFileOrientation << sentence.source[fi] << " ";
}
m_extractFileOrientation << "</s> ";
@ -655,7 +653,7 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
// target
/*
for(int ei=startE; ei<=endE; ei++) {
m_extractFileOrientation << sentence.target[ei] << " ";
m_extractFileOrientation << sentence.target[ei] << " ";
}
*/
m_extractFileOrientation << endl;

View File

@ -155,7 +155,7 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--UnalignedFunctionWordPenalty") == 0) {
unalignedFWFlag = true;
if (i+1==argc) {
std::cerr << "ERROR: specify function words file for unaligned function word penalty!" << std::endl;
std::cerr << "ERROR: specify function words file for unaligned function word penalty!" << std::endl;
exit(1);
}
fileNameFunctionWords = argv[++i];
@ -224,8 +224,8 @@ int main(int argc, char* argv[])
Moses::OutputFileStream *outputFile = new Moses::OutputFileStream();
bool success = outputFile->Open(fileNamePhraseTable);
if (!success) {
std::cerr << "ERROR: could not open file phrase table file "
<< fileNamePhraseTable << std::endl;
std::cerr << "ERROR: could not open file phrase table file "
<< fileNamePhraseTable << std::endl;
exit(1);
}
phraseTableFile = outputFile;
@ -295,7 +295,9 @@ int main(int argc, char* argv[])
tmpCount, tmpPcfgSum);
bool matchesPrevious = false;
bool sourceMatch = true; bool targetMatch = true; bool alignmentMatch = true; // be careful with these,
bool sourceMatch = true;
bool targetMatch = true;
bool alignmentMatch = true; // be careful with these,
// ExtractionPhrasePair::Matches() checks them in order and does not continue with the others
// once the first of them has been found to have to be set to false
@ -557,45 +559,45 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
// alignment
if ( hierarchicalFlag ) {
// always output alignment if hiero style
assert(phraseTarget->size() == bestAlignmentT2S->size()+1);
std::vector<std::string> alignment;
for ( size_t j = 0; j < phraseTarget->size() - 1; ++j ) {
if ( isNonTerminal(vcbT.getWord( phraseTarget->at(j) ))) {
if ( bestAlignmentT2S->at(j).size() != 1 ) {
std::cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << std::endl;
phraseTableFile.flush();
assert(bestAlignmentT2S->at(j).size() == 1);
}
size_t sourcePos = *(bestAlignmentT2S->at(j).begin());
//phraseTableFile << sourcePos << "-" << j << " ";
std::stringstream point;
point << sourcePos << "-" << j;
alignment.push_back(point.str());
} else {
for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
size_t sourcePos = *setIter;
std::stringstream point;
point << sourcePos << "-" << j;
alignment.push_back(point.str());
}
// always output alignment if hiero style
assert(phraseTarget->size() == bestAlignmentT2S->size()+1);
std::vector<std::string> alignment;
for ( size_t j = 0; j < phraseTarget->size() - 1; ++j ) {
if ( isNonTerminal(vcbT.getWord( phraseTarget->at(j) ))) {
if ( bestAlignmentT2S->at(j).size() != 1 ) {
std::cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << std::endl;
phraseTableFile.flush();
assert(bestAlignmentT2S->at(j).size() == 1);
}
}
// now print all alignments, sorted by source index
sort(alignment.begin(), alignment.end());
for (size_t i = 0; i < alignment.size(); ++i) {
phraseTableFile << alignment[i] << " ";
}
} else if ( !inverseFlag && wordAlignmentFlag) {
// alignment info in pb model
for (size_t j = 0; j < bestAlignmentT2S->size(); ++j) {
size_t sourcePos = *(bestAlignmentT2S->at(j).begin());
//phraseTableFile << sourcePos << "-" << j << " ";
std::stringstream point;
point << sourcePos << "-" << j;
alignment.push_back(point.str());
} else {
for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
size_t sourcePos = *setIter;
phraseTableFile << sourcePos << "-" << j << " ";
std::stringstream point;
point << sourcePos << "-" << j;
alignment.push_back(point.str());
}
}
}
// now print all alignments, sorted by source index
sort(alignment.begin(), alignment.end());
for (size_t i = 0; i < alignment.size(); ++i) {
phraseTableFile << alignment[i] << " ";
}
} else if ( !inverseFlag && wordAlignmentFlag) {
// alignment info in pb model
for (size_t j = 0; j < bestAlignmentT2S->size(); ++j) {
for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
size_t sourcePos = *setIter;
phraseTableFile << sourcePos << "-" << j << " ";
}
}
}
phraseTableFile << " ||| ";
@ -808,9 +810,9 @@ void LexicalTable::load( const string &fileName )
std::vector<string> token = tokenize( line );
if (token.size() != 3) {
std::cerr << "line " << i << " in " << fileName
<< " has wrong number of tokens, skipping:" << std::endl
<< token.size() << " " << token[0] << " " << line << std::endl;
std::cerr << "line " << i << " in " << fileName
<< " has wrong number of tokens, skipping:" << std::endl
<< token.size() << " " << token[0] << " " << line << std::endl;
continue;
}
@ -889,7 +891,8 @@ void printTargetPhrase(const PHRASE *phraseSource, const PHRASE *phraseTarget,
void invertAlignment(const PHRASE *phraseSource, const PHRASE *phraseTarget,
const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment) {
const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment)
{
// typedef std::vector< std::set<size_t> > ALIGNMENT;
outSourceToTargetAlignment->clear();