From 20b3e8929e93c40e0f1ec61b9268330fe59607f9 Mon Sep 17 00:00:00 2001 From: Nicola Bertoldi Date: Mon, 19 May 2014 15:35:08 +0200 Subject: [PATCH] beautify --- OnDiskPt/Main.cpp | 4 +- mert/MeteorScorer.cpp | 9 +- mert/MeteorScorer.h | 2 +- mert/PreProcessFilter.cpp | 2 +- moses-chart-cmd/IOWrapper.cpp | 52 ++++----- moses-chart-cmd/Main.cpp | 6 +- moses-cmd/Main.cpp | 32 +++--- phrase-extract/DomainFeature.cpp | 14 +-- phrase-extract/DomainFeature.h | 4 +- phrase-extract/ExtractionPhrasePair.cpp | 52 ++++----- phrase-extract/ExtractionPhrasePair.h | 27 ++--- phrase-extract/InternalStructFeature.cpp | 29 ++--- phrase-extract/InternalStructFeature.h | 38 +++---- phrase-extract/ScoreFeature.cpp | 18 +-- phrase-extract/ScoreFeature.h | 12 +- phrase-extract/extract-ordering-main.cpp | 38 +++---- phrase-extract/score-main.cpp | 135 ++++++++++++----------- 17 files changed, 240 insertions(+), 234 deletions(-) diff --git a/OnDiskPt/Main.cpp b/OnDiskPt/Main.cpp index f2d75ed05..063caddb3 100644 --- a/OnDiskPt/Main.cpp +++ b/OnDiskPt/Main.cpp @@ -221,7 +221,7 @@ OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase phrase.AddWord(word); if (retSourceTarget == 1) { - out = word; + out = word; } } @@ -232,7 +232,7 @@ OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase phrase.AddWord(word); if (retSourceTarget == 2) { - out = word; + out = word; } } diff --git a/mert/MeteorScorer.cpp b/mert/MeteorScorer.cpp index 2c70da802..904e68efd 100644 --- a/mert/MeteorScorer.cpp +++ b/mert/MeteorScorer.cpp @@ -34,7 +34,8 @@ namespace MosesTuning #define CHILD_STDOUT_WRITE pipefds_output[1] MeteorScorer::MeteorScorer(const string& config) - : StatisticsBasedScorer("METEOR",config) { + : StatisticsBasedScorer("METEOR",config) +{ meteor_jar = getConfig("jar", ""); meteor_lang = getConfig("lang", "en"); meteor_task = getConfig("task", "tune"); @@ -88,7 +89,8 @@ MeteorScorer::MeteorScorer(const string& config) m_from_meteor = new ifdstream(CHILD_STDOUT_READ); } -MeteorScorer::~MeteorScorer() { +MeteorScorer::~MeteorScorer() +{ // Cleanup IO delete m_to_meteor; delete m_from_meteor; @@ -171,7 +173,8 @@ float MeteorScorer::calculateScore(const vector& comps) const // Meteor unsupported, throw error if used MeteorScorer::MeteorScorer(const string& config) - : StatisticsBasedScorer("METEOR",config) { + : StatisticsBasedScorer("METEOR",config) +{ throw runtime_error("Meteor unsupported, requires GLIBCXX"); } diff --git a/mert/MeteorScorer.h b/mert/MeteorScorer.h index 336833b97..8260a9455 100644 --- a/mert/MeteorScorer.h +++ b/mert/MeteorScorer.h @@ -20,7 +20,7 @@ class ifdstream; class ScoreStats; /** - * Meteor scoring + * Meteor scoring * * https://github.com/mjdenkowski/meteor * http://statmt.org/wmt11/pdf/WMT07.pdf diff --git a/mert/PreProcessFilter.cpp b/mert/PreProcessFilter.cpp index a36ed6155..7a3add789 100644 --- a/mert/PreProcessFilter.cpp +++ b/mert/PreProcessFilter.cpp @@ -35,7 +35,7 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand) m_fromFilter(NULL) { #if defined __MINGW32__ - //TODO(jie): replace this function with boost implementation + //TODO(jie): replace this function with boost implementation #else // Child error signal install // sigaction is the replacement for the traditional signal() method diff --git a/moses-chart-cmd/IOWrapper.cpp b/moses-chart-cmd/IOWrapper.cpp index 81612ed1f..a24d55caa 100644 --- a/moses-chart-cmd/IOWrapper.cpp +++ b/moses-chart-cmd/IOWrapper.cpp @@ -132,7 +132,7 @@ IOWrapper::IOWrapper(const std::vector &inputFactorOrder m_alignmentInfoStream = new std::ofstream(staticData.GetAlignmentOutputFile().c_str()); m_alignmentInfoCollector = new Moses::OutputCollector(m_alignmentInfoStream); UTIL_THROW_IF2(!m_alignmentInfoStream->good(), - "File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile()); + "File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile()); } if (!staticData.GetOutputUnknownsFile().empty()) { @@ -140,7 +140,7 @@ IOWrapper::IOWrapper(const std::vector &inputFactorOrder m_unknownsCollector = new Moses::OutputCollector(m_unknownsStream); UTIL_THROW_IF2(!m_unknownsStream->good(), "File for unknowns words could not be opened: " << - staticData.GetOutputUnknownsFile()); + staticData.GetOutputUnknownsFile()); } } @@ -188,7 +188,7 @@ InputType*IOWrapper::GetInput(InputType* inputType) void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector &outputFactorOrder, bool reportAllFactors) { UTIL_THROW_IF2(outputFactorOrder.size() == 0, - "Cannot be empty phrase"); + "Cannot be empty phrase"); if (reportAllFactors == true) { out << phrase; } else { @@ -197,12 +197,12 @@ void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector& mbrBestHypo, l for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) { const Factor *factor = mbrBestHypo[i]; UTIL_THROW_IF(factor == NULL, util::Exception, - "No factor at position " << i ); + "No factor at position " << i ); cout << *factor << " "; } @@ -403,7 +403,7 @@ void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext & // recursive const search::Applied *child = applied->Children(); for (size_t i = 0; i < applied->GetArity(); i++) { - OutputTranslationOptions(out, applicationContext, child++, sentence, translationId); + OutputTranslationOptions(out, applicationContext, child++, sentence, translationId); } } @@ -459,7 +459,7 @@ void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, Applica // recursive const search::Applied *child = applied->Children(); for (size_t i = 0; i < applied->GetArity(); i++) { - OutputTreeFragmentsTranslationOptions(out, applicationContext, child++, sentence, translationId); + OutputTreeFragmentsTranslationOptions(out, applicationContext, child++, sentence, translationId); } } @@ -476,7 +476,7 @@ void IOWrapper::OutputDetailedTranslationReport( OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId); UTIL_THROW_IF2(m_detailOutputCollector == NULL, - "No ouput file for detailed reports specified"); + "No ouput file for detailed reports specified"); m_detailOutputCollector->Write(translationId, out.str()); } @@ -493,7 +493,7 @@ void IOWrapper::OutputDetailedTranslationReport( OutputTranslationOptions(out, applicationContext, applied, sentence, translationId); UTIL_THROW_IF2(m_detailOutputCollector == NULL, - "No ouput file for detailed reports specified"); + "No ouput file for detailed reports specified"); m_detailOutputCollector->Write(translationId, out.str()); } @@ -510,18 +510,18 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport( OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId); UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL, - "No output file for tree fragments specified"); + "No output file for tree fragments specified"); //Tree of full sentence const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure(); if (treeStructure != NULL) { const vector& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions(); for( size_t i=0; i(hypo->GetFFState(i)); out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n"; break; - } + } } } @@ -542,7 +542,7 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport( OutputTreeFragmentsTranslationOptions(out, applicationContext, applied, sentence, translationId); UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL, - "No output file for tree fragments specified"); + "No output file for tree fragments specified"); //Tree of full sentence //TODO: incremental search doesn't support stateful features @@ -581,7 +581,7 @@ void IOWrapper::OutputDetailedAllTranslationReport( } } UTIL_THROW_IF2(m_detailAllOutputCollector == NULL, - "No output file for details specified"); + "No output file for details specified"); m_detailAllOutputCollector->Write(translationId, out.str()); } @@ -609,7 +609,7 @@ void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId) // delete 1st & last UTIL_THROW_IF2(outPhrase.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); outPhrase.RemoveWord(0); outPhrase.RemoveWord(outPhrase.GetSize() - 1); @@ -641,7 +641,7 @@ void IOWrapper::OutputBestHypo(search::Applied applied, long translationId) Incremental::ToPhrase(applied, outPhrase); // delete 1st & last UTIL_THROW_IF2(outPhrase.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); outPhrase.RemoveWord(0); outPhrase.RemoveWord(outPhrase.GetSize() - 1); out << outPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder()); @@ -730,7 +730,7 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, long tran // delete 1st & last UTIL_THROW_IF2(outputPhrase.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); outputPhrase.RemoveWord(0); outputPhrase.RemoveWord(outputPhrase.GetSize() - 1); @@ -805,7 +805,7 @@ void IOWrapper::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList, } bool includeWordAlignment = - StaticData::Instance().PrintAlignmentInfoInNbest(); + StaticData::Instance().PrintAlignmentInfoInNbest(); for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin(); p != nBestList.end(); ++p) { @@ -816,7 +816,7 @@ void IOWrapper::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList, // delete and UTIL_THROW_IF2(outputPhrase.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); outputPhrase.RemoveWord(0); outputPhrase.RemoveWord(outputPhrase.GetSize() - 1); @@ -858,7 +858,7 @@ void IOWrapper::OutputNBestList(const std::vector &nbest, long Incremental::PhraseAndFeatures(*i, outputPhrase, features); // and UTIL_THROW_IF2(outputPhrase.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); outputPhrase.RemoveWord(0); outputPhrase.RemoveWord(outputPhrase.GetSize() - 1); @@ -980,9 +980,9 @@ size_t IOWrapper::OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartT } size_t IOWrapper::OutputAlignmentNBest( - Alignments &retAlign, - const Moses::ChartKBestExtractor::Derivation &derivation, - size_t startTarget) + Alignments &retAlign, + const Moses::ChartKBestExtractor::Derivation &derivation, + size_t startTarget) { const ChartHypothesis &hypo = derivation.edge.head->hypothesis; @@ -1023,7 +1023,7 @@ size_t IOWrapper::OutputAlignmentNBest( // Recursively look thru child hypos size_t currStartTarget = startTarget + totalTargetSize; size_t targetSize = OutputAlignmentNBest(retAlign, subderivation, - currStartTarget); + currStartTarget); targetOffsets[targetPos] = targetSize; totalTargetSize += targetSize; @@ -1114,7 +1114,7 @@ size_t IOWrapper::OutputAlignment(Alignments &retAlign, const Moses::ChartHypoth size_t targetInd = 0; for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) { if (tp.GetWord(targetPos).IsNonTerminal()) { - UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error"); + UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error"); size_t sourceInd = targetPos2SourceInd[targetPos]; size_t sourcePos = sourceInd2pos[sourceInd]; diff --git a/moses-chart-cmd/Main.cpp b/moses-chart-cmd/Main.cpp index fd82b5692..9fae28b10 100644 --- a/moses-chart-cmd/Main.cpp +++ b/moses-chart-cmd/Main.cpp @@ -234,8 +234,7 @@ static void ShowWeights() const StatefulFeatureFunction *ff = sff[i]; if (ff->IsTuneable()) { PrintFeatureWeight(ff); - } - else { + } else { cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl; } } @@ -243,8 +242,7 @@ static void ShowWeights() const StatelessFeatureFunction *ff = slf[i]; if (ff->IsTuneable()) { PrintFeatureWeight(ff); - } - else { + } else { cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl; } } diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp index 98c113a19..f77dc2373 100644 --- a/moses-cmd/Main.cpp +++ b/moses-cmd/Main.cpp @@ -253,17 +253,17 @@ public: if ( appendSuffix ) { fileName << "." << compression; } - boost::iostreams::filtering_ostream *file - = new boost::iostreams::filtering_ostream; + boost::iostreams::filtering_ostream *file + = new boost::iostreams::filtering_ostream; if ( compression == "gz" ) { file->push( boost::iostreams::gzip_compressor() ); } else if ( compression == "bz2" ) { file->push( boost::iostreams::bzip2_compressor() ); } else if ( compression != "txt" ) { - TRACE_ERR("Unrecognized hypergraph compression format (" - << compression - << ") - using uncompressed plain txt" << std::endl); + TRACE_ERR("Unrecognized hypergraph compression format (" + << compression + << ") - using uncompressed plain txt" << std::endl); compression = "txt"; } @@ -274,10 +274,10 @@ public: manager.OutputSearchGraphAsHypergraph(m_lineNumber, *file); file -> flush(); } else { - TRACE_ERR("Cannot output hypergraph for line " << m_lineNumber - << " because the output file " << fileName.str() - << " is not open or not ready for writing" - << std::endl); + TRACE_ERR("Cannot output hypergraph for line " << m_lineNumber + << " because the output file " << fileName.str() + << " is not open or not ready for writing" + << std::endl); } file -> pop(); delete file; @@ -504,8 +504,7 @@ static void ShowWeights() const StatefulFeatureFunction *ff = sff[i]; if (ff->IsTuneable()) { PrintFeatureWeight(ff); - } - else { + } else { cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl; } } @@ -513,8 +512,7 @@ static void ShowWeights() const StatelessFeatureFunction *ff = slf[i]; if (ff->IsTuneable()) { PrintFeatureWeight(ff); - } - else { + } else { cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl; } } @@ -585,7 +583,7 @@ int main(int argc, char** argv) #ifdef HAVE_PROTOBUF GOOGLE_PROTOBUF_VERIFY_VERSION; #endif - + // echo command line, if verbose IFVERBOSE(1) { TRACE_ERR("command: "); @@ -604,15 +602,15 @@ int main(int argc, char** argv) exit(1); } -std::cerr <<"Before StaticData::LoadDataStatic" << std::endl; + std::cerr <<"Before StaticData::LoadDataStatic" << std::endl; // initialize all "global" variables, which are stored in StaticData // note: this also loads models such as the language model, etc. if (!StaticData::LoadDataStatic(¶ms, argv[0])) { exit(1); } -std::cerr <<"After StaticData::LoadDataStatic" << std::endl; + std::cerr <<"After StaticData::LoadDataStatic" << std::endl; -std::cerr <<"Before ShowWeights" << std::endl; + std::cerr <<"Before ShowWeights" << std::endl; // setting "-show-weights" -> just dump out weights and exit if (params.isParamSpecified("show-weights")) { ShowWeights(); diff --git a/phrase-extract/DomainFeature.cpp b/phrase-extract/DomainFeature.cpp index 2f99a8709..0526d058b 100644 --- a/phrase-extract/DomainFeature.cpp +++ b/phrase-extract/DomainFeature.cpp @@ -55,9 +55,9 @@ DomainFeature::DomainFeature(const string& domainFile) : m_propertyKey("domain") m_domain.load(domainFile); } -void DomainFeature::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, - float count, - int sentenceId) const +void DomainFeature::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, + float count, + int sentenceId) const { std::string value = m_domain.getDomainOfSentence(sentenceId); phrasePair.AddProperty(m_propertyKey, value, count); @@ -69,13 +69,13 @@ void DomainFeature::add(const ScoreFeatureContext& context, { const map *domainCount = context.phrasePair.GetProperty(m_propertyKey); assert( domainCount != NULL ); - add(*domainCount, - context.phrasePair.GetCount(), - context.maybeLog, + add(*domainCount, + context.phrasePair.GetCount(), + context.maybeLog, denseValues, sparseValues); } -void SubsetDomainFeature::add(const map& domainCount, +void SubsetDomainFeature::add(const map& domainCount, float count, const MaybeLog& maybeLog, std::vector& denseValues, diff --git a/phrase-extract/DomainFeature.h b/phrase-extract/DomainFeature.h index 8ebc599e2..bcb2e63a2 100644 --- a/phrase-extract/DomainFeature.h +++ b/phrase-extract/DomainFeature.h @@ -35,8 +35,8 @@ public: DomainFeature(const std::string& domainFile); - void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, - float count, + void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, + float count, int sentenceId) const; void add(const ScoreFeatureContext& context, diff --git a/phrase-extract/ExtractionPhrasePair.cpp b/phrase-extract/ExtractionPhrasePair.cpp index a975b4126..102537ca1 100644 --- a/phrase-extract/ExtractionPhrasePair.cpp +++ b/phrase-extract/ExtractionPhrasePair.cpp @@ -29,7 +29,8 @@ using namespace std; -namespace MosesTraining { +namespace MosesTraining +{ extern Vocabulary vcbT; @@ -38,23 +39,23 @@ extern Vocabulary vcbS; extern bool hierarchicalFlag; -ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource, - const PHRASE *phraseTarget, - ALIGNMENT *targetToSourceAlignment, - float count, float pcfgSum ) : - m_phraseSource(phraseSource), - m_phraseTarget(phraseTarget), - m_count(count), - m_pcfgSum(pcfgSum) +ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource, + const PHRASE *phraseTarget, + ALIGNMENT *targetToSourceAlignment, + float count, float pcfgSum ) : + m_phraseSource(phraseSource), + m_phraseTarget(phraseTarget), + m_count(count), + m_pcfgSum(pcfgSum) { assert(phraseSource->empty()); assert(phraseTarget->empty()); m_count = count; m_pcfgSum = pcfgSum; - + std::pair< std::map::iterator, bool > insertedAlignment = - m_targetToSourceAlignments.insert( std::pair(targetToSourceAlignment,count) ); + m_targetToSourceAlignments.insert( std::pair(targetToSourceAlignment,count) ); m_lastTargetToSourceAlignment = insertedAlignment.first; m_lastCount = m_count; @@ -64,29 +65,30 @@ ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource, } -ExtractionPhrasePair::~ExtractionPhrasePair( ) { +ExtractionPhrasePair::~ExtractionPhrasePair( ) +{ Clear(); } // return value: true if the given alignment was seen for the first time and thus will be stored, // false if it was present already (the pointer may thus be deleted( -bool ExtractionPhrasePair::Add( ALIGNMENT *targetToSourceAlignment, - float count, float pcfgSum ) +bool ExtractionPhrasePair::Add( ALIGNMENT *targetToSourceAlignment, + float count, float pcfgSum ) { m_count += count; m_pcfgSum += pcfgSum; m_lastCount = count; m_lastPcfgSum = pcfgSum; - + std::map::iterator iter = m_lastTargetToSourceAlignment; if ( *(iter->first) == *targetToSourceAlignment ) { iter->second += count; return false; } else { std::pair< std::map::iterator, bool > insertedAlignment = - m_targetToSourceAlignments.insert( std::pair(targetToSourceAlignment,count) ); + m_targetToSourceAlignments.insert( std::pair(targetToSourceAlignment,count) ); if ( !insertedAlignment.second ) { // the alignment already exists: increment count insertedAlignment.first->second += count; @@ -105,7 +107,7 @@ void ExtractionPhrasePair::IncrementPrevious( float count, float pcfgSum ) m_pcfgSum += pcfgSum; m_lastTargetToSourceAlignment->second += count; // properties - for ( std::map >::iterator iter=m_properties.begin(); + for ( std::map >::iterator iter=m_properties.begin(); iter !=m_properties.end(); ++iter ) { LAST_PROPERTY_VALUE *lastPropertyValue = (iter->second).second; (*lastPropertyValue)->second += count; @@ -116,7 +118,7 @@ void ExtractionPhrasePair::IncrementPrevious( float count, float pcfgSum ) } -// Check for lexical match +// Check for lexical match // and in case of SCFG rules for equal non-terminal alignment. bool ExtractionPhrasePair::Matches( const PHRASE *otherPhraseSource, const PHRASE *otherPhraseTarget, @@ -132,9 +134,9 @@ bool ExtractionPhrasePair::Matches( const PHRASE *otherPhraseSource, return MatchesAlignment( otherTargetToSourceAlignment ); } -// Check for lexical match +// Check for lexical match // and in case of SCFG rules for equal non-terminal alignment. -// Set boolean indicators. +// Set boolean indicators. // (Note that we check in the order: target - source - alignment // and do not touch the subsequent boolean indicators once a previous one has been set to false.) bool ExtractionPhrasePair::Matches( const PHRASE *otherPhraseSource, @@ -194,7 +196,7 @@ bool ExtractionPhrasePair::MatchesAlignment( ALIGNMENT *otherTargetToSourceAlign return true; } -void ExtractionPhrasePair::Clear() +void ExtractionPhrasePair::Clear() { delete m_phraseSource; delete m_phraseTarget; @@ -218,7 +220,7 @@ void ExtractionPhrasePair::Clear() m_lastCount = 0.0f; m_lastPcfgSum = 0.0f; m_lastTargetToSourceAlignment = m_targetToSourceAlignments.begin(); - + m_isValid = false; } @@ -252,7 +254,7 @@ const ALIGNMENT *ExtractionPhrasePair::FindBestAlignmentTargetToSource() const std::map::const_iterator bestAlignment = m_targetToSourceAlignments.end(); - for (std::map::const_iterator iter=m_targetToSourceAlignments.begin(); + for (std::map::const_iterator iter=m_targetToSourceAlignments.begin(); iter!=m_targetToSourceAlignments.end(); ++iter) { if ( (iter->second > bestAlignmentCount) || ( (iter->second == bestAlignmentCount) && @@ -281,7 +283,7 @@ const std::string *ExtractionPhrasePair::FindBestPropertyValue(const std::string PROPERTY_VALUES::const_iterator bestPropertyValue = allPropertyValues->end(); - for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin(); + for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin(); iter!=allPropertyValues->end(); ++iter) { if ( (iter->second > bestPropertyCount) || ( (iter->second == bestPropertyCount) && @@ -308,7 +310,7 @@ std::string ExtractionPhrasePair::CollectAllPropertyValues(const std::string &ke } std::ostringstream oss; - for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin(); + for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin(); iter!=allPropertyValues->end(); ++iter) { if (iter!=allPropertyValues->begin()) { oss << " "; diff --git a/phrase-extract/ExtractionPhrasePair.h b/phrase-extract/ExtractionPhrasePair.h index f04984391..e9f643d2c 100644 --- a/phrase-extract/ExtractionPhrasePair.h +++ b/phrase-extract/ExtractionPhrasePair.h @@ -24,20 +24,22 @@ #include #include -namespace MosesTraining { +namespace MosesTraining +{ typedef std::vector< std::set > ALIGNMENT; -class ExtractionPhrasePair { +class ExtractionPhrasePair +{ protected: typedef std::map PROPERTY_VALUES; typedef std::map::iterator LAST_PROPERTY_VALUE; - + bool m_isValid; const PHRASE *m_phraseSource; @@ -47,8 +49,8 @@ protected: float m_pcfgSum; std::map m_targetToSourceAlignments; - std::map > m_properties; + std::map > m_properties; float m_lastCount; float m_lastPcfgSum; @@ -56,14 +58,14 @@ protected: public: - ExtractionPhrasePair( const PHRASE *phraseSource, - const PHRASE *phraseTarget, - ALIGNMENT *targetToSourceAlignment, + ExtractionPhrasePair( const PHRASE *phraseSource, + const PHRASE *phraseTarget, + ALIGNMENT *targetToSourceAlignment, float count, float pcfgSum ); ~ExtractionPhrasePair(); - bool Add( ALIGNMENT *targetToSourceAlignment, + bool Add( ALIGNMENT *targetToSourceAlignment, float count, float pcfgSum ); void IncrementPrevious( float count, float pcfgSum ); @@ -91,7 +93,7 @@ public: const PHRASE *GetSource() const { return m_phraseSource; } - + const PHRASE *GetTarget() const { return m_phraseTarget; } @@ -126,10 +128,9 @@ public: void AddProperties( const std::string &str, float count ); - void AddProperty( const std::string &key, const std::string &value, float count ) - { + void AddProperty( const std::string &key, const std::string &value, float count ) { std::map >::iterator iter = m_properties.find(key); + std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key); if ( iter == m_properties.end() ) { // key not found: insert property key and value PROPERTY_VALUES *propertyValues = new PROPERTY_VALUES(); diff --git a/phrase-extract/InternalStructFeature.cpp b/phrase-extract/InternalStructFeature.cpp index 3757b0e43..a2369a80c 100644 --- a/phrase-extract/InternalStructFeature.cpp +++ b/phrase-extract/InternalStructFeature.cpp @@ -8,7 +8,8 @@ namespace MosesTraining void InternalStructFeature::add(const ScoreFeatureContext& context, std::vector& denseValues, - std::map& sparseValues) const { + std::map& sparseValues) const +{ const std::map *allTrees = context.phrasePair.GetProperty("Tree"); // our would we rather want to take the most frequent one only? for ( std::map::const_iterator iter=allTrees->begin(); iter!=allTrees->end(); ++iter ) { @@ -19,24 +20,26 @@ void InternalStructFeature::add(const ScoreFeatureContext& context, void InternalStructFeatureDense::add(const std::string *treeFragment, float count, std::vector& denseValues, - std::map& sparseValues) const { - //cout<<"Dense: "<<*internalStruct<find("NP", start)) != string::npos) { - countNP += count; - start+=2; //length of "NP" - } - //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln? - //should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 ) - denseValues.push_back(exp(countNP)); + std::map& sparseValues) const +{ + //cout<<"Dense: "<<*internalStruct<find("NP", start)) != string::npos) { + countNP += count; + start+=2; //length of "NP" + } + //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln? + //should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 ) + denseValues.push_back(exp(countNP)); } void InternalStructFeatureSparse::add(const std::string *treeFragment, float count, std::vector& denseValues, - std::map& sparseValues) const { + std::map& sparseValues) const +{ //cout<<"Sparse: "<<*internalStruct<find("VBZ")!=std::string::npos) sparseValues["NTVBZ"] += count; diff --git a/phrase-extract/InternalStructFeature.h b/phrase-extract/InternalStructFeature.h index 7969dc8a8..bd44f61fb 100644 --- a/phrase-extract/InternalStructFeature.h +++ b/phrase-extract/InternalStructFeature.h @@ -21,20 +21,20 @@ namespace MosesTraining class InternalStructFeature : public ScoreFeature { public: - InternalStructFeature() : m_type(0) {}; - /** Add the values for this feature function. */ - void add(const ScoreFeatureContext& context, - std::vector& denseValues, - std::map& sparseValues) const; + InternalStructFeature() : m_type(0) {}; + /** Add the values for this feature function. */ + void add(const ScoreFeatureContext& context, + std::vector& denseValues, + std::map& sparseValues) const; protected: - /** Overridden in subclass */ - virtual void add(const std::string *treeFragment, - float count, - std::vector& denseValues, - std::map& sparseValues) const = 0; - int m_type; + /** Overridden in subclass */ + virtual void add(const std::string *treeFragment, + float count, + std::vector& denseValues, + std::map& sparseValues) const = 0; + int m_type; }; class InternalStructFeatureDense : public InternalStructFeature @@ -45,10 +45,10 @@ public: m_type=1; } //std::cout<<"InternalStructFeatureDense: Construct "<& denseValues, - std::map& sparseValues) const; + virtual void add(const std::string *treeFragment, + float count, + std::vector& denseValues, + std::map& sparseValues) const; }; class InternalStructFeatureSparse : public InternalStructFeature @@ -59,10 +59,10 @@ public: m_type=2; }// std::cout<<"InternalStructFeatureSparse: Construct "<& denseValues, - std::map& sparseValues) const; + virtual void add(const std::string *treeFragment, + float count, + std::vector& denseValues, + std::map& sparseValues) const; }; } diff --git a/phrase-extract/ScoreFeature.cpp b/phrase-extract/ScoreFeature.cpp index 7db57b38e..c037ab584 100644 --- a/phrase-extract/ScoreFeature.cpp +++ b/phrase-extract/ScoreFeature.cpp @@ -77,12 +77,12 @@ void ScoreFeatureManager::configure(const std::vector args) } sparseDomainAdded = true; m_includeSentenceId = true; - } else if(args[i] == "--TreeFeatureSparse"){ - //MARIA - m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse())); - } else if(args[i] == "--TreeFeatureDense"){ - //MARIA - m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense())); + } else if(args[i] == "--TreeFeatureSparse") { + //MARIA + m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse())); + } else if(args[i] == "--TreeFeatureDense") { + //MARIA + m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense())); } else { UTIL_THROW(ScoreFeatureArgumentException,"Unknown score argument " << args[i]); } @@ -91,9 +91,9 @@ void ScoreFeatureManager::configure(const std::vector args) } -void ScoreFeatureManager::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, - float count, - int sentenceId) const +void ScoreFeatureManager::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, + float count, + int sentenceId) const { for (size_t i = 0; i < m_features.size(); ++i) { m_features[i]->addPropertiesToPhrasePair(phrasePair, count, sentenceId); diff --git a/phrase-extract/ScoreFeature.h b/phrase-extract/ScoreFeature.h index 926397e71..1f697c989 100644 --- a/phrase-extract/ScoreFeature.h +++ b/phrase-extract/ScoreFeature.h @@ -84,10 +84,10 @@ class ScoreFeature public: /** Some features might need to store properties in ExtractionPhrasePair, - * e.g. to pass along external information loaded by a feature + * e.g. to pass along external information loaded by a feature * which may distinguish several phrase occurrences based on sentence ID */ - virtual void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, - float count, + virtual void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, + float count, int sentenceId) const {}; /** Add the values for this feature function. */ @@ -113,10 +113,10 @@ public: void configure(const std::vector args); /** Some features might need to store properties in ExtractionPhrasePair, - * e.g. to pass along external information loaded by a feature + * e.g. to pass along external information loaded by a feature * which may distinguish several phrase occurrences based on sentence ID */ - void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, - float count, + void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, + float count, int sentenceId) const; /** Add all the features */ diff --git a/phrase-extract/extract-ordering-main.cpp b/phrase-extract/extract-ordering-main.cpp index 104457b01..b37309d47 100644 --- a/phrase-extract/extract-ordering-main.cpp +++ b/phrase-extract/extract-ordering-main.cpp @@ -92,9 +92,9 @@ class ExtractTask public: ExtractTask(size_t id, SentenceAlignment &sentence,PhraseExtractionOptions &initoptions, Moses::OutputFileStream &extractFileOrientation) :m_sentence(sentence), - m_options(initoptions), - m_extractFileOrientation(extractFileOrientation) - {} + m_options(initoptions), + m_extractFileOrientation(extractFileOrientation) + {} void Run(); private: void extract(SentenceAlignment &); @@ -151,11 +151,11 @@ int main(int argc, char* argv[]) } options.initInstanceWeightsFile(argv[++i]); } else if (strcmp(argv[i], "--Debug") == 0) { - options.debug = true; + options.debug = true; } else if (strcmp(argv[i], "--MinPhraseLength") == 0) { - options.minPhraseLength = atoi(argv[++i]); + options.minPhraseLength = atoi(argv[++i]); } else if (strcmp(argv[i], "--Separator") == 0) { - options.separator = argv[++i]; + options.separator = argv[++i]; } else if(strcmp(argv[i],"--model") == 0) { if (i+1 >= argc) { cerr << "extract: syntax error, no model's information provided to the option --model " << endl; @@ -605,16 +605,14 @@ string getOrientString(REO_POS orient, REO_MODEL_TYPE modelType) int getClass(const std::string &str) { - size_t pos = str.find("swap"); - if (pos == str.npos) { - return 0; - } - else if (pos == 0) { - return 1; - } - else { - return 2; - } + size_t pos = str.find("swap"); + if (pos == str.npos) { + return 0; + } else if (pos == 0) { + return 1; + } else { + return 2; + } } void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE, int startF, int endF , string &orientationInfo) @@ -635,19 +633,19 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE, // start m_extractFileOrientation << " "; for(int fi=0; fi "; @@ -655,7 +653,7 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE, // target /* for(int ei=startE; ei<=endE; ei++) { - m_extractFileOrientation << sentence.target[ei] << " "; + m_extractFileOrientation << sentence.target[ei] << " "; } */ m_extractFileOrientation << endl; diff --git a/phrase-extract/score-main.cpp b/phrase-extract/score-main.cpp index cd8f9ddaa..72c4c1476 100644 --- a/phrase-extract/score-main.cpp +++ b/phrase-extract/score-main.cpp @@ -68,7 +68,7 @@ std::map sourceLHSCounts; std::map* > targetLHSAndSourceLHSJointCounts; std::set sourceLabelSet; -std::map sourceLabels; +std::map sourceLabels; std::vector sourceLabelsByIndex; Vocabulary vcbT; @@ -79,12 +79,12 @@ Vocabulary vcbS; std::vector tokenize( const char [] ); void processLine( std::string line, - int lineID, bool includeSentenceIdFlag, int &sentenceId, + int lineID, bool includeSentenceIdFlag, int &sentenceId, PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment, std::string &additionalPropertiesString, float &count, float &pcfgSum ); void writeCountOfCounts( const std::string &fileNameCountOfCounts ); -void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile, +void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile, const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb ); void outputPhrasePair(const ExtractionPhrasePair &phrasePair, float, int, ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLog ); double computeLexicalTranslation( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource ); @@ -100,7 +100,7 @@ void invertAlignment( const PHRASE *phraseSource, const PHRASE *phraseTarget, co int main(int argc, char* argv[]) { - std::cerr << "Score v2.1 -- " + std::cerr << "Score v2.1 -- " << "scoring methods for extracted rules" << std::endl; ScoreFeatureManager featureManager; @@ -155,7 +155,7 @@ int main(int argc, char* argv[]) } else if (strcmp(argv[i],"--UnalignedFunctionWordPenalty") == 0) { unalignedFWFlag = true; if (i+1==argc) { - std::cerr << "ERROR: specify function words file for unaligned function word penalty!" << std::endl; + std::cerr << "ERROR: specify function words file for unaligned function word penalty!" << std::endl; exit(1); } fileNameFunctionWords = argv[++i]; @@ -224,8 +224,8 @@ int main(int argc, char* argv[]) Moses::OutputFileStream *outputFile = new Moses::OutputFileStream(); bool success = outputFile->Open(fileNamePhraseTable); if (!success) { - std::cerr << "ERROR: could not open file phrase table file " - << fileNamePhraseTable << std::endl; + std::cerr << "ERROR: could not open file phrase table file " + << fileNamePhraseTable << std::endl; exit(1); } phraseTableFile = outputFile; @@ -251,12 +251,12 @@ int main(int argc, char* argv[]) tmpPhraseSource = new PHRASE(); tmpPhraseTarget = new PHRASE(); tmpTargetToSourceAlignment = new ALIGNMENT(); - processLine( std::string(line), + processLine( std::string(line), i, featureManager.includeSentenceId(), tmpSentenceId, - tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment, + tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment, tmpAdditionalPropertiesString, tmpCount, tmpPcfgSum); - phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget, + phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment, tmpCount, tmpPcfgSum ); phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount ); @@ -288,14 +288,16 @@ int main(int argc, char* argv[]) tmpPhraseTarget = new PHRASE(); tmpTargetToSourceAlignment = new ALIGNMENT(); tmpAdditionalPropertiesString.clear(); - processLine( std::string(line), + processLine( std::string(line), i, featureManager.includeSentenceId(), tmpSentenceId, - tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment, + tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment, tmpAdditionalPropertiesString, - tmpCount, tmpPcfgSum); + tmpCount, tmpPcfgSum); bool matchesPrevious = false; - bool sourceMatch = true; bool targetMatch = true; bool alignmentMatch = true; // be careful with these, + bool sourceMatch = true; + bool targetMatch = true; + bool alignmentMatch = true; // be careful with these, // ExtractionPhrasePair::Matches() checks them in order and does not continue with the others // once the first of them has been found to have to be set to false @@ -330,7 +332,7 @@ int main(int argc, char* argv[]) if ( !phrasePairsWithSameSource.empty() && !sourceMatch ) { processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb ); - for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); + for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); iter!=phrasePairsWithSameSource.end(); ++iter) { delete *iter; } @@ -347,8 +349,8 @@ int main(int argc, char* argv[]) } } - phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget, - tmpTargetToSourceAlignment, + phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget, + tmpTargetToSourceAlignment, tmpCount, tmpPcfgSum ); phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount ); featureManager.addPropertiesToPhrasePair( *phrasePair, tmpCount, tmpSentenceId ); @@ -364,7 +366,7 @@ int main(int argc, char* argv[]) } processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb ); - for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); + for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); iter!=phrasePairsWithSameSource.end(); ++iter) { delete *iter; } @@ -384,7 +386,7 @@ int main(int argc, char* argv[]) void processLine( std::string line, - int lineID, bool includeSentenceIdFlag, int &sentenceId, + int lineID, bool includeSentenceIdFlag, int &sentenceId, PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment, std::string &additionalPropertiesString, float &count, float &pcfgSum ) @@ -474,7 +476,7 @@ void writeCountOfCounts( const string &fileNameCountOfCounts ) } -void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile, +void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile, const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb ) { if (phrasePairsWithSameSource.size() == 0) { @@ -486,23 +488,23 @@ void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSa //std::cerr << "phrasePairs.size() = " << phrasePairs.size() << std::endl; // loop through phrase pairs - for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); + for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); iter!=phrasePairsWithSameSource.end(); ++iter) { // add to total count totalSource += (*iter)->GetCount(); } // output the distinct phrase pairs, one at a time - for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); + for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); iter!=phrasePairsWithSameSource.end(); ++iter) { // add to total count outputPhrasePair( **iter, totalSource, phrasePairsWithSameSource.size(), phraseTableFile, featureManager, maybeLogProb ); } } -void outputPhrasePair(const ExtractionPhrasePair &phrasePair, - float totalCount, int distinctCount, - ostream &phraseTableFile, +void outputPhrasePair(const ExtractionPhrasePair &phrasePair, + float totalCount, int distinctCount, + ostream &phraseTableFile, const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb ) { @@ -557,45 +559,45 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair, // alignment if ( hierarchicalFlag ) { - // always output alignment if hiero style - assert(phraseTarget->size() == bestAlignmentT2S->size()+1); - std::vector alignment; - for ( size_t j = 0; j < phraseTarget->size() - 1; ++j ) { - if ( isNonTerminal(vcbT.getWord( phraseTarget->at(j) ))) { - if ( bestAlignmentT2S->at(j).size() != 1 ) { - std::cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << std::endl; - phraseTableFile.flush(); - assert(bestAlignmentT2S->at(j).size() == 1); - } - size_t sourcePos = *(bestAlignmentT2S->at(j).begin()); - //phraseTableFile << sourcePos << "-" << j << " "; - std::stringstream point; - point << sourcePos << "-" << j; - alignment.push_back(point.str()); - } else { - for ( std::set::iterator setIter = (bestAlignmentT2S->at(j)).begin(); - setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) { - size_t sourcePos = *setIter; - std::stringstream point; - point << sourcePos << "-" << j; - alignment.push_back(point.str()); - } + // always output alignment if hiero style + assert(phraseTarget->size() == bestAlignmentT2S->size()+1); + std::vector alignment; + for ( size_t j = 0; j < phraseTarget->size() - 1; ++j ) { + if ( isNonTerminal(vcbT.getWord( phraseTarget->at(j) ))) { + if ( bestAlignmentT2S->at(j).size() != 1 ) { + std::cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << std::endl; + phraseTableFile.flush(); + assert(bestAlignmentT2S->at(j).size() == 1); } - } - // now print all alignments, sorted by source index - sort(alignment.begin(), alignment.end()); - for (size_t i = 0; i < alignment.size(); ++i) { - phraseTableFile << alignment[i] << " "; - } - } else if ( !inverseFlag && wordAlignmentFlag) { - // alignment info in pb model - for (size_t j = 0; j < bestAlignmentT2S->size(); ++j) { + size_t sourcePos = *(bestAlignmentT2S->at(j).begin()); + //phraseTableFile << sourcePos << "-" << j << " "; + std::stringstream point; + point << sourcePos << "-" << j; + alignment.push_back(point.str()); + } else { for ( std::set::iterator setIter = (bestAlignmentT2S->at(j)).begin(); setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) { size_t sourcePos = *setIter; - phraseTableFile << sourcePos << "-" << j << " "; + std::stringstream point; + point << sourcePos << "-" << j; + alignment.push_back(point.str()); } } + } + // now print all alignments, sorted by source index + sort(alignment.begin(), alignment.end()); + for (size_t i = 0; i < alignment.size(); ++i) { + phraseTableFile << alignment[i] << " "; + } + } else if ( !inverseFlag && wordAlignmentFlag) { + // alignment info in pb model + for (size_t j = 0; j < bestAlignmentT2S->size(); ++j) { + for ( std::set::iterator setIter = (bestAlignmentT2S->at(j)).begin(); + setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) { + size_t sourcePos = *setIter; + phraseTableFile << sourcePos << "-" << j << " "; + } + } } phraseTableFile << " ||| "; @@ -646,7 +648,7 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair, if (kneserNeyFlag) phraseTableFile << " " << distinctCount; - if ((treeFragmentsFlag) && + if ((treeFragmentsFlag) && !inverseFlag) { phraseTableFile << " |||"; } @@ -671,7 +673,7 @@ bool calcCrossedNonTerm( size_t targetPos, size_t sourcePos, const ALIGNMENT *al // skip } else { const std::set &sourceSet = alignmentTargetToSource->at(currTarget); - for (std::set::const_iterator iter = sourceSet.begin(); + for (std::set::const_iterator iter = sourceSet.begin(); iter != sourceSet.end(); ++iter) { size_t currSource = *iter; @@ -808,9 +810,9 @@ void LexicalTable::load( const string &fileName ) std::vector token = tokenize( line ); if (token.size() != 3) { - std::cerr << "line " << i << " in " << fileName - << " has wrong number of tokens, skipping:" << std::endl - << token.size() << " " << token[0] << " " << line << std::endl; + std::cerr << "line " << i << " in " << fileName + << " has wrong number of tokens, skipping:" << std::endl + << token.size() << " " << token[0] << " " << line << std::endl; continue; } @@ -889,15 +891,16 @@ void printTargetPhrase(const PHRASE *phraseSource, const PHRASE *phraseTarget, void invertAlignment(const PHRASE *phraseSource, const PHRASE *phraseTarget, - const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment) { -// typedef std::vector< std::set > ALIGNMENT; + const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment) +{ +// typedef std::vector< std::set > ALIGNMENT; outSourceToTargetAlignment->clear(); size_t numberOfSourceSymbols = (hierarchicalFlag ? phraseSource->size()-1 : phraseSource->size()); outSourceToTargetAlignment->resize(numberOfSourceSymbols); // add alignment point for (size_t targetPosition = 0; targetPosition < inTargetToSourceAlignment->size(); ++targetPosition) { - for ( std::set::iterator setIter = (inTargetToSourceAlignment->at(targetPosition)).begin(); + for ( std::set::iterator setIter = (inTargetToSourceAlignment->at(targetPosition)).begin(); setIter != (inTargetToSourceAlignment->at(targetPosition)).end(); ++setIter ) { size_t sourcePosition = *setIter; outSourceToTargetAlignment->at(sourcePosition).insert(targetPosition);