From fdc504d47ac094f402ee4b046cf3dfe877f3b3c9 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Mon, 10 Mar 2014 14:08:00 +0000 Subject: [PATCH] Changes on main branch files while I was working on dynamic phrase tables. --- contrib/other-builds/extract-rules/.project | 5 + .../other-builds/moses-chart-cmd/.cproject | 9 +- contrib/other-builds/moses-cmd/.cproject | 9 +- mert/FeatureStats.cpp | 6 +- mert/ScoreStats.cpp | 8 +- mert/Singleton.h | 6 +- moses-chart-cmd/IOWrapper.cpp | 20 +- moses/ChartParser.cpp | 2 +- .../LexicalReordering/LexicalReordering.cpp | 3 +- moses/LM/DALMWrapper.cpp | 103 +- moses/StaticData.cpp | 48 + moses/StaticData.h | 16 + .../TranslationModel/DynSAInclude/onlineRLM.h | 4 +- phrase-extract/consolidate-main.cpp | 6 +- phrase-extract/score-main.cpp | 909 ------------------ 15 files changed, 163 insertions(+), 991 deletions(-) diff --git a/contrib/other-builds/extract-rules/.project b/contrib/other-builds/extract-rules/.project index 76de5a624..29ffed2a9 100644 --- a/contrib/other-builds/extract-rules/.project +++ b/contrib/other-builds/extract-rules/.project @@ -65,6 +65,11 @@ 1 PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h + + RuleExtractionOptions.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/RuleExtractionOptions.h + SentenceAlignment.cpp 1 diff --git a/contrib/other-builds/moses-chart-cmd/.cproject b/contrib/other-builds/moses-chart-cmd/.cproject index b6cbc127d..86dfbac5b 100644 --- a/contrib/other-builds/moses-chart-cmd/.cproject +++ b/contrib/other-builds/moses-chart-cmd/.cproject @@ -5,13 +5,13 @@ - - + + @@ -70,7 +70,6 @@ - @@ -108,13 +107,13 @@ - - + + diff --git a/contrib/other-builds/moses-cmd/.cproject b/contrib/other-builds/moses-cmd/.cproject index f9eeebf1d..828b71395 100644 --- a/contrib/other-builds/moses-cmd/.cproject +++ b/contrib/other-builds/moses-cmd/.cproject @@ -5,13 +5,13 @@ - - + + @@ -71,7 +71,6 @@ - @@ -109,13 +108,13 @@ - - + + diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp index aa32e1fef..5a12be70a 100644 --- a/mert/FeatureStats.cpp +++ b/mert/FeatureStats.cpp @@ -181,10 +181,8 @@ FeatureStats::FeatureStats(const size_t size) FeatureStats::~FeatureStats() { - if (m_array) { - delete [] m_array; - m_array = NULL; - } + delete [] m_array; + m_array = NULL; } void FeatureStats::Copy(const FeatureStats &stats) diff --git a/mert/ScoreStats.cpp b/mert/ScoreStats.cpp index 1c66cdb5f..771880fa1 100644 --- a/mert/ScoreStats.cpp +++ b/mert/ScoreStats.cpp @@ -35,10 +35,8 @@ ScoreStats::ScoreStats(const size_t size) ScoreStats::~ScoreStats() { - if (m_array) { - delete [] m_array; - m_array = NULL; - } + delete [] m_array; + m_array = NULL; } void ScoreStats::Copy(const ScoreStats &stats) @@ -157,4 +155,4 @@ bool operator==(const ScoreStats& s1, const ScoreStats& s2) return true; } -} \ No newline at end of file +} diff --git a/mert/Singleton.h b/mert/Singleton.h index f50925fa4..df1386650 100644 --- a/mert/Singleton.h +++ b/mert/Singleton.h @@ -21,10 +21,8 @@ public: } static void Delete() { - if (m_instance) { - delete m_instance; - m_instance = NULL; - } + delete m_instance; + m_instance = NULL; } private: diff --git a/moses-chart-cmd/IOWrapper.cpp b/moses-chart-cmd/IOWrapper.cpp index 572dad097..111006981 100644 --- a/moses-chart-cmd/IOWrapper.cpp +++ b/moses-chart-cmd/IOWrapper.cpp @@ -50,7 +50,7 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "moses/FeatureVector.h" #include "moses/FF/StatefulFeatureFunction.h" #include "moses/FF/StatelessFeatureFunction.h" -#include "moses/FF/SyntaxConstraintFeature.h" +#include "moses/FF/TreeStructureFeature.h" #include "util/exception.hh" using namespace std; @@ -395,14 +395,16 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport( UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL, "No output file for tree fragments specified"); - //Tree of full sentence (to stderr) - const vector& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions(); - for( size_t i=0; iGetScoreProducerDescription() == "SyntaxConstraintFeature0") { - const TreeState* tree = dynamic_cast(hypo->GetFFState(i)); - out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n"; - break; + //Tree of full sentence + const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure(); + if (treeStructure != NULL) { + const vector& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for( size_t i=0; i(hypo->GetFFState(i)); + out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n"; + break; + } } } diff --git a/moses/ChartParser.cpp b/moses/ChartParser.cpp index dac7e85a0..deccf74e4 100644 --- a/moses/ChartParser.cpp +++ b/moses/ChartParser.cpp @@ -97,7 +97,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range targetPhrase->SetTargetLHS(targetLHS); targetPhrase->SetAlignmentInfo("0-0"); - if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) { + if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.GetTreeStructure() != NULL) { targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]"); } diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp index c73c0324b..6a2a488d9 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses/FF/LexicalReordering/LexicalReordering.cpp @@ -52,8 +52,7 @@ LexicalReordering::LexicalReordering(const std::string &line) LexicalReordering::~LexicalReordering() { - if(m_table) - delete m_table; + delete m_table; delete m_configuration; } diff --git a/moses/LM/DALMWrapper.cpp b/moses/LM/DALMWrapper.cpp index 4cc56a9a6..33ce4c1f4 100644 --- a/moses/LM/DALMWrapper.cpp +++ b/moses/LM/DALMWrapper.cpp @@ -63,6 +63,11 @@ public: state = new DALM::State(*from.state); } + void reset(DALM::State *s){ + delete state; + state = s; + } + virtual int Compare(const FFState& other) const{ const DALMState &o = static_cast(other); if(state->get_count() < o.state->get_count()) return -1; @@ -82,11 +87,9 @@ public: class DALMChartState : public FFState { private: - size_t sourceStartPos; - size_t sourceEndPos; - size_t inputSize; - DALM::VocabId *prefixIDs; - size_t prefixLength; + const ChartHypothesis &hypo; + DALM::Fragment *prefixFragments; + unsigned short prefixLength; float prefixScore; DALMState *rightContext; bool isLarge; @@ -94,15 +97,13 @@ private: public: DALMChartState( const ChartHypothesis &hypo, - DALM::VocabId *prefixIDs, - size_t prefixLength, + DALM::Fragment *prefixFragments, + unsigned short prefixLength, float prefixScore, DALMState *rightContext, bool isLarge) - : sourceStartPos(hypo.GetCurrSourceRange().GetStartPos()), - sourceEndPos(hypo.GetCurrSourceRange().GetEndPos()), - inputSize(hypo.GetManager().GetSource().GetSize()), - prefixIDs(prefixIDs), + : hypo(hypo), + 
prefixFragments(prefixFragments), prefixLength(prefixLength), prefixScore(prefixScore), rightContext(rightContext), @@ -110,16 +111,16 @@ public: {} virtual ~DALMChartState(){ - if(prefixIDs != NULL) delete [] prefixIDs; - if(rightContext != NULL) delete rightContext; + delete [] prefixFragments; + delete rightContext; } - size_t GetPrefixLength() const{ + unsigned short GetPrefixLength() const{ return prefixLength; } - const DALM::VocabId *GetPrefixIDs() const{ - return prefixIDs; + const DALM::Fragment *GetPrefixFragments() const{ + return prefixFragments; } float GetPrefixScore() const{ @@ -137,17 +138,22 @@ public: virtual int Compare(const FFState& other) const{ const DALMChartState &o = static_cast(other); // prefix - if (sourceStartPos > 0) { // not for " ..." + if (hypo.GetCurrSourceRange().GetStartPos() > 0) { // not for " ..." if (prefixLength != o.prefixLength){ return (prefixLength < o.prefixLength)?-1:1; } else { - int ret = memcmp(prefixIDs, o.prefixIDs, prefixLength); - if (ret != 0) return ret; + if(prefixLength > 0){ + DALM::Fragment &f = prefixFragments[prefixLength-1]; + DALM::Fragment &of = o.prefixFragments[prefixLength-1]; + int ret = DALM::compare_fragments(f, of); + if(ret != 0) return ret; + } } } // suffix - if (sourceEndPos < inputSize - 1) { // not for "... " + size_t inputSize = hypo.GetManager().GetSource().GetSize(); + if (hypo.GetCurrSourceRange().GetEndPos() < inputSize - 1) { // not for "... " int ret = o.rightContext->Compare(*rightContext); if (ret != 0) return ret; } @@ -323,8 +329,8 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu DALM::State *state = dalm_state->get_state(); size_t contextSize = m_nGramOrder-1; - DALM::VocabId *prefixIDs = new DALM::VocabId[contextSize]; - size_t prefixLength = 0; + DALM::Fragment *prefixFragments = new DALM::Fragment[contextSize]; + unsigned short prefixLength = 0; bool isLarge = false; // initial language model scores @@ -350,11 +356,14 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu // state is finalized. 
isLarge = true; }else{ - float score = m_lm->query(wid, *state); - hypoScore += score; - if (!isLarge){ + if(isLarge){ + float score = m_lm->query(wid, *state); + hypoScore += score; + }else{ + float score = m_lm->query(wid, *state, prefixFragments[prefixLength]); + prefixScore += score; - prefixIDs[prefixLength] = wid; + hypoScore += score; prefixLength++; if(prefixLength >= contextSize) isLarge = true; } @@ -374,8 +383,10 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu // get language model state dalm_state->reset(*prevState->GetRightContext()); state = dalm_state->get_state(); + prefixLength = prevState->GetPrefixLength(); - std::memcpy(prefixIDs, prevState->GetPrefixIDs(), sizeof(DALM::VocabId)*prefixLength); + const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments(); + std::memcpy(prefixFragments, prevPrefixFragments, sizeof(DALM::Fragment)*prefixLength); isLarge = prevState->LargeEnough(); } phrasePos++; @@ -389,11 +400,12 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu // regular word if (!word.IsNonTerminal()) { DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType)); - float score = m_lm->query(wid, *state); - hypoScore += score; - if (!isLarge){ + if (isLarge) { + hypoScore += m_lm->query(wid, *state); + }else{ + float score = m_lm->query(wid, *state, prefixFragments[prefixLength]); prefixScore += score; - prefixIDs[prefixLength] = wid; + hypoScore += score; prefixLength++; if(prefixLength >= contextSize) isLarge = true; } @@ -410,19 +422,22 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu static_cast(prevHypo->GetFFState(featureID)); size_t prevPrefixLength = prevState->GetPrefixLength(); - const DALM::VocabId *prevPrefixIDs = prevState->GetPrefixIDs(); - + const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments(); + DALM::Gap gap(*state); // score its prefix for(size_t prefixPos = 0; prefixPos < prevPrefixLength; prefixPos++) { - DALM::VocabId wid = prevPrefixIDs[prefixPos]; - float score = m_lm->query(wid, *state); - hypoScore += score; - if (!isLarge){ + const DALM::Fragment &f = prevPrefixFragments[prefixPos]; + + if (isLarge) { + hypoScore += m_lm->query(f, *state, gap); + } else { + float score = m_lm->query(f, *state, gap, prefixFragments[prefixLength]); prefixScore += score; - prefixIDs[prefixLength] = wid; + hypoScore += score; prefixLength++; if(prefixLength >= contextSize) isLarge = true; } + gap.succ(); } // check if we are dealing with a large sub-phrase @@ -430,18 +445,22 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu // add its language model score hypoScore += UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]); hypoScore -= prevState->GetPrefixScore(); // remove overwrapped score. 
- - // copy language model state + // copy language model state dalm_state->reset(*prevState->GetRightContext()); state = dalm_state->get_state(); - } + } else { + DALM::State *state_new = new DALM::State(*prevState->GetRightContext()->get_state()); + m_lm->set_state(*state_new, *state, gap); + dalm_state->reset(state_new); + state = dalm_state->get_state(); + } } } // assign combined score to score breakdown out->Assign(this, TransformLMScore(hypoScore)); - return new DALMChartState(hypo, prefixIDs, prefixLength, prefixScore, dalm_state, isLarge); + return new DALMChartState(hypo, prefixFragments, prefixLength, prefixScore, dalm_state, isLarge); } bool LanguageModelDALM::IsUseable(const FactorMask &mask) const diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp index 782144360..9ab1564fc 100644 --- a/moses/StaticData.cpp +++ b/moses/StaticData.cpp @@ -66,6 +66,7 @@ StaticData::StaticData() ,m_lmEnableOOVFeature(false) ,m_isAlwaysCreateDirectTranslationOption(false) ,m_currentWeightSetting("default") + ,m_treeStructure(NULL) { m_xmlBrackets.first="<"; m_xmlBrackets.second=">"; @@ -1184,5 +1185,52 @@ void StaticData::CheckLEGACYPT() } +void StaticData::ResetWeights(const std::string &denseWeights, const std::string &sparseFile) +{ + m_allWeights = ScoreComponentCollection(); + + // dense weights + string name(""); + vector weights; + vector toks = Tokenize(denseWeights); + for (size_t i = 0; i < toks.size(); ++i) { + const string &tok = toks[i]; + + if (tok.substr(tok.size() - 1, 1) == "=") { + // start of new feature + + if (name != "") { + // save previous ff + const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name); + m_allWeights.Assign(&ff, weights); + weights.clear(); + } + + name = tok.substr(0, tok.size() - 1); + } else { + // a weight for curr ff + float weight = Scan(toks[i]); + weights.push_back(weight); + } + } + + const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name); + m_allWeights.Assign(&ff, weights); + + // sparse weights + InputFileStream sparseStrme(sparseFile); + string line; + while (getline(sparseStrme, line)) { + vector toks = Tokenize(line); + UTIL_THROW_IF2(toks.size() != 2, "Incorrect sparse weight format. Should be FFName_spareseName weight"); + + vector names = Tokenize(toks[0], "_"); + UTIL_THROW_IF2(names.size() != 2, "Incorrect sparse weight name. 
Should be FFName_spareseName"); + + const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(names[0]); + m_allWeights.Assign(&ff, names[1], Scan(toks[1])); + } +} + } // namespace diff --git a/moses/StaticData.h b/moses/StaticData.h index def81afae..51db96958 100644 --- a/moses/StaticData.h +++ b/moses/StaticData.h @@ -221,6 +221,8 @@ protected: std::map > m_soft_matches_map; std::map > m_soft_matches_map_reverse; + const StatefulFeatureFunction* m_treeStructure; + public: bool IsAlwaysCreateDirectTranslationOption() const { @@ -756,6 +758,20 @@ public: bool AdjacentOnly() const { return m_adjacentOnly; } + + + void ResetWeights(const std::string &denseWeights, const std::string &sparseFile); + + + // need global access for output of tree structure + const StatefulFeatureFunction* GetTreeStructure() const { + return m_treeStructure; + } + + void SetTreeStructure(const StatefulFeatureFunction* treeStructure) { + m_treeStructure = treeStructure; + } + }; } diff --git a/moses/TranslationModel/DynSAInclude/onlineRLM.h b/moses/TranslationModel/DynSAInclude/onlineRLM.h index 929602399..1e7a9c2d6 100644 --- a/moses/TranslationModel/DynSAInclude/onlineRLM.h +++ b/moses/TranslationModel/DynSAInclude/onlineRLM.h @@ -43,10 +43,10 @@ public: alpha_[i] = i * log10(0.4); } ~OnlineRLM() { - if(alpha_) delete[] alpha_; + delete[] alpha_; if(bAdapting_) delete vocab_; else vocab_ = NULL; - if(cache_) delete cache_; + delete cache_; delete bPrefix_; delete bHit_; } diff --git a/phrase-extract/consolidate-main.cpp b/phrase-extract/consolidate-main.cpp index b318561df..de0d7f646 100644 --- a/phrase-extract/consolidate-main.cpp +++ b/phrase-extract/consolidate-main.cpp @@ -235,8 +235,8 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC // SCORES ... string directScores, directSparseScores, indirectScores, indirectSparseScores; - breakdownCoreAndSparse( itemDirect[2], directScores, directSparseScores ); - breakdownCoreAndSparse( itemIndirect[2], indirectScores, indirectSparseScores ); + breakdownCoreAndSparse( itemDirect[3], directScores, directSparseScores ); + breakdownCoreAndSparse( itemIndirect[3], indirectScores, indirectSparseScores ); vector directCounts = tokenize(itemDirect[4].c_str()); vector indirectCounts = tokenize(itemIndirect[4].c_str()); @@ -307,7 +307,7 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC } // alignment - fileConsolidated << " ||| " << itemDirect[3]; + fileConsolidated << " ||| " << itemDirect[2]; // counts, for debugging fileConsolidated << "||| " << countE << " " << countF << " " << countEF; diff --git a/phrase-extract/score-main.cpp b/phrase-extract/score-main.cpp index a54433448..e69de29bb 100644 --- a/phrase-extract/score-main.cpp +++ b/phrase-extract/score-main.cpp @@ -1,909 +0,0 @@ -/*********************************************************************** - Moses - factored phrase-based language decoder - Copyright (C) 2009 University of Edinburgh - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - ***********************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "SafeGetline.h" -#include "ScoreFeature.h" -#include "tables-core.h" -#include "ExtractionPhrasePair.h" -#include "score.h" -#include "InputFileStream.h" -#include "OutputFileStream.h" - -using namespace std; -using namespace MosesTraining; - -#define LINE_MAX_LENGTH 100000 - -namespace MosesTraining -{ -LexicalTable lexTable; -bool inverseFlag = false; -bool hierarchicalFlag = false; -bool pcfgFlag = false; -bool treeFragmentsFlag = false; -bool unpairedExtractFormatFlag = false; -bool conditionOnTargetLhsFlag = false; -bool wordAlignmentFlag = true; -bool goodTuringFlag = false; -bool kneserNeyFlag = false; -bool logProbFlag = false; -int negLogProb = 1; -#define COC_MAX 10 -bool lexFlag = true; -bool unalignedFlag = false; -bool unalignedFWFlag = false; -bool crossedNonTerm = false; -int countOfCounts[COC_MAX+1]; -int totalDistinct = 0; -float minCountHierarchical = 0; -std::map sourceLHSCounts; -std::map* > targetLHSAndSourceLHSJointCounts; - -std::set sourceLabelSet; -std::map sourceLabels; -std::vector sourceLabelsByIndex; - -Vocabulary vcbT; -Vocabulary vcbS; - -} // namespace - -std::vector tokenize( const char [] ); - -void processLine( std::string line, - int lineID, bool includeSentenceIdFlag, int &sentenceId, - PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment, - std::string &additionalPropertiesString, - float &count, float &pcfgSum ); -void writeCountOfCounts( const std::string &fileNameCountOfCounts ); -void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile, - const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb ); -void outputPhrasePair(const ExtractionPhrasePair &phrasePair, float, int, ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLog ); -double computeLexicalTranslation( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource ); -double computeUnalignedPenalty( const ALIGNMENT *alignmentTargetToSource ); -set functionWordList; -void loadFunctionWords( const string &fileNameFunctionWords ); -double computeUnalignedFWPenalty( const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource ); -int calcCrossedNonTerm( const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource ); -void printSourcePhrase( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *targetToSourceAlignment, ostream &out ); -void printTargetPhrase( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *targetToSourceAlignment, ostream &out ); -void invertAlignment( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment ); - - -int main(int argc, char* argv[]) -{ - std::cerr << "Score v2.1 -- " - << "scoring methods for extracted rules" << std::endl; - - ScoreFeatureManager featureManager; - if (argc < 4) { - std::cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] 
[--NoWordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--PCFG] [--TreeFragments] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--CrossedNonTerm]" << std::endl; - std::cerr << featureManager.usage() << std::endl; - exit(1); - } - std::string fileNameExtract = argv[1]; - std::string fileNameLex = argv[2]; - std::string fileNamePhraseTable = argv[3]; - std::string fileNameCountOfCounts; - std::string fileNameFunctionWords; - std::vector featureArgs; // all unknown args passed to feature manager - - for(int i=4; iOpen(fileNamePhraseTable); - if (!success) { - std::cerr << "ERROR: could not open file phrase table file " - << fileNamePhraseTable << std::endl; - exit(1); - } - phraseTableFile = outputFile; - } - - // loop through all extracted phrase translations - char line[LINE_MAX_LENGTH], lastLine[LINE_MAX_LENGTH]; - lastLine[0] = '\0'; - ExtractionPhrasePair *phrasePair = NULL; - std::vector< ExtractionPhrasePair* > phrasePairsWithSameSource; - std::vector< ExtractionPhrasePair* > phrasePairsWithSameSourceAndTarget; // required for hierarchical rules only, as non-terminal alignments might make the phrases incompatible - - int tmpSentenceId; - PHRASE *tmpPhraseSource, *tmpPhraseTarget; - ALIGNMENT *tmpTargetToSourceAlignment; - std::string tmpAdditionalPropertiesString; - float tmpCount=0.0f, tmpPcfgSum=0.0f; - - int i=0; - SAFE_GETLINE( (extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__ ); - if ( !extractFileP.eof() ) { - ++i; - tmpPhraseSource = new PHRASE(); - tmpPhraseTarget = new PHRASE(); - tmpTargetToSourceAlignment = new ALIGNMENT(); - processLine( std::string(line), - i, featureManager.includeSentenceId(), tmpSentenceId, - tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment, - tmpAdditionalPropertiesString, - tmpCount, tmpPcfgSum); - phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget, - tmpTargetToSourceAlignment, - tmpCount, tmpPcfgSum ); - phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount ); - featureManager.addPropertiesToPhrasePair( *phrasePair, tmpCount, tmpSentenceId ); - phrasePairsWithSameSource.push_back( phrasePair ); - if ( hierarchicalFlag ) { - phrasePairsWithSameSourceAndTarget.push_back( phrasePair ); - } - strcpy( lastLine, line ); - SAFE_GETLINE( (extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__ ); - } - - while ( !extractFileP.eof() ) { - - if ( ++i % 100000 == 0 ) { - std::cerr << "." << std::flush; - } - - // identical to last line? 
just add count - if (strcmp(line,lastLine) == 0) { - phrasePair->IncrementPrevious(tmpCount,tmpPcfgSum); - SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__); - continue; - } else { - strcpy( lastLine, line ); - } - - tmpPhraseSource = new PHRASE(); - tmpPhraseTarget = new PHRASE(); - tmpTargetToSourceAlignment = new ALIGNMENT(); - tmpAdditionalPropertiesString.clear(); - processLine( std::string(line), - i, featureManager.includeSentenceId(), tmpSentenceId, - tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment, - tmpAdditionalPropertiesString, - tmpCount, tmpPcfgSum); - - bool matchesPrevious = false; - bool sourceMatch = true; bool targetMatch = true; bool alignmentMatch = true; // be careful with these, - // ExtractionPhrasePair::Matches() checks them in order and does not continue with the others - // once the first of them has been found to have to be set to false - - if ( hierarchicalFlag ) { - for ( std::vector< ExtractionPhrasePair* >::const_iterator iter = phrasePairsWithSameSourceAndTarget.begin(); - iter != phrasePairsWithSameSourceAndTarget.end(); ++iter ) { - if ( (*iter)->Matches( tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment, - sourceMatch, targetMatch, alignmentMatch ) ) { - matchesPrevious = true; - phrasePair = (*iter); - break; - } - } - } else { - if ( phrasePair->Matches( tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment, - sourceMatch, targetMatch, alignmentMatch ) ) { - matchesPrevious = true; - } - } - - if ( matchesPrevious ) { - delete tmpPhraseSource; - delete tmpPhraseTarget; - if ( !phrasePair->Add( tmpTargetToSourceAlignment, - tmpCount, tmpPcfgSum ) ) { - delete tmpTargetToSourceAlignment; - } - phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount ); - featureManager.addPropertiesToPhrasePair( *phrasePair, tmpCount, tmpSentenceId ); - } else { - - if ( !phrasePairsWithSameSource.empty() && - !sourceMatch ) { - processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb ); - for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); - iter!=phrasePairsWithSameSource.end(); ++iter) { - delete *iter; - } - phrasePairsWithSameSource.clear(); - if ( hierarchicalFlag ) { - phrasePairsWithSameSourceAndTarget.clear(); - } - } - - if ( hierarchicalFlag ) { - if ( !phrasePairsWithSameSourceAndTarget.empty() && - !targetMatch ) { - phrasePairsWithSameSourceAndTarget.clear(); - } - } - - phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget, - tmpTargetToSourceAlignment, - tmpCount, tmpPcfgSum ); - phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount ); - featureManager.addPropertiesToPhrasePair( *phrasePair, tmpCount, tmpSentenceId ); - phrasePairsWithSameSource.push_back(phrasePair); - - if ( hierarchicalFlag ) { - phrasePairsWithSameSourceAndTarget.push_back(phrasePair); - } - } - - SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__); - - } - - processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb ); - for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); - iter!=phrasePairsWithSameSource.end(); ++iter) { - delete *iter; - } - phrasePairsWithSameSource.clear(); - - - phraseTableFile->flush(); - if (phraseTableFile != &std::cout) { - delete phraseTableFile; - } - - // output count of count statistics - if (goodTuringFlag || kneserNeyFlag) { - writeCountOfCounts( fileNameCountOfCounts ); - } -} - - 
-void processLine( std::string line, - int lineID, bool includeSentenceIdFlag, int &sentenceId, - PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment, - std::string &additionalPropertiesString, - float &count, float &pcfgSum ) -{ - size_t foundAdditionalProperties = line.find("{{"); - if (foundAdditionalProperties != std::string::npos) { - additionalPropertiesString = line.substr(foundAdditionalProperties); - line = line.substr(0,foundAdditionalProperties); - } else { - additionalPropertiesString.clear(); - } - - phraseSource->clear(); - phraseTarget->clear(); - targetToSourceAlignment->clear(); - - std::vector token = tokenize( line.c_str() ); - int item = 1; - for ( size_t j=0; jpush_back( vcbS.storeIfNew( token[j] ) ); - } else if (item == 2) { // target phrase - phraseTarget->push_back( vcbT.storeIfNew( token[j] ) ); - } else if (item == 3) { // alignment - int s,t; - sscanf(token[j].c_str(), "%d-%d", &s, &t); - if ((size_t)t >= phraseTarget->size() || (size_t)s >= phraseSource->size()) { - std::cerr << "WARNING: phrase pair " << lineID - << " has alignment point (" << s << ", " << t << ")" - << " out of bounds (" << phraseSource->size() << ", " << phraseTarget->size() << ")" - << std::endl; - } else { - // first alignment point? -> initialize - if ( targetToSourceAlignment->size() == 0 ) { - size_t numberOfTargetSymbols = (hierarchicalFlag ? phraseTarget->size()-1 : phraseTarget->size()); - targetToSourceAlignment->resize(numberOfTargetSymbols); - } - // add alignment point - targetToSourceAlignment->at(t).insert(s); - } - } else if (includeSentenceIdFlag && item == 4) { // optional sentence id - sscanf(token[j].c_str(), "%d", &sentenceId); - } else if (item + (includeSentenceIdFlag?-1:0) == 4) { // count - sscanf(token[j].c_str(), "%f", &count); - } else if (item + (includeSentenceIdFlag?-1:0) == 5) { // target syntax PCFG score - float pcfgScore = std::atof(token[j].c_str()); - pcfgSum = pcfgScore * count; - } - } - - if ( targetToSourceAlignment->size() == 0 ) { - size_t numberOfTargetSymbols = (hierarchicalFlag ? 
phraseTarget->size()-1 : phraseTarget->size()); - targetToSourceAlignment->resize(numberOfTargetSymbols); - } - - if (item + (includeSentenceIdFlag?-1:0) == 3) { - count = 1.0; - } - if (item < 3 || item > 6) { - std::cerr << "ERROR: faulty line " << lineID << ": " << line << endl; - } - -} - - -void writeCountOfCounts( const string &fileNameCountOfCounts ) -{ - // open file - Moses::OutputFileStream countOfCountsFile; - bool success = countOfCountsFile.Open(fileNameCountOfCounts.c_str()); - if (!success) { - std::cerr << "ERROR: could not open count-of-counts file " - << fileNameCountOfCounts << std::endl; - return; - } - - // Kneser-Ney needs the total number of phrase pairs - countOfCountsFile << totalDistinct << std::endl; - - // write out counts - for(int i=1; i<=COC_MAX; i++) { - countOfCountsFile << countOfCounts[ i ] << std::endl; - } - countOfCountsFile.Close(); -} - - -void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile, - const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb ) -{ - if (phrasePairsWithSameSource.size() == 0) { - return; - } - - float totalSource = 0; - - //std::cerr << "phrasePairs.size() = " << phrasePairs.size() << std::endl; - - // loop through phrase pairs - for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); - iter!=phrasePairsWithSameSource.end(); ++iter) { - // add to total count - totalSource += (*iter)->GetCount(); - } - - // output the distinct phrase pairs, one at a time - for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); - iter!=phrasePairsWithSameSource.end(); ++iter) { - // add to total count - outputPhrasePair( **iter, totalSource, phrasePairsWithSameSource.size(), phraseTableFile, featureManager, maybeLogProb ); - } -} - -void outputPhrasePair(const ExtractionPhrasePair &phrasePair, - float totalCount, int distinctCount, - ostream &phraseTableFile, - const ScoreFeatureManager& featureManager, - const MaybeLog& maybeLogProb ) -{ - assert(phrasePair.IsValid()); - - const ALIGNMENT *bestAlignmentT2S = phrasePair.FindBestAlignmentTargetToSource(); - float count = phrasePair.GetCount(); - - map< string, float > domainCount; - - // collect count of count statistics - if (goodTuringFlag || kneserNeyFlag) { - totalDistinct++; - int countInt = count + 0.99999; - if (countInt <= COC_MAX) - countOfCounts[ countInt ]++; - } - - // compute PCFG score - float pcfgScore = 0; - if (pcfgFlag && !inverseFlag) { - pcfgScore = phrasePair.GetPcfgScore() / count; - } - - // output phrases - const PHRASE *phraseSource = phrasePair.GetSource(); - const PHRASE *phraseTarget = phrasePair.GetTarget(); - - // do not output if hierarchical and count below threshold - if (hierarchicalFlag && count < minCountHierarchical) { - for(size_t j=0; jsize()-1; ++j) { - if (isNonTerminal(vcbS.getWord( phraseSource->at(j) ))) - return; - } - } - - // source phrase (unless inverse) - if (!inverseFlag) { - printSourcePhrase(phraseSource, phraseTarget, bestAlignmentT2S, phraseTableFile); - phraseTableFile << " ||| "; - } - - // target phrase - printTargetPhrase(phraseSource, phraseTarget, bestAlignmentT2S, phraseTableFile); - phraseTableFile << " ||| "; - - // source phrase (if inverse) - if (inverseFlag) { - printSourcePhrase(phraseSource, phraseTarget, bestAlignmentT2S, phraseTableFile); - phraseTableFile << " ||| "; - } - - // lexical translation probability - if (lexFlag) { - double lexScore = 
computeLexicalTranslation( phraseSource, phraseTarget, bestAlignmentT2S ); - phraseTableFile << maybeLogProb( lexScore ); - } - - // unaligned word penalty - if (unalignedFlag) { - double penalty = computeUnalignedPenalty( bestAlignmentT2S ); - phraseTableFile << " " << maybeLogProb( penalty ); - } - - // unaligned function word penalty - if (unalignedFWFlag) { - double penalty = computeUnalignedFWPenalty( phraseTarget, bestAlignmentT2S ); - phraseTableFile << " " << maybeLogProb( penalty ); - } - - if (crossedNonTerm && !inverseFlag) { - phraseTableFile << " " << calcCrossedNonTerm( phraseTarget, bestAlignmentT2S ); - } - - // target-side PCFG score - if (pcfgFlag && !inverseFlag) { - phraseTableFile << " " << maybeLogProb( pcfgScore ); - } - - // extra features - ScoreFeatureContext context(phrasePair, maybeLogProb); - std::vector extraDense; - map extraSparse; - featureManager.addFeatures(context, extraDense, extraSparse); - for (size_t i = 0; i < extraDense.size(); ++i) { - phraseTableFile << " " << extraDense[i]; - } - - for (map::const_iterator i = extraSparse.begin(); - i != extraSparse.end(); ++i) { - phraseTableFile << " " << i->first << " " << i->second; - } - - phraseTableFile << " ||| "; - - // output alignment info - if ( !inverseFlag ) { - if ( hierarchicalFlag ) { - // always output alignment if hiero style - assert(phraseTarget->size() == bestAlignmentT2S->size()+1); - std::vector alignment; - for ( size_t j = 0; j < phraseTarget->size() - 1; ++j ) { - if ( isNonTerminal(vcbT.getWord( phraseTarget->at(j) ))) { - if ( bestAlignmentT2S->at(j).size() != 1 ) { - std::cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << std::endl; - phraseTableFile.flush(); - assert(bestAlignmentT2S->at(j).size() == 1); - } - size_t sourcePos = *(bestAlignmentT2S->at(j).begin()); - //phraseTableFile << sourcePos << "-" << j << " "; - std::stringstream point; - point << sourcePos << "-" << j; - alignment.push_back(point.str()); - } else { - for ( std::set::iterator setIter = (bestAlignmentT2S->at(j)).begin(); - setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) { - size_t sourcePos = *setIter; - std::stringstream point; - point << sourcePos << "-" << j; - alignment.push_back(point.str()); - } - } - } - // now print all alignments, sorted by source index - sort(alignment.begin(), alignment.end()); - for (size_t i = 0; i < alignment.size(); ++i) { - phraseTableFile << alignment[i] << " "; - } - } else if (wordAlignmentFlag) { - // alignment info in pb model - for (size_t j = 0; j < bestAlignmentT2S->size(); ++j) { - for ( std::set::iterator setIter = (bestAlignmentT2S->at(j)).begin(); - setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) { - size_t sourcePos = *setIter; - phraseTableFile << sourcePos << "-" << j << " "; - } - } - } - } - - // counts - phraseTableFile << " ||| " << totalCount << " " << count; - if (kneserNeyFlag) - phraseTableFile << " " << distinctCount; - - if ((treeFragmentsFlag) && - !inverseFlag) { - phraseTableFile << " |||"; - } - - // tree fragments - if (treeFragmentsFlag && !inverseFlag) { - const std::string *bestTreeFragment = phrasePair.FindBestPropertyValue("Tree"); - if (bestTreeFragment) { - phraseTableFile << " {{Tree " << *bestTreeFragment << "}}"; - } - } - - phraseTableFile << std::endl; -} - - - -bool calcCrossedNonTerm( size_t targetPos, size_t sourcePos, const ALIGNMENT *alignmentTargetToSource ) -{ - for (size_t currTarget = 0; currTarget < alignmentTargetToSource->size(); 
++currTarget) { - if (currTarget == targetPos) { - // skip - } else { - const std::set &sourceSet = alignmentTargetToSource->at(currTarget); - for (std::set::const_iterator iter = sourceSet.begin(); - iter != sourceSet.end(); ++iter) { - size_t currSource = *iter; - - if ((currTarget < targetPos && currSource > sourcePos) - || (currTarget > targetPos && currSource < sourcePos) - ) { - return true; - } - } - - } - } - - return false; -} - -int calcCrossedNonTerm( const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource ) -{ - assert(phraseTarget->size() >= alignmentTargetToSource->size() ); - - for (size_t targetPos = 0; targetPos < alignmentTargetToSource->size(); ++targetPos) { - - if ( isNonTerminal(vcbT.getWord( phraseTarget->at(targetPos) ))) { - const std::set &alignmentPoints = alignmentTargetToSource->at(targetPos); - assert( alignmentPoints.size() == 1 ); - size_t sourcePos = *alignmentPoints.begin(); - bool ret = calcCrossedNonTerm(targetPos, sourcePos, alignmentTargetToSource); - if (ret) - return 1; - } - } - - return 0; -} - - -double computeUnalignedPenalty( const ALIGNMENT *alignmentTargetToSource ) -{ - // unaligned word counter - double unaligned = 1.0; - // only checking target words - source words are caught when computing inverse - for(size_t ti=0; tisize(); ++ti) { - const set< size_t > & srcIndices = alignmentTargetToSource->at(ti); - if (srcIndices.empty()) { - unaligned *= 2.718; - } - } - return unaligned; -} - - -double computeUnalignedFWPenalty( const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource ) -{ - // unaligned word counter - double unaligned = 1.0; - // only checking target words - source words are caught when computing inverse - for(size_t ti=0; tisize(); ++ti) { - const set< size_t > & srcIndices = alignmentTargetToSource->at(ti); - if (srcIndices.empty() && functionWordList.find( vcbT.getWord( phraseTarget->at(ti) ) ) != functionWordList.end()) { - unaligned *= 2.718; - } - } - return unaligned; -} - -void loadFunctionWords( const string &fileName ) -{ - std::cerr << "Loading function word list from " << fileName; - ifstream inFile; - inFile.open(fileName.c_str()); - if (inFile.fail()) { - std::cerr << " - ERROR: could not open file" << std::endl; - exit(1); - } - istream *inFileP = &inFile; - - char line[LINE_MAX_LENGTH]; - while(true) { - SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__); - if (inFileP->eof()) break; - std::vector token = tokenize( line ); - if (token.size() > 0) - functionWordList.insert( token[0] ); - } - - std::cerr << " - read " << functionWordList.size() << " function words" << std::endl; - inFile.close(); -} - - -double computeLexicalTranslation( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource ) -{ - // lexical translation probability - double lexScore = 1.0; - int null = vcbS.getWordID("NULL"); - // all target words have to be explained - for(size_t ti=0; tisize(); ti++) { - const set< size_t > & srcIndices = alignmentTargetToSource->at(ti); - if (srcIndices.empty()) { - // explain unaligned word by NULL - lexScore *= lexTable.permissiveLookup( null, phraseTarget->at(ti) ); - } else { - // go through all the aligned words to compute average - double thisWordScore = 0; - for (set< size_t >::const_iterator p(srcIndices.begin()); p != srcIndices.end(); ++p) { - thisWordScore += lexTable.permissiveLookup( phraseSource->at(*p), phraseTarget->at(ti) ); - } - lexScore *= thisWordScore / (double)srcIndices.size(); - } - } - return lexScore; -} - - 
-void LexicalTable::load( const string &fileName ) -{ - std::cerr << "Loading lexical translation table from " << fileName; - ifstream inFile; - inFile.open(fileName.c_str()); - if (inFile.fail()) { - std::cerr << " - ERROR: could not open file" << std::endl; - exit(1); - } - istream *inFileP = &inFile; - - char line[LINE_MAX_LENGTH]; - - int i=0; - while(true) { - i++; - if (i%100000 == 0) std::cerr << "." << flush; - SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__); - if (inFileP->eof()) break; - - std::vector token = tokenize( line ); - if (token.size() != 3) { - std::cerr << "line " << i << " in " << fileName - << " has wrong number of tokens, skipping:" << std::endl - << token.size() << " " << token[0] << " " << line << std::endl; - continue; - } - - double prob = atof( token[2].c_str() ); - WORD_ID wordT = vcbT.storeIfNew( token[0] ); - WORD_ID wordS = vcbS.storeIfNew( token[1] ); - ltable[ wordS ][ wordT ] = prob; - } - std::cerr << std::endl; -} - - -void printSourcePhrase(const PHRASE *phraseSource, const PHRASE *phraseTarget, - const ALIGNMENT *targetToSourceAlignment, ostream &out) -{ - // get corresponding target non-terminal and output pair - ALIGNMENT *sourceToTargetAlignment = new ALIGNMENT(); - invertAlignment(phraseSource, phraseTarget, targetToSourceAlignment, sourceToTargetAlignment); - // output source symbols, except root, in rule table format - for (std::size_t i = 0; i < phraseSource->size()-1; ++i) { - const std::string &word = vcbS.getWord(phraseSource->at(i)); - if (!unpairedExtractFormatFlag || !isNonTerminal(word)) { - out << word << " "; - continue; - } - const std::set &alignmentPoints = sourceToTargetAlignment->at(i); - assert(alignmentPoints.size() == 1); - size_t j = *(alignmentPoints.begin()); - if (inverseFlag) { - out << vcbT.getWord(phraseTarget->at(j)) << word << " "; - } else { - out << word << vcbT.getWord(phraseTarget->at(j)) << " "; - } - } - // output source root symbol - if (conditionOnTargetLhsFlag && !inverseFlag) { - out << "[X]"; - } else { - out << vcbS.getWord(phraseSource->back()); - } - delete sourceToTargetAlignment; -} - - -void printTargetPhrase(const PHRASE *phraseSource, const PHRASE *phraseTarget, - const ALIGNMENT *targetToSourceAlignment, ostream &out) -{ - // output target symbols, except root, in rule table format - for (std::size_t i = 0; i < phraseTarget->size()-1; ++i) { - const std::string &word = vcbT.getWord(phraseTarget->at(i)); - if (!unpairedExtractFormatFlag || !isNonTerminal(word)) { - out << word << " "; - continue; - } - // get corresponding source non-terminal and output pair - std::set alignmentPoints = targetToSourceAlignment->at(i); - assert(alignmentPoints.size() == 1); - int j = *(alignmentPoints.begin()); - if (inverseFlag) { - out << word << vcbS.getWord(phraseSource->at(j)) << " "; - } else { - out << vcbS.getWord(phraseSource->at(j)) << word << " "; - } - } - // output target root symbol - if (conditionOnTargetLhsFlag) { - if (inverseFlag) { - out << "[X]"; - } else { - out << vcbS.getWord(phraseSource->back()); - } - } else { - out << vcbT.getWord(phraseTarget->back()); - } -} - - -void invertAlignment(const PHRASE *phraseSource, const PHRASE *phraseTarget, - const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment) { -// typedef std::vector< std::set > ALIGNMENT; - - outSourceToTargetAlignment->clear(); - size_t numberOfSourceSymbols = (hierarchicalFlag ? 
phraseSource->size()-1 : phraseSource->size()); - outSourceToTargetAlignment->resize(numberOfSourceSymbols); - // add alignment point - for (size_t targetPosition = 0; targetPosition < inTargetToSourceAlignment->size(); ++targetPosition) { - for ( std::set::iterator setIter = (inTargetToSourceAlignment->at(targetPosition)).begin(); - setIter != (inTargetToSourceAlignment->at(targetPosition)).end(); ++setIter ) { - size_t sourcePosition = *setIter; - outSourceToTargetAlignment->at(sourcePosition).insert(targetPosition); - } - } -} -
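
Editorial note (illustration only, not part of the patch): the StaticData::ResetWeights() method added above parses two ad-hoc formats — a dense-weight string in which any whitespace-separated token ending in "=" (e.g. "LM0=") opens a new feature-function block and the following numeric tokens are that function's weights, and a sparse-weight file with one "FFName_sparseName weight" pair per line. The standalone C++ sketch below mirrors only the dense-weight tokenization under those assumptions; the feature-function names are invented for the example, and the std::map is a stand-in for Moses' ScoreComponentCollection rather than the real API.

// reset_weights_demo.cpp — sketch of the dense-weight format that the new
// StaticData::ResetWeights appears to expect ("Name0= 0.3 0.2 Name1= 0.5 ...").
// The map below is a placeholder for Moses' ScoreComponentCollection.
#include <cstdlib>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

int main() {
  // Hypothetical example string; feature names are illustrative only.
  const std::string denseWeights =
      "LM0= 0.5 Distortion0= 0.3 TranslationModel0= 0.2 0.2 0.2 0.2";

  std::map<std::string, std::vector<float> > weights;  // FF name -> dense weights
  std::string name;
  std::vector<float> current;

  std::istringstream in(denseWeights);
  std::string tok;
  while (in >> tok) {
    if (!tok.empty() && tok[tok.size() - 1] == '=') {
      // A token such as "LM0=" starts a new feature-function block:
      // flush the weights collected for the previous block first.
      if (!name.empty()) weights[name] = current;
      name = tok.substr(0, tok.size() - 1);
      current.clear();
    } else {
      // Otherwise the token is one dense weight for the current block.
      current.push_back(static_cast<float>(std::atof(tok.c_str())));
    }
  }
  if (!name.empty()) weights[name] = current;  // flush the final block

  for (std::map<std::string, std::vector<float> >::const_iterator it = weights.begin();
       it != weights.end(); ++it) {
    std::cout << it->first << " :";
    for (size_t i = 0; i < it->second.size(); ++i) std::cout << " " << it->second[i];
    std::cout << "\n";
  }
  return 0;
}

Compiled with e.g. "g++ -o reset_weights_demo reset_weights_demo.cpp", this prints each feature-function name followed by the weights parsed for it, which is the same grouping the patch assigns via m_allWeights.Assign(&ff, weights).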