From b9bef2fc44fce020f78018cbc71f2f375f5071ee Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 7 Jan 2015 10:26:12 +0400 Subject: [PATCH] add oovpt --- contrib/other-builds/moses/.project | 10 ++ .../ChartRuleLookupManagerOOVPT.cpp | 102 ++++++++++++++++++ .../ChartRuleLookupManagerOOVPT.h | 57 ++++++++++ moses/TranslationModel/OOVPT.cpp | 67 +++++++++--- moses/TranslationModel/OOVPT.h | 2 +- 5 files changed, 220 insertions(+), 18 deletions(-) create mode 100644 moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.cpp create mode 100644 moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index 4ad57e348..0202708c3 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -2455,6 +2455,16 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h + + TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.cpp + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.cpp + + + TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h + TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp 1 diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.cpp new file mode 100644 index 000000000..f348ea62d --- /dev/null +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.cpp @@ -0,0 +1,102 @@ +/*********************************************************************** + Moses - factored phrase-based language decoder + Copyright (C) 2011 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#include +#include "ChartRuleLookupManagerOOVPT.h" +#include "DotChartInMemory.h" + +#include "moses/Util.h" +#include "moses/ChartParser.h" +#include "moses/InputType.h" +#include "moses/ChartParserCallback.h" +#include "moses/StaticData.h" +#include "moses/NonTerminal.h" +#include "moses/ChartCellCollection.h" +#include "moses/TranslationModel/PhraseDictionaryMemory.h" +#include "moses/TranslationModel/OOVPT.h" + +using namespace std; + +namespace Moses +{ + +ChartRuleLookupManagerOOVPT::ChartRuleLookupManagerOOVPT( + const ChartParser &parser, + const ChartCellCollectionBase &cellColl, + const OOVPT &oovPt) + : ChartRuleLookupManager(parser, cellColl) + , m_oovPt(oovPt) +{ + cerr << "starting ChartRuleLookupManagerOOVPT" << endl; +} + +ChartRuleLookupManagerOOVPT::~ChartRuleLookupManagerOOVPT() +{ + RemoveAllInColl(m_tpColl); +} + +void ChartRuleLookupManagerOOVPT::GetChartRuleCollection( + const WordsRange &range, + size_t last, + ChartParserCallback &outColl) +{ + //m_tpColl.push_back(TargetPhraseCollection()); + //TargetPhraseCollection &tpColl = m_tpColl.back(); + TargetPhraseCollection *tpColl = new TargetPhraseCollection(); + m_tpColl.push_back(tpColl); + + if (range.GetNumWordsCovered() == 1) { + const ChartCellLabel &sourceWordLabel = GetSourceAt(range.GetStartPos()); + const Word &sourceWord = sourceWordLabel.GetLabel(); + CreateTargetPhrases(sourceWord, *tpColl); + } + + outColl.Add(*tpColl, m_stackVec, range); +} + +void ChartRuleLookupManagerOOVPT::CreateTargetPhrases(const Word &sourceWord, TargetPhraseCollection &tpColl) const +{ + const StaticData &staticData = StaticData::Instance(); + const UnknownLHSList &lhsList = staticData.GetUnknownLHS(); + UnknownLHSList::const_iterator iterLHS; + for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) { + const string &targetLHSStr = iterLHS->first; + float prob = iterLHS->second; + + // lhs + //const Word &sourceLHS = staticData.GetInputDefaultNonTerminal(); + Word *targetLHS = new Word(true); + + targetLHS->CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true); + UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS"); + + // add to dictionary + TargetPhrase *targetPhrase = m_oovPt.CreateTargetPhrase(sourceWord); + + //targetPhrase->EvaluateInIsolation(*unksrc); + targetPhrase->SetTargetLHS(targetLHS); + if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.PrintNBestTrees() || staticData.GetTreeStructure() != NULL) { + targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]"); + } + + // chart rule + tpColl.Add(targetPhrase); + } +} +} // namespace Moses diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h new file mode 100644 index 000000000..fb9fdbabf --- /dev/null +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h @@ -0,0 +1,57 @@ +/*********************************************************************** + Moses - factored phrase-based language decoder + Copyright (C) 2011 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#pragma once + +#include +#include "moses/ChartRuleLookupManager.h" +#include "moses/StackVec.h" + +namespace Moses +{ +class TargetPhraseCollection; +class ChartParserCallback; +class DottedRuleColl; +class WordsRange; +class OOVPT; + +class ChartRuleLookupManagerOOVPT : public ChartRuleLookupManager +{ +public: + ChartRuleLookupManagerOOVPT(const ChartParser &parser, + const ChartCellCollectionBase &cellColl, + const OOVPT &oovPt); + + ~ChartRuleLookupManagerOOVPT(); + + virtual void GetChartRuleCollection( + const WordsRange &range, + size_t last, + ChartParserCallback &outColl); + +private: + void CreateTargetPhrases(const Word &sourceWord, TargetPhraseCollection &tpColl) const; + + StackVec m_stackVec; + std::vector m_tpColl; + const OOVPT &m_oovPt; +}; + +} // namespace Moses + diff --git a/moses/TranslationModel/OOVPT.cpp b/moses/TranslationModel/OOVPT.cpp index c66e6ba1b..031221305 100644 --- a/moses/TranslationModel/OOVPT.cpp +++ b/moses/TranslationModel/OOVPT.cpp @@ -1,5 +1,7 @@ // vim:tabstop=2 #include "OOVPT.h" +#include "moses/StaticData.h" +#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h" using namespace std; @@ -29,8 +31,11 @@ void OOVPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) { InputPath &inputPath = **iter; const Phrase &sourcePhrase = inputPath.GetPhrase(); + const Word &sourceWord = sourcePhrase.GetWord(0); + + TargetPhrase *tp = CreateTargetPhrase(sourceWord); + tp->EvaluateInIsolation(sourcePhrase); - TargetPhrase *tp = CreateTargetPhrase(sourcePhrase); TargetPhraseCollection *tpColl = new TargetPhraseCollection(); tpColl->Add(tp); @@ -43,34 +48,62 @@ void OOVPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) } } -TargetPhrase *OOVPT::CreateTargetPhrase(const Phrase &sourcePhrase) const +TargetPhrase *OOVPT::CreateTargetPhrase(const Word &sourceWord) const { - // create a target phrase from the 1st word of the source, prefix with 'OOVPT:' - assert(sourcePhrase.GetSize()); - assert(m_output.size() == 1); + // unknown word, add as trans opt + const StaticData &staticData = StaticData::Instance(); + FactorCollection &factorCollection = FactorCollection::Instance(); - string str = sourcePhrase.GetWord(0).GetFactor(0)->GetString().as_string(); + size_t isDigit = 0; - TargetPhrase *tp = new TargetPhrase(this); - Word &word = tp->AddWord(); - word.CreateFromString(Output, m_output, str, false); + const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface + const StringPiece s = f->GetString(); + bool isEpsilon = (s=="" || s==EPSILON); + if (staticData.GetDropUnknown()) { + isDigit = s.find_first_of("0123456789"); + if (isDigit == string::npos) + isDigit = 0; + else + isDigit = 1; + // modify the starting bitmap + } - // score for this phrase table - vector scores(m_numScoreComponents, 1.3); - tp->GetScoreBreakdown().PlusEquals(this, scores); + TargetPhrase *targetPhrase = new TargetPhrase(this); - // score of all other ff when this rule is being loaded - tp->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply()); + if (!(staticData.GetDropUnknown() || isEpsilon) || isDigit) { + // add to dictionary - return tp; + Word &targetWord = targetPhrase->AddWord(); + targetWord.SetIsOOV(true); + + for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) { + FactorType factorType = static_cast(currFactor); + + const Factor *sourceFactor = sourceWord[currFactor]; + if (sourceFactor == NULL) + targetWord[factorType] = factorCollection.AddFactor(UNKNOWN_FACTOR); + else + targetWord[factorType] = factorCollection.AddFactor(sourceFactor->GetString()); + } + //create a one-to-one alignment between UNKNOWN_FACTOR and its verbatim translation + + targetPhrase->SetAlignmentInfo("0-0"); + + } else { + // drop source word. create blank target phrase + } + + float unknownScore = FloorScore(TransformScore(0)); + targetPhrase->GetScoreBreakdown().Assign(this, unknownScore); + + return targetPhrase; } ChartRuleLookupManager* OOVPT::CreateRuleLookupManager(const ChartParser &parser, const ChartCellCollectionBase &cellCollection, std::size_t /*maxChartSpan*/) { - assert(false); - return NULL; + return new ChartRuleLookupManagerOOVPT(parser, cellCollection, *this); } TO_STRING_BODY(OOVPT); diff --git a/moses/TranslationModel/OOVPT.h b/moses/TranslationModel/OOVPT.h index 04e53c6ec..e4e96e67a 100644 --- a/moses/TranslationModel/OOVPT.h +++ b/moses/TranslationModel/OOVPT.h @@ -28,9 +28,9 @@ public: TO_STRING(); + TargetPhrase *CreateTargetPhrase(const Word &sourceWord) const; protected: - TargetPhrase *CreateTargetPhrase(const Phrase &sourcePhrase) const; }; } // namespace Moses