Delete the OOV phrase table (OOVPT)

This commit is contained in:
Hieu Hoang 2015-01-09 22:32:08 +00:00
parent a8d4b81e71
commit be0ab92d16
10 changed files with 2 additions and 357 deletions

View File

@ -1985,16 +1985,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSuffixArray.h</locationURI>
</link>
<link>
<name>TranslationModel/OOVPT.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/OOVPT.cpp</locationURI>
</link>
<link>
<name>TranslationModel/OOVPT.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/OOVPT.h</locationURI>
</link>
<link>
<name>TranslationModel/PhraseDictionary.cpp</name>
<type>1</type>
@ -2455,16 +2445,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h</locationURI>
</link>
<link>
<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.cpp</locationURI>
</link>
<link>
<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h</locationURI>
</link>
<link>
<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp</name>
<type>1</type>

View File

@ -9,7 +9,6 @@
#include "moses/TranslationModel/PhraseDictionaryScope3.h"
#include "moses/TranslationModel/PhraseDictionaryTransliteration.h"
#include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
#include "moses/TranslationModel/OOVPT.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
@ -184,7 +183,6 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(PhraseDictionaryDynamicCacheBased);
MOSES_FNAME(PhraseDictionaryFuzzyMatch);
MOSES_FNAME2("RuleTable", Syntax::RuleTableFF);
MOSES_FNAME(OOVPT);
MOSES_FNAME(GlobalLexicalModel);
//MOSES_FNAME(GlobalLexicalModelUnlimited); This was commented out in the original

View File

@ -635,7 +635,7 @@ void StaticData::LoadDecodeGraphs()
void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const vector<size_t> &maxChartSpans)
{
const vector<PhraseDictionary*>& pts = PhraseDictionary::GetCollExclOOVPt();
const vector<PhraseDictionary*>& pts = PhraseDictionary::GetColl();
const vector<GenerationDictionary*>& gens = GenerationDictionary::GetColl();
const std::vector<FeatureFunction*> *featuresRemaining = &FeatureFunction::GetFeatureFunctions();

View File

@ -1,104 +0,0 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <iostream>
#include "ChartRuleLookupManagerOOVPT.h"
#include "DotChartInMemory.h"
#include "moses/Util.h"
#include "moses/ChartParser.h"
#include "moses/InputType.h"
#include "moses/ChartParserCallback.h"
#include "moses/StaticData.h"
#include "moses/NonTerminal.h"
#include "moses/ChartCellCollection.h"
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
#include "moses/TranslationModel/OOVPT.h"
using namespace std;
namespace Moses
{
/**
 * Build a rule lookup manager that serves rules from an OOV phrase table.
 *
 * @param parser   forwarded to the ChartRuleLookupManager base
 * @param cellColl forwarded to the ChartRuleLookupManager base
 * @param oovPt    phrase table used to create target phrases; it is bound to
 *                 the member m_oovPt
 */
ChartRuleLookupManagerOOVPT::ChartRuleLookupManagerOOVPT(const ChartParser &parser,
    const ChartCellCollectionBase &cellColl,
    const OOVPT &oovPt)
  : ChartRuleLookupManager(parser, cellColl),
    m_oovPt(oovPt)
{
  // Trace line written to stderr each time a manager is constructed.
  std::cerr << "starting ChartRuleLookupManagerOOVPT" << std::endl;
}
// Destructor: hands m_tpColl, the collections allocated by
// GetChartRuleCollection(), to RemoveAllInColl for clean-up.
// NOTE(review): RemoveAllInColl is defined elsewhere; presumably it deletes
// every pointer in the container and clears it — confirm.
ChartRuleLookupManagerOOVPT::~ChartRuleLookupManagerOOVPT()
{
RemoveAllInColl(m_tpColl);
}
void ChartRuleLookupManagerOOVPT::GetChartRuleCollection(
const InputPath &inputPath,
size_t last,
ChartParserCallback &outColl)
{
const WordsRange &range = inputPath.GetWordsRange();
//m_tpColl.push_back(TargetPhraseCollection());
//TargetPhraseCollection &tpColl = m_tpColl.back();
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
m_tpColl.push_back(tpColl);
if (range.GetNumWordsCovered() == 1) {
const ChartCellLabel &sourceWordLabel = GetSourceAt(range.GetStartPos());
const Word &sourceWord = sourceWordLabel.GetLabel();
CreateTargetPhrases(sourceWord, *tpColl);
}
outColl.Add(*tpColl, m_stackVec, range);
}
void ChartRuleLookupManagerOOVPT::CreateTargetPhrases(const Word &sourceWord, TargetPhraseCollection &tpColl) const
{
const StaticData &staticData = StaticData::Instance();
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
UnknownLHSList::const_iterator iterLHS;
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
const string &targetLHSStr = iterLHS->first;
float prob = iterLHS->second;
// lhs
//const Word &sourceLHS = staticData.GetInputDefaultNonTerminal();
Word *targetLHS = new Word(true);
targetLHS->CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
// add to dictionary
TargetPhrase *targetPhrase = m_oovPt.CreateTargetPhrase(sourceWord);
//targetPhrase->EvaluateInIsolation(*unksrc);
targetPhrase->SetTargetLHS(targetLHS);
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.PrintNBestTrees() || staticData.GetTreeStructure() != NULL) {
targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
}
// chart rule
tpColl.Add(targetPhrase);
}
}
} // namespace Moses

View File

@ -1,57 +0,0 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <vector>
#include "moses/ChartRuleLookupManager.h"
#include "moses/StackVec.h"
namespace Moses
{
class TargetPhraseCollection;
class ChartParserCallback;
class DottedRuleColl;
class WordsRange;
class OOVPT;
// Rule lookup manager for chart decoding that is backed by an OOVPT phrase
// table: rules are created on demand from the source word of a span rather
// than looked up in a stored table.
class ChartRuleLookupManagerOOVPT : public ChartRuleLookupManager
{
public:
// parser and cellColl are forwarded to the base class; oovPt is kept by
// reference in m_oovPt.
ChartRuleLookupManagerOOVPT(const ChartParser &parser,
const ChartCellCollectionBase &cellColl,
const OOVPT &oovPt);
// Passes m_tpColl to RemoveAllInColl.
~ChartRuleLookupManagerOOVPT();
// Creates the rules for the span of inputPath and passes them to outColl.
// The `last` argument is not used by the implementation.
virtual void GetChartRuleCollection(
const InputPath &inputPath,
size_t last,
ChartParserCallback &outColl);
private:
// Adds to tpColl one target phrase per entry of the unknown-LHS list.
void CreateTargetPhrases(const Word &sourceWord, TargetPhraseCollection &tpColl) const;
// Passed to the callback together with every collection.
StackVec m_stackVec;
// Collections allocated by GetChartRuleCollection (one per call).
std::vector<TargetPhraseCollection*> m_tpColl;
// Phrase table used to create the target phrases.
const OOVPT &m_oovPt;
};
} // namespace Moses

View File

@ -1,117 +0,0 @@
// vim:tabstop=2
#include "OOVPT.h"
#include "moses/StaticData.h"
#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h"
using namespace std;
namespace Moses
{
/**
 * Construct the OOV phrase table.
 *
 * @param line passed unchanged to the PhraseDictionary base constructor;
 *             ReadParameters() is then invoked from the constructor body
 */
OOVPT::OOVPT(const std::string &line)
  : PhraseDictionary(line)
{
  ReadParameters();
}
/**
 * Load step for this phrase table: the only action is SetFeaturesToApply().
 */
void OOVPT::Load()
{
  SetFeaturesToApply();
}
/**
 * Per-input initialisation: calls ReduceCache(). The input itself is not
 * inspected.
 */
void OOVPT::InitializeForInput(InputType const& /*source*/)
{
  ReduceCache();
}
void OOVPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
{
CacheColl &cache = GetCache();
InputPathList::const_iterator iter;
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
InputPath &inputPath = **iter;
const Phrase &sourcePhrase = inputPath.GetPhrase();
const Word &sourceWord = sourcePhrase.GetWord(0);
TargetPhrase *tp = CreateTargetPhrase(sourceWord);
tp->EvaluateInIsolation(sourcePhrase);
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
tpColl->Add(tp);
// add target phrase to phrase-table cache
size_t hash = hash_value(sourcePhrase);
std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
cache[hash] = value;
inputPath.SetTargetPhrases(*this, tpColl, NULL);
}
}
/**
 * Create the target phrase used for an unknown source word.
 *
 * The source word is copied to the target side (factor by factor, marked as
 * OOV, aligned 0-0) unless it is dropped. It is dropped when the drop-unknown
 * option is on and its surface string contains no digit, or when the option
 * is off and the surface string is empty or EPSILON. A dropped word yields a
 * target phrase without words. In every case the phrase receives the score
 * FloorScore(TransformScore(0)) for this feature.
 *
 * @param sourceWord word to translate; factor 0 is read as the surface form
 * @return newly allocated target phrase created with `new`
 */
TargetPhrase *OOVPT::CreateTargetPhrase(const Word &sourceWord) const
{
  const StaticData &staticData = StaticData::Instance();
  FactorCollection &factorCollection = FactorCollection::Instance();

  // TODO hack. shouldn't know which factor is surface
  const Factor *surface = sourceWord[0];
  const StringPiece s = surface->GetString();
  const bool isEpsilon = (s=="" || s==EPSILON);

  // Digits are only looked for when unknown words are being dropped.
  const bool dropUnknown = staticData.GetDropUnknown();
  const bool hasDigit = dropUnknown && (s.find_first_of("0123456789") != string::npos);

  // Equivalent to: !(dropUnknown || isEpsilon) || hasDigit
  const bool keepWord = (!dropUnknown && !isEpsilon) || hasDigit;

  TargetPhrase *targetPhrase = new TargetPhrase(this);

  if (keepWord) {
    // Copy the source word verbatim to the target side.
    Word &targetWord = targetPhrase->AddWord();
    targetWord.SetIsOOV(true);

    for (unsigned int idx = 0; idx < MAX_NUM_FACTORS; ++idx) {
      const FactorType factorType = static_cast<FactorType>(idx);
      const Factor *sourceFactor = sourceWord[idx];
      targetWord[factorType] = (sourceFactor == NULL)
                               ? factorCollection.AddFactor(UNKNOWN_FACTOR)
                               : factorCollection.AddFactor(sourceFactor->GetString());
    }

    // one-to-one alignment between the unknown word and its verbatim copy
    targetPhrase->SetAlignmentInfo("0-0");
  }
  // otherwise the source word is dropped and the target phrase stays blank

  const float unknownScore = FloorScore(TransformScore(0));
  targetPhrase->GetScoreBreakdown().Assign(this, unknownScore);

  return targetPhrase;
}
/**
 * Create the chart-decoding lookup manager for this phrase table.
 *
 * @param parser         forwarded to ChartRuleLookupManagerOOVPT
 * @param cellCollection forwarded to ChartRuleLookupManagerOOVPT
 * @return newly allocated ChartRuleLookupManagerOOVPT bound to *this; the
 *         third (max chart span) argument is ignored
 */
ChartRuleLookupManager* OOVPT::CreateRuleLookupManager(const ChartParser &parser,
    const ChartCellCollectionBase &cellCollection,
    std::size_t /*maxChartSpan*/)
{
  ChartRuleLookupManager *manager = new ChartRuleLookupManagerOOVPT(parser, cellCollection, *this);
  return manager;
}
TO_STRING_BODY(OOVPT);
// friend
// Stream insertion for OOVPT: writes nothing and returns the stream unchanged.
ostream& operator<<(ostream& out, const OOVPT& /*phraseDict*/)
{
  return out;
}
}

View File

@ -1,36 +0,0 @@
#pragma once
#include "PhraseDictionary.h"
namespace Moses
{
class ChartParser;
class ChartCellCollectionBase;
class ChartRuleLookupManager;
// Phrase dictionary that creates a translation for an unknown (OOV) source
// word on the fly instead of looking it up in a stored table.
class OOVPT : public PhraseDictionary
{
friend std::ostream& operator<<(std::ostream&, const OOVPT&);
public:
// line is passed to the PhraseDictionary base constructor.
OOVPT(const std::string &line);
// Calls SetFeaturesToApply().
void Load();
// Calls ReduceCache(); the input itself is not inspected.
void InitializeForInput(InputType const& source);
// for phrase-based model
void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
// for syntax/hiero model (CKY+ decoding)
ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t);
TO_STRING();
// Returns a newly allocated target phrase for sourceWord; the phrase has no
// words when the source word is dropped.
TargetPhrase *CreateTargetPhrase(const Word &sourceWord) const;
protected:
};
} // namespace Moses

View File

@ -28,7 +28,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/DecodeGraph.h"
#include "moses/InputPath.h"
#include "util/exception.hh"
#include "OOVPT.h"
using namespace std;
@ -45,22 +44,6 @@ CacheColl::~CacheColl()
}
}
std::vector<PhraseDictionary*> PhraseDictionary::GetCollExclOOVPt()
{
std::vector<PhraseDictionary*> ret = GetColl();
std::vector<PhraseDictionary*>::iterator iter;
for (iter = ret.begin(); iter != ret.end(); ++iter) {
PhraseDictionary *pt = *iter;
if (typeid(*pt) == typeid(OOVPT&)) {
ret.erase(iter);
break;
}
}
return ret;
}
PhraseDictionary::PhraseDictionary(const std::string &line)
:DecodeFeature(line)
,m_tableLimit(20) // default

View File

@ -77,8 +77,6 @@ public:
return s_staticColl;
}
static std::vector<PhraseDictionary*> GetCollExclOOVPt();
PhraseDictionary(const std::string &line);
virtual ~PhraseDictionary() {

View File

@ -402,7 +402,7 @@ void TranslationOptionCollection::CreateTranslationOptions()
// VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() endPos:" << endPos << endl);
if (graphInd > 0 && // only skip subsequent graphs
backoff != 0 && // use of backoff specified
(endPos-startPos+1 <= backoff || // size exceeds backoff limit or ...
(endPos-startPos+1 > backoff || // size exceeds backoff limit or ...
m_collection[startPos][endPos-startPos].size() > 0)) { // no phrases found so far
VERBOSE(3,"No backoff to graph " << graphInd << " for span [" << startPos << ";" << endPos << "]" << endl);
// do not create more options