transliteration pt obeys the backoff argument. Probably not thread-safe yet

This commit is contained in:
Hieu Hoang 2013-12-05 13:06:35 +00:00
parent 19aa8c1056
commit b33cf30bc1
11 changed files with 108 additions and 61 deletions

View File

@ -32,5 +32,11 @@ DecodeGraph::~DecodeGraph()
RemoveAllInColl(m_steps);
}
//! Append another decode step to the graph and hand the step a
//! back-pointer to this graph.
void DecodeGraph::Add(DecodeStep *decodeStep)
{
  m_steps.push_back(decodeStep);

  // lets the step (and anything holding the step) reach this graph later
  decodeStep->SetContainer(this);
}
}

View File

@ -71,9 +71,7 @@ public:
virtual ~DecodeGraph();
//! Add another decode step to the graph
void Add(const DecodeStep *decodeStep) {
// only the pointer is stored; the step object itself is not copied
m_steps.push_back(decodeStep);
}
void Add(DecodeStep *decodeStep);
size_t GetSize() const {
return m_steps.size();

View File

@ -26,7 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
DecodeStep::DecodeStep(const DecodeFeature *decodeFeature,
DecodeStep::DecodeStep(DecodeFeature *decodeFeature,
const DecodeStep* prev,
const std::vector<FeatureFunction*> &features)
: m_decodeFeature(decodeFeature)
@ -56,8 +56,9 @@ DecodeStep::DecodeStep(const DecodeFeature *decodeFeature,
} else {
m_featuresRemaining.push_back(feature);
}
}
decodeFeature->SetContainer(this);
}
//! Destructor. The body is empty: nothing is released explicitly here.
DecodeStep::~DecodeStep() {}

View File

@ -38,6 +38,7 @@ class PartialTranslOptColl;
class FactorCollection;
class InputType;
class FeatureFunction;
class DecodeGraph;
/** Specification for a decoding step.
* The factored translation model consists of Translation and Generation
@ -52,11 +53,12 @@ protected:
std::vector<FactorType> m_conflictFactors; //! list of the factors that may conflict during this step
std::vector<FactorType> m_newOutputFactors; //! list of the factors that are new in this step, may be empty
const DecodeFeature* m_decodeFeature;
const DecodeGraph *m_container;
std::vector<FeatureFunction*> m_featuresToApply, m_featuresRemaining;
public:
DecodeStep(); //! not implemented
DecodeStep(const DecodeFeature *featurePtr,
DecodeStep(DecodeFeature *featurePtr,
const DecodeStep* prevDecodeStep,
const std::vector<FeatureFunction*> &features);
virtual ~DecodeStep();
@ -101,9 +103,13 @@ public:
/*! returns generation table feature for generation step */
const GenerationDictionary* GetGenerationDictionaryFeature() const;
void RemoveFeature(const FeatureFunction *ff);
//! Remember the decode graph that holds this step (called from DecodeGraph::Add).
void SetContainer(const DecodeGraph *container)
{
  m_container = container;
}
//! Decode graph previously supplied through SetContainer().
const DecodeGraph *GetContainer() const
{
  return m_container;
}
};
}

View File

@ -30,7 +30,7 @@ namespace Moses
{
using namespace std;
DecodeStepGeneration::DecodeStepGeneration(const GenerationDictionary* dict,
DecodeStepGeneration::DecodeStepGeneration(GenerationDictionary* dict,
const DecodeStep* prev,
const std::vector<FeatureFunction*> &features)
: DecodeStep(dict, prev, features)

View File

@ -35,7 +35,7 @@ class ScoreComponentCollection;
class DecodeStepGeneration : public DecodeStep
{
public:
DecodeStepGeneration(const GenerationDictionary* dict,
DecodeStepGeneration(GenerationDictionary* dict,
const DecodeStep* prev,
const std::vector<FeatureFunction*> &features);

View File

@ -30,7 +30,7 @@ using namespace std;
namespace Moses
{
DecodeStepTranslation::DecodeStepTranslation(const PhraseDictionary* pdf,
DecodeStepTranslation::DecodeStepTranslation(PhraseDictionary* pdf,
const DecodeStep* prev,
const std::vector<FeatureFunction*> &features)
: DecodeStep(pdf, prev, features)

View File

@ -38,7 +38,7 @@ class DecodeStepTranslation : public DecodeStep
{
public:
DecodeStepTranslation(); //! not implemented
DecodeStepTranslation(const PhraseDictionary* phraseFeature,
DecodeStepTranslation(PhraseDictionary* phraseFeature,
const DecodeStep* prev,
const std::vector<FeatureFunction*> &features);

View File

@ -30,6 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
class DecodeStep;
/**
* Baseclass for phrase-table or generation table feature function
@ -78,11 +79,15 @@ public:
, ScoreComponentCollection &estimatedFutureScore) const
{}
//! Remember the decode step that uses this feature
//! (the DecodeStep constructor passes itself in).
void SetContainer(const DecodeStep *container)
{
  m_container = container;
}
protected:
std::vector<FactorType> m_input;
std::vector<FactorType> m_output;
FactorMask m_inputFactors;
FactorMask m_outputFactors;
const DecodeStep *m_container;
};
}

View File

@ -2,6 +2,8 @@
#include <stdlib.h>
#include "TransliterationPhraseDictionary.h"
#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
#include "moses/DecodeGraph.h"
#include "moses/DecodeStep.h"
using namespace std;
@ -29,62 +31,56 @@ void TransliterationPhraseDictionary::GetTargetPhraseCollectionBatch(const Input
InputPathList::const_iterator iter;
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
InputPath &inputPath = **iter;
const Phrase &sourcePhrase = inputPath.GetPhrase();
if (sourcePhrase.GetSize() != 1) {
// only translit single words. This should be user configurable
if (!SatisfyBackoff(inputPath)) {
continue;
}
InputPath::TargetPhrases::const_iterator iter;
for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) {
const std::pair<const TargetPhraseCollection*, const void*> &temp = iter->second;
const TargetPhraseCollection *tpCollPrev = temp.first;
const Phrase &sourcePhrase = inputPath.GetPhrase();
if (tpCollPrev && tpCollPrev->GetSize()) {
// already have translation from another pt. Don't transliterate
break;
}
// TRANSLITERATE
char *ptr = tmpnam(NULL);
string inFile(ptr);
ptr = tmpnam(NULL);
string outDir(ptr);
ofstream inStream(inFile.c_str());
inStream << sourcePhrase.ToString() << endl;
inStream.close();
string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
" --transliteration-model-dir " + m_filePath +
" --moses-src-dir " + m_mosesDir +
" --external-bin-dir " + m_externalDir +
" --input-extension " + m_inputLang +
" --output-extension " + m_outputLang +
" --oov-file " + inFile +
" --out-dir " + outDir;
int ret = system(cmd.c_str());
UTIL_THROW_IF2(ret != 0, "Transliteration script error");
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir);
vector<TargetPhrase*>::const_iterator iter;
for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
TargetPhrase *tp = *iter;
tpColl->Add(tp);
}
m_allTPColl.push_back(tpColl);
inputPath.SetTargetPhrases(*this, tpColl, NULL);
remove(inFile.c_str());
cmd = "rm -rf " + outDir;
system(cmd.c_str());
if (sourcePhrase.GetSize() != 1) {
// only translit single words. A limitation of the translit script
continue;
}
// TRANSLITERATE
char *ptr = tmpnam(NULL);
string inFile(ptr);
ptr = tmpnam(NULL);
string outDir(ptr);
ofstream inStream(inFile.c_str());
inStream << sourcePhrase.ToString() << endl;
inStream.close();
string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
" --transliteration-model-dir " + m_filePath +
" --moses-src-dir " + m_mosesDir +
" --external-bin-dir " + m_externalDir +
" --input-extension " + m_inputLang +
" --output-extension " + m_outputLang +
" --oov-file " + inFile +
" --out-dir " + outDir;
int ret = system(cmd.c_str());
UTIL_THROW_IF2(ret != 0, "Transliteration script error");
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir);
vector<TargetPhrase*>::const_iterator iter;
for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
TargetPhrase *tp = *iter;
tpColl->Add(tp);
}
m_allTPColl.push_back(tpColl);
inputPath.SetTargetPhrases(*this, tpColl, NULL);
// clean up temporary files
remove(inFile.c_str());
cmd = "rm -rf " + outDir;
system(cmd.c_str());
}
}
@ -145,6 +141,39 @@ SetParameter(const std::string& key, const std::string& value)
}
}
bool TransliterationPhraseDictionary::SatisfyBackoff(const InputPath &inputPath) const
{
const Phrase &sourcePhrase = inputPath.GetPhrase();
assert(m_container);
const DecodeGraph *decodeGraph = m_container->GetContainer();
size_t backoff = decodeGraph->GetBackoff();
if (backoff == 0) {
// ie. don't backoff. Collect ALL translations
return true;
}
if (sourcePhrase.GetSize() > backoff) {
// source phrase too big
return false;
}
// lookup translation only if no other translations
InputPath::TargetPhrases::const_iterator iter;
for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) {
const std::pair<const TargetPhraseCollection*, const void*> &temp = iter->second;
const TargetPhraseCollection *tpCollPrev = temp.first;
if (tpCollPrev && tpCollPrev->GetSize()) {
// already have translation from another pt. Don't create translations
return false;
}
}
return true;
}
TO_STRING_BODY(TransliterationPhraseDictionary);
// friend

View File

@ -8,6 +8,7 @@ namespace Moses
class ChartParser;
class ChartCellCollectionBase;
class ChartRuleLookupManager;
class InputPath;
class TransliterationPhraseDictionary : public PhraseDictionary
{
@ -28,13 +29,14 @@ public:
TO_STRING();
protected:
mutable std::list<TargetPhraseCollection*> m_allTPColl;
std::string m_mosesDir, m_scriptDir, m_externalDir, m_inputLang, m_outputLang;
std::vector<TargetPhrase*> CreateTargetPhrases(const Phrase &sourcePhrase, const std::string &outDir) const;
bool SatisfyBackoff(const InputPath &inputPath) const;
};
} // namespace Moses