mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-05 15:58:03 +03:00
transliteration pt obeys backoff argument. Probably not threadable yet
This commit is contained in:
parent
19aa8c1056
commit
b33cf30bc1
@ -32,5 +32,11 @@ DecodeGraph::~DecodeGraph()
|
||||
RemoveAllInColl(m_steps);
|
||||
}
|
||||
|
||||
//! Add another decode step to the graph.
//! Appends decodeStep to m_steps and then registers this graph as the
//! step's container through DecodeStep::SetContainer().
//! NOTE(review): the destructor hands m_steps to RemoveAllInColl(), so the
//! graph presumably takes ownership of decodeStep -- confirm.
|
||||
void DecodeGraph::Add(DecodeStep *decodeStep) {
|
||||
m_steps.push_back(decodeStep);
|
||||
// back-pointer so the step can find the graph it belongs to
decodeStep->SetContainer(this);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -71,9 +71,7 @@ public:
|
||||
virtual ~DecodeGraph();
|
||||
|
||||
//! Add another decode step to the graph
|
||||
void Add(const DecodeStep *decodeStep) {
|
||||
m_steps.push_back(decodeStep);
|
||||
}
|
||||
void Add(DecodeStep *decodeStep);
|
||||
|
||||
size_t GetSize() const {
|
||||
return m_steps.size();
|
||||
|
@ -26,7 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
DecodeStep::DecodeStep(const DecodeFeature *decodeFeature,
|
||||
DecodeStep::DecodeStep(DecodeFeature *decodeFeature,
|
||||
const DecodeStep* prev,
|
||||
const std::vector<FeatureFunction*> &features)
|
||||
: m_decodeFeature(decodeFeature)
|
||||
@ -56,8 +56,9 @@ DecodeStep::DecodeStep(const DecodeFeature *decodeFeature,
|
||||
} else {
|
||||
m_featuresRemaining.push_back(feature);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
decodeFeature->SetContainer(this);
|
||||
}
|
||||
|
||||
//! Destructor; the body is empty, nothing is released explicitly here.
DecodeStep::~DecodeStep() {}
|
||||
|
@ -38,6 +38,7 @@ class PartialTranslOptColl;
|
||||
class FactorCollection;
|
||||
class InputType;
|
||||
class FeatureFunction;
|
||||
class DecodeGraph;
|
||||
|
||||
/** Specification for a decoding step.
|
||||
* The factored translation model consists of Translation and Generation
|
||||
@ -52,11 +53,12 @@ protected:
|
||||
std::vector<FactorType> m_conflictFactors; //! list of the factors that may conflict during this step
|
||||
std::vector<FactorType> m_newOutputFactors; //! list of the factors that are new in this step, may be empty
|
||||
const DecodeFeature* m_decodeFeature;
|
||||
const DecodeGraph *m_container;
|
||||
|
||||
std::vector<FeatureFunction*> m_featuresToApply, m_featuresRemaining;
|
||||
public:
|
||||
DecodeStep(); //! not implemented
|
||||
DecodeStep(const DecodeFeature *featurePtr,
|
||||
DecodeStep(DecodeFeature *featurePtr,
|
||||
const DecodeStep* prevDecodeStep,
|
||||
const std::vector<FeatureFunction*> &features);
|
||||
virtual ~DecodeStep();
|
||||
@ -101,9 +103,13 @@ public:
|
||||
/*! returns generation table feature for generation step */
|
||||
const GenerationDictionary* GetGenerationDictionaryFeature() const;
|
||||
|
||||
|
||||
void RemoveFeature(const FeatureFunction *ff);
|
||||
|
||||
//! Record the decode graph this step belongs to (stored in m_container).
//! DecodeGraph::Add() calls this with the graph itself.
void SetContainer(const DecodeGraph *container)
|
||||
{ m_container = container; }
|
||||
//! Return the decode graph last stored with SetContainer().
//! NOTE(review): the initial value of m_container is not visible here;
//! presumably unset until the step is added to a graph -- confirm.
const DecodeGraph *GetContainer() const
|
||||
{ return m_container; }
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -30,7 +30,7 @@ namespace Moses
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
DecodeStepGeneration::DecodeStepGeneration(const GenerationDictionary* dict,
|
||||
DecodeStepGeneration::DecodeStepGeneration(GenerationDictionary* dict,
|
||||
const DecodeStep* prev,
|
||||
const std::vector<FeatureFunction*> &features)
|
||||
: DecodeStep(dict, prev, features)
|
||||
|
@ -35,7 +35,7 @@ class ScoreComponentCollection;
|
||||
class DecodeStepGeneration : public DecodeStep
|
||||
{
|
||||
public:
|
||||
DecodeStepGeneration(const GenerationDictionary* dict,
|
||||
DecodeStepGeneration(GenerationDictionary* dict,
|
||||
const DecodeStep* prev,
|
||||
const std::vector<FeatureFunction*> &features);
|
||||
|
||||
|
@ -30,7 +30,7 @@ using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
DecodeStepTranslation::DecodeStepTranslation(const PhraseDictionary* pdf,
|
||||
DecodeStepTranslation::DecodeStepTranslation(PhraseDictionary* pdf,
|
||||
const DecodeStep* prev,
|
||||
const std::vector<FeatureFunction*> &features)
|
||||
: DecodeStep(pdf, prev, features)
|
||||
|
@ -38,7 +38,7 @@ class DecodeStepTranslation : public DecodeStep
|
||||
{
|
||||
public:
|
||||
DecodeStepTranslation(); //! not implemented
|
||||
DecodeStepTranslation(const PhraseDictionary* phraseFeature,
|
||||
DecodeStepTranslation(PhraseDictionary* phraseFeature,
|
||||
const DecodeStep* prev,
|
||||
const std::vector<FeatureFunction*> &features);
|
||||
|
||||
|
@ -30,6 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class DecodeStep;
|
||||
|
||||
/**
|
||||
* Baseclass for phrase-table or generation table feature function
|
||||
@ -78,11 +79,15 @@ public:
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
|
||||
//! Record the decode step that uses this feature (stored in m_container).
//! The DecodeStep constructor calls this with itself.
void SetContainer(const DecodeStep *container)
|
||||
{ m_container = container; }
|
||||
|
||||
protected:
|
||||
std::vector<FactorType> m_input;
|
||||
std::vector<FactorType> m_output;
|
||||
FactorMask m_inputFactors;
|
||||
FactorMask m_outputFactors;
|
||||
const DecodeStep *m_container;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -2,6 +2,8 @@
|
||||
#include <stdlib.h>
|
||||
#include "TransliterationPhraseDictionary.h"
|
||||
#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
|
||||
#include "moses/DecodeGraph.h"
|
||||
#include "moses/DecodeStep.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -29,62 +31,56 @@ void TransliterationPhraseDictionary::GetTargetPhraseCollectionBatch(const Input
|
||||
InputPathList::const_iterator iter;
|
||||
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
|
||||
InputPath &inputPath = **iter;
|
||||
const Phrase &sourcePhrase = inputPath.GetPhrase();
|
||||
|
||||
if (sourcePhrase.GetSize() != 1) {
|
||||
// only translit single words. This should be user configurable
|
||||
if (!SatisfyBackoff(inputPath)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
InputPath::TargetPhrases::const_iterator iter;
|
||||
for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) {
|
||||
const std::pair<const TargetPhraseCollection*, const void*> &temp = iter->second;
|
||||
const TargetPhraseCollection *tpCollPrev = temp.first;
|
||||
const Phrase &sourcePhrase = inputPath.GetPhrase();
|
||||
|
||||
if (tpCollPrev && tpCollPrev->GetSize()) {
|
||||
// already have translation from another pt. Don't transliterate
|
||||
break;
|
||||
}
|
||||
|
||||
// TRANSLITERATE
|
||||
char *ptr = tmpnam(NULL);
|
||||
string inFile(ptr);
|
||||
ptr = tmpnam(NULL);
|
||||
string outDir(ptr);
|
||||
|
||||
ofstream inStream(inFile.c_str());
|
||||
inStream << sourcePhrase.ToString() << endl;
|
||||
inStream.close();
|
||||
|
||||
string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
|
||||
" --transliteration-model-dir " + m_filePath +
|
||||
" --moses-src-dir " + m_mosesDir +
|
||||
" --external-bin-dir " + m_externalDir +
|
||||
" --input-extension " + m_inputLang +
|
||||
" --output-extension " + m_outputLang +
|
||||
" --oov-file " + inFile +
|
||||
" --out-dir " + outDir;
|
||||
|
||||
int ret = system(cmd.c_str());
|
||||
UTIL_THROW_IF2(ret != 0, "Transliteration script error");
|
||||
|
||||
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
|
||||
vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir);
|
||||
vector<TargetPhrase*>::const_iterator iter;
|
||||
for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
|
||||
TargetPhrase *tp = *iter;
|
||||
tpColl->Add(tp);
|
||||
}
|
||||
|
||||
m_allTPColl.push_back(tpColl);
|
||||
inputPath.SetTargetPhrases(*this, tpColl, NULL);
|
||||
|
||||
remove(inFile.c_str());
|
||||
|
||||
cmd = "rm -rf " + outDir;
|
||||
system(cmd.c_str());
|
||||
if (sourcePhrase.GetSize() != 1) {
|
||||
// only translit single words. A limitation of the translit script
|
||||
continue;
|
||||
}
|
||||
|
||||
// TRANSLITERATE
|
||||
char *ptr = tmpnam(NULL);
|
||||
string inFile(ptr);
|
||||
ptr = tmpnam(NULL);
|
||||
string outDir(ptr);
|
||||
|
||||
ofstream inStream(inFile.c_str());
|
||||
inStream << sourcePhrase.ToString() << endl;
|
||||
inStream.close();
|
||||
|
||||
string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
|
||||
" --transliteration-model-dir " + m_filePath +
|
||||
" --moses-src-dir " + m_mosesDir +
|
||||
" --external-bin-dir " + m_externalDir +
|
||||
" --input-extension " + m_inputLang +
|
||||
" --output-extension " + m_outputLang +
|
||||
" --oov-file " + inFile +
|
||||
" --out-dir " + outDir;
|
||||
|
||||
int ret = system(cmd.c_str());
|
||||
UTIL_THROW_IF2(ret != 0, "Transliteration script error");
|
||||
|
||||
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
|
||||
vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir);
|
||||
vector<TargetPhrase*>::const_iterator iter;
|
||||
for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
|
||||
TargetPhrase *tp = *iter;
|
||||
tpColl->Add(tp);
|
||||
}
|
||||
|
||||
m_allTPColl.push_back(tpColl);
|
||||
inputPath.SetTargetPhrases(*this, tpColl, NULL);
|
||||
|
||||
// clean up temporary files
|
||||
remove(inFile.c_str());
|
||||
|
||||
cmd = "rm -rf " + outDir;
|
||||
system(cmd.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
@ -145,6 +141,39 @@ SetParameter(const std::string& key, const std::string& value)
|
||||
}
|
||||
}
|
||||
|
||||
/** Decide whether this dictionary should produce translations for inputPath,
 *  based on the backoff value of the decode graph that contains it.
 *
 *  Returns true when the graph's backoff is 0. Otherwise returns false if the
 *  source phrase is longer than the backoff value, or if any collection
 *  already stored on inputPath is non-null and non-empty; returns true in the
 *  remaining case.
 *
 *  \param inputPath path whose source phrase and existing target phrases are inspected
 */
bool TransliterationPhraseDictionary::SatisfyBackoff(const InputPath &inputPath) const
|
||||
{
|
||||
const Phrase &sourcePhrase = inputPath.GetPhrase();
|
||||
|
||||
// m_container is the DecodeStep holding this feature; it must have been set
assert(m_container);
|
||||
// NOTE(review): decodeGraph is dereferenced below without a null check, and
// the assert above does not cover it -- confirm the step is always added to
// a graph before this is called.
const DecodeGraph *decodeGraph = m_container->GetContainer();
|
||||
size_t backoff = decodeGraph->GetBackoff();
|
||||
|
||||
if (backoff == 0) {
|
||||
// i.e. don't back off: always collect translations from this table
|
||||
return true;
|
||||
}
|
||||
|
||||
if (sourcePhrase.GetSize() > backoff) {
|
||||
// source phrase is longer than the backoff limit
|
||||
return false;
|
||||
}
|
||||
|
||||
// look up a translation only if no collection already on the path has entries
|
||||
InputPath::TargetPhrases::const_iterator iter;
|
||||
for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) {
|
||||
const std::pair<const TargetPhraseCollection*, const void*> &temp = iter->second;
|
||||
const TargetPhraseCollection *tpCollPrev = temp.first;
|
||||
|
||||
if (tpCollPrev && tpCollPrev->GetSize()) {
|
||||
// a collection on this path already has translations; don't create more
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// nothing found on the path, so this table should supply translations
return true;
|
||||
}
|
||||
|
||||
TO_STRING_BODY(TransliterationPhraseDictionary);
|
||||
|
||||
// friend
|
||||
|
@ -8,6 +8,7 @@ namespace Moses
|
||||
class ChartParser;
|
||||
class ChartCellCollectionBase;
|
||||
class ChartRuleLookupManager;
|
||||
class InputPath;
|
||||
|
||||
class TransliterationPhraseDictionary : public PhraseDictionary
|
||||
{
|
||||
@ -28,13 +29,14 @@ public:
|
||||
|
||||
TO_STRING();
|
||||
|
||||
|
||||
protected:
|
||||
mutable std::list<TargetPhraseCollection*> m_allTPColl;
|
||||
|
||||
std::string m_mosesDir, m_scriptDir, m_externalDir, m_inputLang, m_outputLang;
|
||||
|
||||
std::vector<TargetPhrase*> CreateTargetPhrases(const Phrase &sourcePhrase, const std::string &outDir) const;
|
||||
bool SatisfyBackoff(const InputPath &inputPath) const;
|
||||
|
||||
};
|
||||
|
||||
} // namespace Moses
|
||||
|
Loading…
Reference in New Issue
Block a user