// Source: mosesdecoder/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
// (Repository web-view header: 175 lines, 4.9 KiB, C++; "Raw / Normal View / History" links.)

// vim:tabstop=2
#include <cstdlib>
#include "PhraseDictionaryTransliteration.h"
#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
#include "moses/DecodeGraph.h"
#include "moses/DecodeStep.h"
#include "util/tempfile.hh"
using namespace std;
namespace Moses
{
// Constructs the transliteration phrase table from its configuration line.
//
// After ReadParameters() has run, every one of the five string settings
// handled by SetParameter() in this file (moses-dir, script-dir, external-dir,
// input-lang, output-lang) must be non-empty; otherwise UTIL_THROW_IF2 reports
// "Must specify all arguments".
PhraseDictionaryTransliteration::PhraseDictionaryTransliteration(const std::string &line)
  : PhraseDictionary(line, true)
{
  ReadParameters();

  UTIL_THROW_IF2(m_mosesDir.empty() ||
                 m_scriptDir.empty() ||
                 m_externalDir.empty() ||
                 m_inputLang.empty() ||
                 m_outputLang.empty(),
                 "Must specify all arguments");
}
// [web-view blame timestamp: 2015-12-10 06:17:36 +03:00]
// Load step for this phrase table.
//
// Nothing is read from disk here: the function only remembers the options
// object passed in and then calls SetFeaturesToApply().
//
// @param opts  options object stored in m_options.
void PhraseDictionaryTransliteration::Load(AllOptions::ptr const& opts)
{
  m_options = opts;

  SetFeaturesToApply();
}
void PhraseDictionaryTransliteration::CleanUpAfterSentenceProcessing(const InputType& source)
{
2015-01-14 14:07:42 +03:00
ReduceCache();
}
void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
{
InputPathList::const_iterator iter;
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
InputPath &inputPath = **iter;
if (!SatisfyBackoff(inputPath)) {
2015-01-14 14:07:42 +03:00
continue;
}
const Phrase &sourcePhrase = inputPath.GetPhrase();
if (sourcePhrase.GetSize() != 1) {
2015-01-14 14:07:42 +03:00
// only translit single words. A limitation of the translit script
continue;
}
GetTargetPhraseCollection(inputPath);
}
}
// [web-view blame timestamp: 2015-10-19 02:00:40 +03:00]
// Attaches the transliteration candidates for the source phrase of inputPath
// to that path.
//
// Candidates are cached under hash_value(sourcePhrase). On a cache hit the
// stored collection is reused. On a miss the phrase is written to a temporary
// file, the external prepare-transliteration-phrase-table.pl script is run via
// system(), its output directory is parsed by CreateTargetPhrases(), and the
// resulting collection is stored in the cache together with clock().
//
// Errors are raised through UTIL_THROW_IF2 when the temporary input file
// cannot be opened or written, or when the script returns a non-zero status.
//
// NOTE(review): the cache key is the hash alone, so two phrases with equal
// hashes would share one cache entry.
// NOTE(review): the command line is assembled by plain concatenation and none
// of the paths are quoted, so values containing spaces or shell metacharacters
// will not survive system().
//
// @param inputPath  path whose target phrases are set.
void
PhraseDictionaryTransliteration::
GetTargetPhraseCollection(InputPath &inputPath) const
{
  const Phrase &sourcePhrase = inputPath.GetPhrase();
  size_t hash = hash_value(sourcePhrase);

  CacheColl &cache = GetCache();

  CacheColl::iterator cached = cache.find(hash);
  if (cached != cache.end()) {
    // already in cache
    TargetPhraseCollection::shared_ptr tpColl = cached->second.first;
    inputPath.SetTargetPhrases(*this, tpColl, NULL);
    return;
  }

  // TRANSLITERATE
  const util::temp_file inFile;
  const util::temp_dir outDir;

  // Write the word to transliterate. Fail early if the file cannot be
  // produced, rather than running the script on missing or partial input.
  ofstream inStream(inFile.path().c_str());
  UTIL_THROW_IF2(!inStream.is_open(),
                 "Cannot open temporary transliteration input file " << inFile.path());
  inStream << sourcePhrase.ToString() << endl;
  inStream.close();
  UTIL_THROW_IF2(inStream.fail(),
                 "Cannot write temporary transliteration input file " << inFile.path());

  string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
               " --transliteration-model-dir " + m_filePath +
               " --moses-src-dir " + m_mosesDir +
               " --external-bin-dir " + m_externalDir +
               " --input-extension " + m_inputLang +
               " --output-extension " + m_outputLang +
               " --oov-file " + inFile.path() +
               " --out-dir " + outDir.path();

  int ret = system(cmd.c_str());
  UTIL_THROW_IF2(ret != 0, "Transliteration script error");

  TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection);

  vector<TargetPhrase*> targetPhrases
  = CreateTargetPhrases(sourcePhrase, outDir.path());

  // Hand every candidate over to the collection.
  vector<TargetPhrase*>::const_iterator tpIter;
  for (tpIter = targetPhrases.begin(); tpIter != targetPhrases.end(); ++tpIter) {
    tpColl->Add(*tpIter);
  }

  // Remember the result, stamped with the current clock() value.
  cache[hash] = CacheCollEntry(tpColl, clock());

  inputPath.SetTargetPhrases(*this, tpColl, NULL);
}
std::vector<TargetPhrase*> PhraseDictionaryTransliteration::CreateTargetPhrases(const Phrase &sourcePhrase, const string &outDir) const
{
2015-01-14 14:07:42 +03:00
std::vector<TargetPhrase*> ret;
2015-01-14 14:07:42 +03:00
string outPath = outDir + "/out.txt";
ifstream outStream(outPath.c_str());
2015-01-14 14:07:42 +03:00
string line;
while (getline(outStream, line)) {
vector<string> toks;
Tokenize(toks, line, "\t");
UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
2015-01-14 14:07:42 +03:00
TargetPhrase *tp = new TargetPhrase(this);
Word &word = tp->AddWord();
word.CreateFromString(Output, m_output, toks[0], false);
2015-01-14 14:07:42 +03:00
float score = Scan<float>(toks[1]);
tp->GetScoreBreakdown().PlusEquals(this, score);
2015-01-14 14:07:42 +03:00
// score of all other ff when this rule is being loaded
tp->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
2015-01-14 14:07:42 +03:00
ret.push_back(tp);
}
2015-01-14 14:07:42 +03:00
outStream.close();
return ret;
}
// Rule lookup manager factory.
//
// No manager is created by this phrase table: the arguments are ignored and
// NULL is always returned. The skeleton-based implementation is kept below,
// commented out, for reference.
ChartRuleLookupManager* PhraseDictionaryTransliteration::CreateRuleLookupManager(const ChartParser &parser,
    const ChartCellCollectionBase &cellCollection,
    std::size_t /*maxChartSpan*/)
{
  //return new ChartRuleLookupManagerSkeleton(parser, cellCollection, *this);
  return NULL;
}
void
PhraseDictionaryTransliteration::
SetParameter(const std::string& key, const std::string& value)
{
if (key == "moses-dir") {
2015-01-14 14:07:42 +03:00
m_mosesDir = value;
} else if (key == "script-dir") {
2015-01-14 14:07:42 +03:00
m_scriptDir = value;
} else if (key == "external-dir") {
2015-01-14 14:07:42 +03:00
m_externalDir = value;
} else if (key == "input-lang") {
2015-01-14 14:07:42 +03:00
m_inputLang = value;
} else if (key == "output-lang") {
2015-01-14 14:07:42 +03:00
m_outputLang = value;
} else {
2015-01-14 14:07:42 +03:00
PhraseDictionary::SetParameter(key, value);
}
}
// NOTE(review): TO_STRING_BODY is a macro defined outside this file;
// presumably it generates the class's ToString() member — confirm against its
// definition.
TO_STRING_BODY(PhraseDictionaryTransliteration);
// friend
// Stream insertion for PhraseDictionaryTransliteration.
// Nothing is written: the dictionary is ignored and the stream is handed back
// unchanged.
ostream& operator<<(ostream& out, const PhraseDictionaryTransliteration& /*phraseDict*/)
{
  return out;
}
}