mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-07-14 14:50:41 +03:00
move ConvertFromMoses() to Moses
This commit is contained in:
parent
ad240a9f5b
commit
ebae7ce520
@ -23,7 +23,7 @@
|
|||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "OnDiskWrapper.h"
|
#include "OnDiskWrapper.h"
|
||||||
#include "moses/Factor.h"
|
#include "moses/Util.h"
|
||||||
#include "util/exception.hh"
|
#include "util/exception.hh"
|
||||||
#include "util/string_stream.hh"
|
#include "util/string_stream.hh"
|
||||||
|
|
||||||
@ -219,42 +219,5 @@ uint64_t OnDiskWrapper::GetMisc(const std::string &key) const
|
|||||||
return iter->second;
|
return iter->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
Word *OnDiskWrapper::ConvertFromMoses(const std::vector<Moses::FactorType> &factorsVec
|
|
||||||
, const Moses::Word &origWord) const
|
|
||||||
{
|
|
||||||
bool isNonTerminal = origWord.IsNonTerminal();
|
|
||||||
Word *newWord = new Word(isNonTerminal);
|
|
||||||
|
|
||||||
util::StringStream strme;
|
|
||||||
|
|
||||||
size_t factorType = factorsVec[0];
|
|
||||||
const Moses::Factor *factor = origWord.GetFactor(factorType);
|
|
||||||
UTIL_THROW_IF2(factor == NULL, "Expecting factor " << factorType);
|
|
||||||
strme << factor->GetString();
|
|
||||||
|
|
||||||
for (size_t ind = 1 ; ind < factorsVec.size() ; ++ind) {
|
|
||||||
size_t factorType = factorsVec[ind];
|
|
||||||
const Moses::Factor *factor = origWord.GetFactor(factorType);
|
|
||||||
if (factor == NULL) {
|
|
||||||
// the word can have fewer factors than factorsVec.size()
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
UTIL_THROW_IF2(factor == NULL,
|
|
||||||
"Expecting factor " << factorType << " at position " << ind);
|
|
||||||
strme << "|" << factor->GetString();
|
|
||||||
} // for (size_t factorType
|
|
||||||
|
|
||||||
bool found;
|
|
||||||
uint64_t vocabId = m_vocab.GetVocabId(strme.str(), found);
|
|
||||||
if (!found) {
|
|
||||||
// factor not in phrase table -> phrase is definitely not in it either; exit
|
|
||||||
delete newWord;
|
|
||||||
return NULL;
|
|
||||||
} else {
|
|
||||||
newWord->SetVocabId(vocabId);
|
|
||||||
return newWord;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -22,7 +22,6 @@
|
|||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include "Vocab.h"
|
#include "Vocab.h"
|
||||||
#include "PhraseNode.h"
|
#include "PhraseNode.h"
|
||||||
#include "moses/Word.h"
|
|
||||||
|
|
||||||
namespace OnDiskPt
|
namespace OnDiskPt
|
||||||
{
|
{
|
||||||
@ -107,9 +106,6 @@ public:
|
|||||||
|
|
||||||
uint64_t GetMisc(const std::string &key) const;
|
uint64_t GetMisc(const std::string &key) const;
|
||||||
|
|
||||||
Word *ConvertFromMoses(const std::vector<Moses::FactorType> &factorsVec
|
|
||||||
, const Moses::Word &origWord) const;
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include "OnDiskWrapper.h"
|
#include "OnDiskWrapper.h"
|
||||||
#include "Vocab.h"
|
#include "Vocab.h"
|
||||||
|
#include "moses/Util.h"
|
||||||
#include "util/exception.hh"
|
#include "util/exception.hh"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
@ -108,7 +108,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
|
|||||||
|
|
||||||
// search for terminal symbol
|
// search for terminal symbol
|
||||||
if (startPos == absEndPos) {
|
if (startPos == absEndPos) {
|
||||||
OnDiskPt::Word *sourceWordBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_inputFactorsVec, sourceWordLabel.GetLabel());
|
OnDiskPt::Word *sourceWordBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_inputFactorsVec, sourceWordLabel.GetLabel());
|
||||||
|
|
||||||
if (sourceWordBerkeleyDb != NULL) {
|
if (sourceWordBerkeleyDb != NULL) {
|
||||||
const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceWordBerkeleyDb, m_dbWrapper);
|
const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceWordBerkeleyDb, m_dbWrapper);
|
||||||
@ -154,7 +154,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
|
|||||||
for (iterSourceLHS = sourceLHSSet.begin(); iterSourceLHS != sourceLHSSet.end(); ++iterSourceLHS) {
|
for (iterSourceLHS = sourceLHSSet.begin(); iterSourceLHS != sourceLHSSet.end(); ++iterSourceLHS) {
|
||||||
const Word &sourceLHS = *iterSourceLHS;
|
const Word &sourceLHS = *iterSourceLHS;
|
||||||
|
|
||||||
OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_inputFactorsVec, sourceLHS);
|
OnDiskPt::Word *sourceLHSBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_inputFactorsVec, sourceLHS);
|
||||||
|
|
||||||
if (sourceLHSBerkeleyDb == NULL) {
|
if (sourceLHSBerkeleyDb == NULL) {
|
||||||
delete sourceLHSBerkeleyDb;
|
delete sourceLHSBerkeleyDb;
|
||||||
@ -190,7 +190,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
|
|||||||
|
|
||||||
if (doSearch) {
|
if (doSearch) {
|
||||||
|
|
||||||
OnDiskPt::Word *chartNonTermBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_outputFactorsVec, cellLabel.GetLabel());
|
OnDiskPt::Word *chartNonTermBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_outputFactorsVec, cellLabel.GetLabel());
|
||||||
|
|
||||||
if (chartNonTermBerkeleyDb == NULL)
|
if (chartNonTermBerkeleyDb == NULL)
|
||||||
continue;
|
continue;
|
||||||
@ -234,7 +234,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
|
|||||||
for (iterLabelSet = lhsSet.begin(); iterLabelSet != lhsSet.end(); ++iterLabelSet) {
|
for (iterLabelSet = lhsSet.begin(); iterLabelSet != lhsSet.end(); ++iterLabelSet) {
|
||||||
const Word &sourceLHS = *iterLabelSet;
|
const Word &sourceLHS = *iterLabelSet;
|
||||||
|
|
||||||
OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_inputFactorsVec, sourceLHS);
|
OnDiskPt::Word *sourceLHSBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_inputFactorsVec, sourceLHS);
|
||||||
if (sourceLHSBerkeleyDb == NULL)
|
if (sourceLHSBerkeleyDb == NULL)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -150,7 +150,7 @@ void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath
|
|||||||
if (prevPtNode) {
|
if (prevPtNode) {
|
||||||
Word lastWord = phrase.GetWord(phrase.GetSize() - 1);
|
Word lastWord = phrase.GetWord(phrase.GetSize() - 1);
|
||||||
lastWord.OnlyTheseFactors(m_inputFactors);
|
lastWord.OnlyTheseFactors(m_inputFactors);
|
||||||
OnDiskPt::Word *lastWordOnDisk = wrapper.ConvertFromMoses(m_input, lastWord);
|
OnDiskPt::Word *lastWordOnDisk = ConvertFromMoses(wrapper, m_input, lastWord);
|
||||||
|
|
||||||
TargetPhraseCollection::shared_ptr tpc;
|
TargetPhraseCollection::shared_ptr tpc;
|
||||||
if (lastWordOnDisk == NULL) {
|
if (lastWordOnDisk == NULL) {
|
||||||
@ -344,6 +344,44 @@ void PhraseDictionaryOnDisk::ConvertToMoses(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
OnDiskPt::Word *PhraseDictionaryOnDisk::ConvertFromMoses(OnDiskPt::OnDiskWrapper &wrapper, const std::vector<Moses::FactorType> &factorsVec
|
||||||
|
, const Moses::Word &origWord) const
|
||||||
|
{
|
||||||
|
bool isNonTerminal = origWord.IsNonTerminal();
|
||||||
|
OnDiskPt::Word *newWord = new OnDiskPt::Word(isNonTerminal);
|
||||||
|
|
||||||
|
util::StringStream strme;
|
||||||
|
|
||||||
|
size_t factorType = factorsVec[0];
|
||||||
|
const Moses::Factor *factor = origWord.GetFactor(factorType);
|
||||||
|
UTIL_THROW_IF2(factor == NULL, "Expecting factor " << factorType);
|
||||||
|
strme << factor->GetString();
|
||||||
|
|
||||||
|
for (size_t ind = 1 ; ind < factorsVec.size() ; ++ind) {
|
||||||
|
size_t factorType = factorsVec[ind];
|
||||||
|
const Moses::Factor *factor = origWord.GetFactor(factorType);
|
||||||
|
if (factor == NULL) {
|
||||||
|
// the word can have fewer factors than factorsVec.size()
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
UTIL_THROW_IF2(factor == NULL,
|
||||||
|
"Expecting factor " << factorType << " at position " << ind);
|
||||||
|
strme << "|" << factor->GetString();
|
||||||
|
} // for (size_t factorType
|
||||||
|
|
||||||
|
bool found;
|
||||||
|
uint64_t vocabId = wrapper.GetVocab().GetVocabId(strme.str(), found);
|
||||||
|
if (!found) {
|
||||||
|
// factor not in phrase table -> phrase is definitely not in it either; exit
|
||||||
|
delete newWord;
|
||||||
|
return NULL;
|
||||||
|
} else {
|
||||||
|
newWord->SetVocabId(vocabId);
|
||||||
|
return newWord;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
void PhraseDictionaryOnDisk::SetParameter(const std::string& key, const std::string& value)
|
void PhraseDictionaryOnDisk::SetParameter(const std::string& key, const std::string& value)
|
||||||
{
|
{
|
||||||
if (key == "max-span-default") {
|
if (key == "max-span-default") {
|
||||||
|
@ -107,6 +107,9 @@ public:
|
|||||||
, OnDiskPt::Vocab &vocab
|
, OnDiskPt::Vocab &vocab
|
||||||
, bool isSyntax) const;
|
, bool isSyntax) const;
|
||||||
|
|
||||||
|
OnDiskPt::Word *ConvertFromMoses(OnDiskPt::OnDiskWrapper &wrapper, const std::vector<Moses::FactorType> &factorsVec
|
||||||
|
, const Moses::Word &origWord) const;
|
||||||
|
|
||||||
void SetParameter(const std::string& key, const std::string& value);
|
void SetParameter(const std::string& key, const std::string& value);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user