move ConvertFromMoses() to Moses

This commit is contained in:
Hieu Hoang 2016-06-28 11:15:40 +01:00
parent ad240a9f5b
commit ebae7ce520
6 changed files with 48 additions and 47 deletions

View File

@ -23,7 +23,7 @@
#include <sys/stat.h>
#include <string>
#include "OnDiskWrapper.h"
#include "moses/Factor.h"
#include "moses/Util.h"
#include "util/exception.hh"
#include "util/string_stream.hh"
@ -219,42 +219,5 @@ uint64_t OnDiskWrapper::GetMisc(const std::string &key) const
return iter->second;
}
Word *OnDiskWrapper::ConvertFromMoses(const std::vector<Moses::FactorType> &factorsVec
, const Moses::Word &origWord) const
{
bool isNonTerminal = origWord.IsNonTerminal();
Word *newWord = new Word(isNonTerminal);
util::StringStream strme;
size_t factorType = factorsVec[0];
const Moses::Factor *factor = origWord.GetFactor(factorType);
UTIL_THROW_IF2(factor == NULL, "Expecting factor " << factorType);
strme << factor->GetString();
for (size_t ind = 1 ; ind < factorsVec.size() ; ++ind) {
size_t factorType = factorsVec[ind];
const Moses::Factor *factor = origWord.GetFactor(factorType);
if (factor == NULL) {
// can have less factors than factorType.size()
break;
}
UTIL_THROW_IF2(factor == NULL,
"Expecting factor " << factorType << " at position " << ind);
strme << "|" << factor->GetString();
} // for (size_t factorType
bool found;
uint64_t vocabId = m_vocab.GetVocabId(strme.str(), found);
if (!found) {
// factor not in phrase table -> phrse definately not in. exit
delete newWord;
return NULL;
} else {
newWord->SetVocabId(vocabId);
return newWord;
}
}
}

View File

@ -22,7 +22,6 @@
#include <fstream>
#include "Vocab.h"
#include "PhraseNode.h"
#include "moses/Word.h"
namespace OnDiskPt
{
@ -107,9 +106,6 @@ public:
uint64_t GetMisc(const std::string &key) const;
Word *ConvertFromMoses(const std::vector<Moses::FactorType> &factorsVec
, const Moses::Word &origWord) const;
};
}

View File

@ -21,6 +21,7 @@
#include <fstream>
#include "OnDiskWrapper.h"
#include "Vocab.h"
#include "moses/Util.h"
#include "util/exception.hh"
using namespace std;

View File

@ -108,7 +108,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
// search for terminal symbol
if (startPos == absEndPos) {
OnDiskPt::Word *sourceWordBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_inputFactorsVec, sourceWordLabel.GetLabel());
OnDiskPt::Word *sourceWordBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_inputFactorsVec, sourceWordLabel.GetLabel());
if (sourceWordBerkeleyDb != NULL) {
const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceWordBerkeleyDb, m_dbWrapper);
@ -154,7 +154,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
for (iterSourceLHS = sourceLHSSet.begin(); iterSourceLHS != sourceLHSSet.end(); ++iterSourceLHS) {
const Word &sourceLHS = *iterSourceLHS;
OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_inputFactorsVec, sourceLHS);
OnDiskPt::Word *sourceLHSBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_inputFactorsVec, sourceLHS);
if (sourceLHSBerkeleyDb == NULL) {
delete sourceLHSBerkeleyDb;
@ -190,7 +190,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
if (doSearch) {
OnDiskPt::Word *chartNonTermBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_outputFactorsVec, cellLabel.GetLabel());
OnDiskPt::Word *chartNonTermBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_outputFactorsVec, cellLabel.GetLabel());
if (chartNonTermBerkeleyDb == NULL)
continue;
@ -234,7 +234,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
for (iterLabelSet = lhsSet.begin(); iterLabelSet != lhsSet.end(); ++iterLabelSet) {
const Word &sourceLHS = *iterLabelSet;
OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_inputFactorsVec, sourceLHS);
OnDiskPt::Word *sourceLHSBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_inputFactorsVec, sourceLHS);
if (sourceLHSBerkeleyDb == NULL)
continue;

View File

@ -150,7 +150,7 @@ void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath
if (prevPtNode) {
Word lastWord = phrase.GetWord(phrase.GetSize() - 1);
lastWord.OnlyTheseFactors(m_inputFactors);
OnDiskPt::Word *lastWordOnDisk = wrapper.ConvertFromMoses(m_input, lastWord);
OnDiskPt::Word *lastWordOnDisk = ConvertFromMoses(wrapper, m_input, lastWord);
TargetPhraseCollection::shared_ptr tpc;
if (lastWordOnDisk == NULL) {
@ -344,6 +344,44 @@ void PhraseDictionaryOnDisk::ConvertToMoses(
}
}
OnDiskPt::Word *PhraseDictionaryOnDisk::ConvertFromMoses(OnDiskPt::OnDiskWrapper &wrapper, const std::vector<Moses::FactorType> &factorsVec
, const Moses::Word &origWord) const
{
bool isNonTerminal = origWord.IsNonTerminal();
OnDiskPt::Word *newWord = new OnDiskPt::Word(isNonTerminal);
util::StringStream strme;
size_t factorType = factorsVec[0];
const Moses::Factor *factor = origWord.GetFactor(factorType);
UTIL_THROW_IF2(factor == NULL, "Expecting factor " << factorType);
strme << factor->GetString();
for (size_t ind = 1 ; ind < factorsVec.size() ; ++ind) {
size_t factorType = factorsVec[ind];
const Moses::Factor *factor = origWord.GetFactor(factorType);
if (factor == NULL) {
// can have less factors than factorType.size()
break;
}
UTIL_THROW_IF2(factor == NULL,
"Expecting factor " << factorType << " at position " << ind);
strme << "|" << factor->GetString();
} // for (size_t factorType
bool found;
uint64_t vocabId = wrapper.GetVocab().GetVocabId(strme.str(), found);
if (!found) {
// factor not in phrase table -> phrse definately not in. exit
delete newWord;
return NULL;
} else {
newWord->SetVocabId(vocabId);
return newWord;
}
}
void PhraseDictionaryOnDisk::SetParameter(const std::string& key, const std::string& value)
{
if (key == "max-span-default") {

View File

@ -107,6 +107,9 @@ public:
, OnDiskPt::Vocab &vocab
, bool isSyntax) const;
OnDiskPt::Word *ConvertFromMoses(OnDiskPt::OnDiskWrapper &wrapper, const std::vector<Moses::FactorType> &factorsVec
, const Moses::Word &origWord) const;
void SetParameter(const std::string& key, const std::string& value);
};