isNonTerminal (isNT) must be set correctly when creating a factor

This commit is contained in:
Hieu Hoang 2016-04-29 12:19:53 +01:00
parent 88b4b23434
commit 96e735dda1
7 changed files with 35 additions and 7 deletions

View File

@ -62,7 +62,7 @@ public:
void Add(lm::WordIndex index, const StringPiece &str) void Add(lm::WordIndex index, const StringPiece &str)
{ {
std::size_t factorId = m_factorCollection.AddFactor(str, m_system)->GetId(); std::size_t factorId = m_factorCollection.AddFactor(str, m_system, false)->GetId();
if (m_mapping.size() <= factorId) { if (m_mapping.size() <= factorId) {
// 0 is <unk> :-) // 0 is <unk> :-)
m_mapping.resize(factorId + 1); m_mapping.resize(factorId + 1);

View File

@ -14,6 +14,12 @@ namespace Moses2
{ {
namespace SCFG namespace SCFG
{ {
// Copy constructor: copy-constructs the Moses2::Word base sub-object from
// `copy` and carries over its isNonTerminal flag.
Word::Word(const Word &copy)
:Moses2::Word(copy)
,isNonTerminal(copy.isNonTerminal)
{
}
void Word::CreateFromString(FactorCollection &vocab, void Word::CreateFromString(FactorCollection &vocab,
const System &system, const System &system,
const std::string &str) const std::string &str)
@ -46,7 +52,7 @@ void Word::CreateFromString(FactorCollection &vocab,
const string &tok = toks[i]; const string &tok = toks[i];
//cerr << "tok=" << tok << endl; //cerr << "tok=" << tok << endl;
const Factor *factor = vocab.AddFactor(tok, system, false); const Factor *factor = vocab.AddFactor(tok, system, isNonTerminal);
m_factors[i] = factor; m_factors[i] = factor;
} }
} }

View File

@ -19,6 +19,9 @@ class Word: public Moses2::Word
public: public:
bool isNonTerminal; bool isNonTerminal;
Word() {}
Word(const Word &copy);
void CreateFromString(FactorCollection &vocab, void CreateFromString(FactorCollection &vocab,
const System &system, const System &system,
const std::string &str); const std::string &str);

View File

@ -99,6 +99,12 @@ public:
const Children &GetChildren() const const Children &GetChildren() const
{ return m_children; } { return m_children; }
void Debug() const {
BOOST_FOREACH(const typename Children::value_type &valPair, m_children) {
const WORD &word = valPair.first;
std::cerr << word << "(" << word.hash() << ") ";
}
}
protected: protected:
Children m_children; Children m_children;
TPS *m_targetPhrases; TPS *m_targetPhrases;
@ -119,6 +125,8 @@ protected:
else { else {
const WORD &word = source[pos]; const WORD &word = source[pos];
Node &child = m_children[word]; Node &child = m_children[word];
std::cerr << "added " << word << " " << &child << " from " << this << std::endl;
return child.AddRule(source, target, pos + 1); return child.AddRule(source, target, pos + 1);
} }
} }

View File

@ -75,6 +75,7 @@ void PhraseTableMemory::Load(System &system)
TokenizeMultiCharSeparator(toks, line, "|||"); TokenizeMultiCharSeparator(toks, line, "|||");
UTIL_THROW_IF2(toks.size() < 3, "Wrong format"); UTIL_THROW_IF2(toks.size() < 3, "Wrong format");
//cerr << "line=" << line << endl; //cerr << "line=" << line << endl;
//cerr << "system.isPb=" << system.isPb << endl;
if (system.isPb) { if (system.isPb) {
PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
@ -101,7 +102,7 @@ void PhraseTableMemory::Load(System &system)
else { else {
SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
toks[0]); toks[0]);
//cerr << "created soure" << endl; //cerr << "created source:" << *source << endl;
SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this, SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this,
system, toks[1]); system, toks[1]);
target->SetAlignmentInfo(toks[3]); target->SetAlignmentInfo(toks[3]);
@ -273,7 +274,17 @@ void PhraseTableMemory::LookupGivenNode(const SCFGNODE &node,
{ {
size_t ptInd = GetPtInd(); size_t ptInd = GetPtInd();
const SCFGNODE *nextNode = node.Find(wordSought); const SCFGNODE *nextNode = node.Find(wordSought);
cerr << " nextNode=" << nextNode << endl;
cerr << " finding " << wordSought
<< " from " << &node
<< " found " << nextNode << endl;
if (nextNode == NULL) {
cerr << " " << wordSought << "(" << wordSought.hash() << ")"
<< " node contains:";
node.Debug();
cerr << endl;
}
if (nextNode) { if (nextNode) {
// new entries // new entries

View File

@ -51,7 +51,7 @@ void ProbingPT::Load(System &system)
for (iterSource = sourceVocab.begin(); iterSource != sourceVocab.end(); for (iterSource = sourceVocab.begin(); iterSource != sourceVocab.end();
++iterSource) { ++iterSource) {
const string &wordStr = iterSource->second; const string &wordStr = iterSource->second;
const Factor *factor = vocab.AddFactor(wordStr, system); const Factor *factor = vocab.AddFactor(wordStr, system, false);
uint64_t probingId = iterSource->first; uint64_t probingId = iterSource->first;
size_t factorId = factor->GetId(); size_t factorId = factor->GetId();
@ -68,7 +68,7 @@ void ProbingPT::Load(System &system)
while (getline(targetVocabStrme, line)) { while (getline(targetVocabStrme, line)) {
vector<string> toks = Tokenize(line, "\t"); vector<string> toks = Tokenize(line, "\t");
assert(toks.size()); assert(toks.size());
const Factor *factor = vocab.AddFactor(toks[0], system); const Factor *factor = vocab.AddFactor(toks[0], system, false);
uint32_t probingId = Scan<uint32_t>(toks[1]); uint32_t probingId = Scan<uint32_t>(toks[1]);
if (probingId >= m_targetVocab.size()) { if (probingId >= m_targetVocab.size()) {

View File

@ -114,7 +114,7 @@ public:
* If a factor already exist in the collection, return the existing factor, if not create a new 1 * If a factor already exist in the collection, return the existing factor, if not create a new 1
*/ */
const Factor *AddFactor(const StringPiece &factorString, const System &system, const Factor *AddFactor(const StringPiece &factorString, const System &system,
bool isNonTerminal = false); bool isNonTerminal);
size_t GetNumNonTerminals() size_t GetNumNonTerminals()
{ {