isNT must be set correctly when creating factor

This commit is contained in:
Hieu Hoang 2016-04-29 12:19:53 +01:00
parent 88b4b23434
commit 96e735dda1
7 changed files with 35 additions and 7 deletions

View File

@ -62,7 +62,7 @@ public:
void Add(lm::WordIndex index, const StringPiece &str)
{
std::size_t factorId = m_factorCollection.AddFactor(str, m_system)->GetId();
std::size_t factorId = m_factorCollection.AddFactor(str, m_system, false)->GetId();
if (m_mapping.size() <= factorId) {
// 0 is <unk> :-)
m_mapping.resize(factorId + 1);

View File

@ -14,6 +14,12 @@ namespace Moses2
{
namespace SCFG
{
/**
 * Copy constructor.
 * The base Moses2::Word part is copied through its own copy constructor,
 * and the isNonTerminal flag is carried over from the source word.
 */
Word::Word(const Word &copy)
  : Moses2::Word(copy), isNonTerminal(copy.isNonTerminal) {}
void Word::CreateFromString(FactorCollection &vocab,
const System &system,
const std::string &str)
@ -46,7 +52,7 @@ void Word::CreateFromString(FactorCollection &vocab,
const string &tok = toks[i];
//cerr << "tok=" << tok << endl;
const Factor *factor = vocab.AddFactor(tok, system, false);
const Factor *factor = vocab.AddFactor(tok, system, isNonTerminal);
m_factors[i] = factor;
}
}

View File

@ -19,6 +19,9 @@ class Word: public Moses2::Word
public:
bool isNonTerminal;
Word() {}
Word(const Word &copy);
void CreateFromString(FactorCollection &vocab,
const System &system,
const std::string &str);

View File

@ -99,6 +99,12 @@ public:
/** Read-only access to this node's child map. */
const Children &GetChildren() const
{
  return m_children;
}
void Debug() const {
BOOST_FOREACH(const typename Children::value_type &valPair, m_children) {
const WORD &word = valPair.first;
std::cerr << word << "(" << word.hash() << ") ";
}
}
protected:
Children m_children;
TPS *m_targetPhrases;
@ -119,6 +125,8 @@ protected:
else {
const WORD &word = source[pos];
Node &child = m_children[word];
std::cerr << "added " << word << " " << &child << " from " << this << std::endl;
return child.AddRule(source, target, pos + 1);
}
}

View File

@ -75,6 +75,7 @@ void PhraseTableMemory::Load(System &system)
TokenizeMultiCharSeparator(toks, line, "|||");
UTIL_THROW_IF2(toks.size() < 3, "Wrong format");
//cerr << "line=" << line << endl;
//cerr << "system.isPb=" << system.isPb << endl;
if (system.isPb) {
PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
@ -101,7 +102,7 @@ void PhraseTableMemory::Load(System &system)
else {
SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
toks[0]);
//cerr << "created soure" << endl;
//cerr << "created source:" << *source << endl;
SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this,
system, toks[1]);
target->SetAlignmentInfo(toks[3]);
@ -273,7 +274,17 @@ void PhraseTableMemory::LookupGivenNode(const SCFGNODE &node,
{
size_t ptInd = GetPtInd();
const SCFGNODE *nextNode = node.Find(wordSought);
cerr << " nextNode=" << nextNode << endl;
cerr << " finding " << wordSought
<< " from " << &node
<< " found " << nextNode << endl;
if (nextNode == NULL) {
cerr << " " << wordSought << "(" << wordSought.hash() << ")"
<< " node contains:";
node.Debug();
cerr << endl;
}
if (nextNode) {
// new entries

View File

@ -51,7 +51,7 @@ void ProbingPT::Load(System &system)
for (iterSource = sourceVocab.begin(); iterSource != sourceVocab.end();
++iterSource) {
const string &wordStr = iterSource->second;
const Factor *factor = vocab.AddFactor(wordStr, system);
const Factor *factor = vocab.AddFactor(wordStr, system, false);
uint64_t probingId = iterSource->first;
size_t factorId = factor->GetId();
@ -68,7 +68,7 @@ void ProbingPT::Load(System &system)
while (getline(targetVocabStrme, line)) {
vector<string> toks = Tokenize(line, "\t");
assert(toks.size());
const Factor *factor = vocab.AddFactor(toks[0], system);
const Factor *factor = vocab.AddFactor(toks[0], system, false);
uint32_t probingId = Scan<uint32_t>(toks[1]);
if (probingId >= m_targetVocab.size()) {

View File

@ -114,7 +114,7 @@ public:
* If a factor already exists in the collection, return the existing factor; if not, create a new one
*/
const Factor *AddFactor(const StringPiece &factorString, const System &system,
bool isNonTerminal = false);
bool isNonTerminal);
size_t GetNumNonTerminals()
{