mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-02 17:09:36 +03:00
isNT must be set correctly when creating factor
This commit is contained in:
parent
88b4b23434
commit
96e735dda1
@@ -62,7 +62,7 @@ public:
|
|||||||
|
|
||||||
void Add(lm::WordIndex index, const StringPiece &str)
|
void Add(lm::WordIndex index, const StringPiece &str)
|
||||||
{
|
{
|
||||||
std::size_t factorId = m_factorCollection.AddFactor(str, m_system)->GetId();
|
std::size_t factorId = m_factorCollection.AddFactor(str, m_system, false)->GetId();
|
||||||
if (m_mapping.size() <= factorId) {
|
if (m_mapping.size() <= factorId) {
|
||||||
// 0 is <unk> :-)
|
// 0 is <unk> :-)
|
||||||
m_mapping.resize(factorId + 1);
|
m_mapping.resize(factorId + 1);
|
||||||
|
@@ -14,6 +14,12 @@ namespace Moses2
|
|||||||
{
|
{
|
||||||
namespace SCFG
|
namespace SCFG
|
||||||
{
|
{
|
||||||
|
Word::Word(const Word &copy)
|
||||||
|
:Moses2::Word(copy)
|
||||||
|
,isNonTerminal(copy.isNonTerminal)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
void Word::CreateFromString(FactorCollection &vocab,
|
void Word::CreateFromString(FactorCollection &vocab,
|
||||||
const System &system,
|
const System &system,
|
||||||
const std::string &str)
|
const std::string &str)
|
||||||
@@ -46,7 +52,7 @@ void Word::CreateFromString(FactorCollection &vocab,
|
|||||||
const string &tok = toks[i];
|
const string &tok = toks[i];
|
||||||
//cerr << "tok=" << tok << endl;
|
//cerr << "tok=" << tok << endl;
|
||||||
|
|
||||||
const Factor *factor = vocab.AddFactor(tok, system, false);
|
const Factor *factor = vocab.AddFactor(tok, system, isNonTerminal);
|
||||||
m_factors[i] = factor;
|
m_factors[i] = factor;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -19,6 +19,9 @@ class Word: public Moses2::Word
|
|||||||
public:
|
public:
|
||||||
bool isNonTerminal;
|
bool isNonTerminal;
|
||||||
|
|
||||||
|
Word() {}
|
||||||
|
Word(const Word &copy);
|
||||||
|
|
||||||
void CreateFromString(FactorCollection &vocab,
|
void CreateFromString(FactorCollection &vocab,
|
||||||
const System &system,
|
const System &system,
|
||||||
const std::string &str);
|
const std::string &str);
|
||||||
|
@@ -99,6 +99,12 @@ public:
|
|||||||
const Children &GetChildren() const
|
const Children &GetChildren() const
|
||||||
{ return m_children; }
|
{ return m_children; }
|
||||||
|
|
||||||
|
void Debug() const {
|
||||||
|
BOOST_FOREACH(const typename Children::value_type &valPair, m_children) {
|
||||||
|
const WORD &word = valPair.first;
|
||||||
|
std::cerr << word << "(" << word.hash() << ") ";
|
||||||
|
}
|
||||||
|
}
|
||||||
protected:
|
protected:
|
||||||
Children m_children;
|
Children m_children;
|
||||||
TPS *m_targetPhrases;
|
TPS *m_targetPhrases;
|
||||||
@@ -119,6 +125,8 @@ protected:
|
|||||||
else {
|
else {
|
||||||
const WORD &word = source[pos];
|
const WORD &word = source[pos];
|
||||||
Node &child = m_children[word];
|
Node &child = m_children[word];
|
||||||
|
std::cerr << "added " << word << " " << &child << " from " << this << std::endl;
|
||||||
|
|
||||||
return child.AddRule(source, target, pos + 1);
|
return child.AddRule(source, target, pos + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -75,6 +75,7 @@ void PhraseTableMemory::Load(System &system)
|
|||||||
TokenizeMultiCharSeparator(toks, line, "|||");
|
TokenizeMultiCharSeparator(toks, line, "|||");
|
||||||
UTIL_THROW_IF2(toks.size() < 3, "Wrong format");
|
UTIL_THROW_IF2(toks.size() < 3, "Wrong format");
|
||||||
//cerr << "line=" << line << endl;
|
//cerr << "line=" << line << endl;
|
||||||
|
//cerr << "system.isPb=" << system.isPb << endl;
|
||||||
|
|
||||||
if (system.isPb) {
|
if (system.isPb) {
|
||||||
PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
|
PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
|
||||||
@@ -101,7 +102,7 @@ void PhraseTableMemory::Load(System &system)
|
|||||||
else {
|
else {
|
||||||
SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
|
SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
|
||||||
toks[0]);
|
toks[0]);
|
||||||
//cerr << "created soure" << endl;
|
//cerr << "created source:" << *source << endl;
|
||||||
SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this,
|
SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this,
|
||||||
system, toks[1]);
|
system, toks[1]);
|
||||||
target->SetAlignmentInfo(toks[3]);
|
target->SetAlignmentInfo(toks[3]);
|
||||||
@@ -273,7 +274,17 @@ void PhraseTableMemory::LookupGivenNode(const SCFGNODE &node,
|
|||||||
{
|
{
|
||||||
size_t ptInd = GetPtInd();
|
size_t ptInd = GetPtInd();
|
||||||
const SCFGNODE *nextNode = node.Find(wordSought);
|
const SCFGNODE *nextNode = node.Find(wordSought);
|
||||||
cerr << " nextNode=" << nextNode << endl;
|
|
||||||
|
cerr << " finding " << wordSought
|
||||||
|
<< " from " << &node
|
||||||
|
<< " found " << nextNode << endl;
|
||||||
|
|
||||||
|
if (nextNode == NULL) {
|
||||||
|
cerr << " " << wordSought << "(" << wordSought.hash() << ")"
|
||||||
|
<< " node contains:";
|
||||||
|
node.Debug();
|
||||||
|
cerr << endl;
|
||||||
|
}
|
||||||
|
|
||||||
if (nextNode) {
|
if (nextNode) {
|
||||||
// new entries
|
// new entries
|
||||||
|
@@ -51,7 +51,7 @@ void ProbingPT::Load(System &system)
|
|||||||
for (iterSource = sourceVocab.begin(); iterSource != sourceVocab.end();
|
for (iterSource = sourceVocab.begin(); iterSource != sourceVocab.end();
|
||||||
++iterSource) {
|
++iterSource) {
|
||||||
const string &wordStr = iterSource->second;
|
const string &wordStr = iterSource->second;
|
||||||
const Factor *factor = vocab.AddFactor(wordStr, system);
|
const Factor *factor = vocab.AddFactor(wordStr, system, false);
|
||||||
|
|
||||||
uint64_t probingId = iterSource->first;
|
uint64_t probingId = iterSource->first;
|
||||||
size_t factorId = factor->GetId();
|
size_t factorId = factor->GetId();
|
||||||
@@ -68,7 +68,7 @@ void ProbingPT::Load(System &system)
|
|||||||
while (getline(targetVocabStrme, line)) {
|
while (getline(targetVocabStrme, line)) {
|
||||||
vector<string> toks = Tokenize(line, "\t");
|
vector<string> toks = Tokenize(line, "\t");
|
||||||
assert(toks.size());
|
assert(toks.size());
|
||||||
const Factor *factor = vocab.AddFactor(toks[0], system);
|
const Factor *factor = vocab.AddFactor(toks[0], system, false);
|
||||||
uint32_t probingId = Scan<uint32_t>(toks[1]);
|
uint32_t probingId = Scan<uint32_t>(toks[1]);
|
||||||
|
|
||||||
if (probingId >= m_targetVocab.size()) {
|
if (probingId >= m_targetVocab.size()) {
|
||||||
|
@@ -114,7 +114,7 @@ public:
|
|||||||
* If a factor already exist in the collection, return the existing factor, if not create a new 1
|
* If a factor already exist in the collection, return the existing factor, if not create a new 1
|
||||||
*/
|
*/
|
||||||
const Factor *AddFactor(const StringPiece &factorString, const System &system,
|
const Factor *AddFactor(const StringPiece &factorString, const System &system,
|
||||||
bool isNonTerminal = false);
|
bool isNonTerminal);
|
||||||
|
|
||||||
size_t GetNumNonTerminals()
|
size_t GetNumNonTerminals()
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user