mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
moses_chart: add an alternative parser that implements the algorithm outlined
in Hopkins and Langmead (2010). This allows parsing of scope-3 grammars with cubic space and time requirements. Enabled using the new "parsing-algorithm" parameter (supports in-memory rule tables only).
This commit is contained in:
parent
9feecb80de
commit
b2688f3045
@ -51,6 +51,8 @@ class AlignmentInfo
|
||||
return m_nonTermIndexMap;
|
||||
}
|
||||
|
||||
size_t GetSize() const { return m_collection.size(); }
|
||||
|
||||
std::vector< const std::pair<size_t,size_t>* > GetSortedAlignments() const;
|
||||
|
||||
private:
|
||||
|
@ -14,4 +14,4 @@ if [ option.get "with-synlm" : no : yes ] = yes
|
||||
lib moses :
|
||||
#All cpp files except those listed
|
||||
[ glob *.cpp DynSAInclude/*.cpp : ThreadPool.cpp SyntacticLanguageModel.cpp ]
|
||||
synlm ThreadPool CYKPlusParser//CYKPlusParser LM//LM RuleTable//RuleTable headers ../..//z ../../OnDiskPt//OnDiskPt ;
|
||||
synlm ThreadPool CYKPlusParser//CYKPlusParser LM//LM RuleTable//RuleTable Scope3Parser//Scope3Parser headers ../..//z ../../OnDiskPt//OnDiskPt ;
|
||||
|
@ -126,6 +126,7 @@ Parameter::Parameter()
|
||||
AddParam("cube-pruning-pop-limit", "cbp", "How many hypotheses should be popped for each stack. (default = 1000)");
|
||||
AddParam("cube-pruning-diversity", "cbd", "How many hypotheses should be created for each coverage. (default = 0)");
|
||||
AddParam("cube-pruning-lazy-scoring", "cbls", "Don't fully score a hypothesis until it is popped");
|
||||
AddParam("parsing-algorithm", "Which parsing algorithm to use. 0=CYK+, 1=scope-3. (default = 0)");
|
||||
AddParam("search-algorithm", "Which search algorithm to use. 0=normal stack, 1=cube pruning, 2=cube growing. (default = 0)");
|
||||
AddParam("constraint", "Location of the file with target sentences to produce constraining the search");
|
||||
AddParam("use-alignment-info", "Use word-to-word alignment: actually it is only used to output the word-to-word alignment. Word-to-word alignments are taken from the phrase table if any. Default is false.");
|
||||
|
@ -1,4 +1,3 @@
|
||||
// $Id$
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
@ -28,6 +27,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#ifndef WIN32
|
||||
#include "PhraseDictionaryDynSuffixArray.h"
|
||||
#endif
|
||||
#include "RuleTable/UTrie.h"
|
||||
|
||||
#include "StaticData.h"
|
||||
#include "InputType.h"
|
||||
@ -122,16 +122,21 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSyst
|
||||
VERBOSE(2,"Using gzipped file" << std::endl);
|
||||
}
|
||||
|
||||
PhraseDictionarySCFG* pdm = new PhraseDictionarySCFG(m_numScoreComponent,this);
|
||||
bool ret = pdm->Load(GetInput()
|
||||
RuleTableTrie *dict;
|
||||
if (staticData.GetParsingAlgorithm() == ParseScope3) {
|
||||
dict = new RuleTableUTrie(m_numScoreComponent, this);
|
||||
} else {
|
||||
dict = new PhraseDictionarySCFG(m_numScoreComponent, this);
|
||||
}
|
||||
bool ret = dict->Load(GetInput()
|
||||
, GetOutput()
|
||||
, m_filePath
|
||||
, m_weight
|
||||
, m_tableLimit
|
||||
, system->GetLanguageModels()
|
||||
, system->GetWordPenaltyProducer());
|
||||
CHECK(ret);
|
||||
return pdm;
|
||||
assert(ret);
|
||||
return dict;
|
||||
} else if (m_implementation == ALSuffixArray) {
|
||||
// memory phrase table
|
||||
VERBOSE(2,"using Hiero format phrase tables" << std::endl);
|
||||
|
95
moses/src/RuleTable/UTrie.cpp
Normal file
95
moses/src/RuleTable/UTrie.cpp
Normal file
@ -0,0 +1,95 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "UTrie.h"
|
||||
|
||||
#include "NonTerminal.h"
|
||||
#include "RuleTable/Trie.h"
|
||||
#include "RuleTable/UTrieNode.h"
|
||||
#include "Scope3Parser/Parser.h"
|
||||
#include "StaticData.h"
|
||||
#include "TargetPhrase.h"
|
||||
#include "TargetPhraseCollection.h"
|
||||
#include "Util.h"
|
||||
#include "Word.h"
|
||||
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/version.hpp>
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
// Return the target phrase collection that the given rule belongs to,
// creating it if it does not exist yet.  The trie node for the source RHS is
// found (or created) first; that node then picks the collection matching the
// rule's target-side non-terminal labels.
TargetPhraseCollection &RuleTableUTrie::GetOrCreateTargetPhraseCollection(
  const Phrase &source, const TargetPhrase &target, const Word &sourceLHS)
{
  return GetOrCreateNode(source, target, sourceLHS)
         .GetOrCreateTargetPhraseCollection(target);
}
|
||||
|
||||
// Follow the symbols of the source RHS down from the root, creating nodes as
// needed, and return the node at the end of the path.
//
// Terminals descend through the per-word terminal children.  Every source
// non-terminal descends through the node's single gap child; the aligned
// target non-terminal is looked up via the target's alignment info, which is
// expected to list the non-terminal alignments in source order (asserted).
// The source LHS is not used.
UTrieNode &RuleTableUTrie::GetOrCreateNode(const Phrase &source,
                                           const TargetPhrase &target,
                                           const Word &/*sourceLHS*/)
{
  const AlignmentInfo &alignments = target.GetAlignmentInfo();
  AlignmentInfo::const_iterator nextAlign = alignments.begin();

  UTrieNode *node = &m_root;
  const size_t numSymbols = source.GetSize();
  for (size_t i = 0; i != numSymbols; ++i) {
    const Word &symbol = source.GetWord(i);

    if (!symbol.IsNonTerminal()) {
      node = node->GetOrCreateTerminalChild(symbol);
    } else {
      // The next unconsumed alignment must belong to this source position.
      assert(nextAlign != target.GetAlignmentInfo().end());
      assert(nextAlign->first == i);
      const Word &targetNonTerm = target.GetWord(nextAlign->second);
      ++nextAlign;
      node = node->GetOrCreateNonTerminalChild(targetNonTerm);
    }

    assert(node != NULL);
  }

  return *node;
}
|
||||
|
||||
// Create a scope-3 parser that looks up rules from this table for the given
// sentence.  The caller receives a heap-allocated object.
ChartRuleLookupManager *RuleTableUTrie::CreateRuleLookupManager(
  const InputType &sentence,
  const ChartCellCollection &cellCollection)
{
  // FIXME This should be a parameter to CreateRuleLookupManager
  // NOTE(review): 0 presumably means "no span limit" inside the parser --
  // confirm against Scope3Parser's constructor.
  size_t spanLimit = 0;
  return new Scope3Parser(sentence, cellCollection, *this, spanLimit);
}
|
||||
|
||||
void RuleTableUTrie::SortAndPrune()
|
||||
{
|
||||
if (GetTableLimit()) {
|
||||
m_root.Sort(GetTableLimit());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Moses
|
67
moses/src/RuleTable/UTrie.h
Normal file
67
moses/src/RuleTable/UTrie.h
Normal file
@ -0,0 +1,67 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "RuleTable/Trie.h"
|
||||
#include "RuleTable/UTrieNode.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class Phrase;
|
||||
class TargetPhrase;
|
||||
class TargetPhraseCollection;
|
||||
class Word;
|
||||
|
||||
/*** Implementation of RuleTableTrie. A RuleTableUTrie is designed to store
|
||||
* string-to-tree SCFG grammars only (i.e. rules can have distinct labels on
|
||||
* the target side, but only a generic non-terminal on the source side).
|
||||
* A key is the source RHS (one symbol per edge) of a rule and a mapped value
|
||||
* is the collection of grammar rules that share the same source RHS.
|
||||
*
|
||||
* (The 'U' in UTrie stands for 'unlabelled' -- the keys are unlabelled and
|
||||
* the target labels are stored on the node values, as opposed to the grammar
|
||||
* being a monolingual projection with target labels projected onto the source
|
||||
* side.)
|
||||
*/
|
||||
class RuleTableUTrie : public RuleTableTrie
|
||||
{
|
||||
public:
|
||||
RuleTableUTrie(size_t numScoreComponents, PhraseDictionaryFeature *feature)
|
||||
: RuleTableTrie(numScoreComponents, feature) {}
|
||||
|
||||
const UTrieNode &GetRootNode() const { return m_root; }
|
||||
|
||||
ChartRuleLookupManager *CreateRuleLookupManager(const InputType &,
|
||||
const ChartCellCollection &);
|
||||
|
||||
private:
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
const Phrase &source, const TargetPhrase &target, const Word &sourceLHS);
|
||||
|
||||
UTrieNode &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
|
||||
const Word &sourceLHS);
|
||||
|
||||
void SortAndPrune();
|
||||
|
||||
UTrieNode m_root;
|
||||
};
|
||||
|
||||
} // namespace Moses
|
115
moses/src/RuleTable/UTrieNode.cpp
Normal file
115
moses/src/RuleTable/UTrieNode.cpp
Normal file
@ -0,0 +1,115 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "UTrieNode.h"
|
||||
|
||||
#include "NonTerminal.h"
|
||||
#include "PhraseDictionaryNodeSCFG.h" // For TerminalHasher and TerminalEqualityPred
|
||||
#include "RuleTable/Trie.h"
|
||||
#include "TargetPhrase.h"
|
||||
#include "TargetPhraseCollection.h"
|
||||
#include "Util.h"
|
||||
#include "Word.h"
|
||||
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/version.hpp>
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
void UTrieNode::Prune(size_t tableLimit)
|
||||
{
|
||||
// Recusively prune child node values.
|
||||
for (TerminalMap::iterator p = m_terminalMap.begin();
|
||||
p != m_terminalMap.end(); ++p) {
|
||||
p->second.Prune(tableLimit);
|
||||
}
|
||||
if (m_gapNode) {
|
||||
m_gapNode->Prune(tableLimit);
|
||||
}
|
||||
|
||||
// Prune TargetPhraseCollections at this node.
|
||||
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
|
||||
p->second.Prune(true, tableLimit);
|
||||
}
|
||||
}
|
||||
|
||||
void UTrieNode::Sort(size_t tableLimit)
|
||||
{
|
||||
// Recusively sort child node values.
|
||||
for (TerminalMap::iterator p = m_terminalMap.begin();
|
||||
p != m_terminalMap.end(); ++p) {
|
||||
p->second.Sort(tableLimit);
|
||||
}
|
||||
if (m_gapNode) {
|
||||
m_gapNode->Sort(tableLimit);
|
||||
}
|
||||
|
||||
// Sort TargetPhraseCollections at this node.
|
||||
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
|
||||
p->second.Sort(true, tableLimit);
|
||||
}
|
||||
}
|
||||
|
||||
// Return the child reached by the given source terminal, adding an empty
// child node if this node has none for that word yet.
UTrieNode *UTrieNode::GetOrCreateTerminalChild(const Word &sourceTerm)
{
  assert(!sourceTerm.IsNonTerminal());
  // insert() leaves an existing entry untouched and, either way, hands back
  // an iterator to the entry for sourceTerm.
  TerminalMap::iterator entry =
    m_terminalMap.insert(std::make_pair(sourceTerm, UTrieNode())).first;
  return &entry->second;
}
|
||||
|
||||
// Return the single gap child of this node, allocating it on first use.
// The target non-terminal is only checked by the assertion: all
// non-terminals share the same child regardless of their label.
UTrieNode *UTrieNode::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
{
  assert(targetNonTerm.IsNonTerminal());
  if (!m_gapNode) {
    m_gapNode = new UTrieNode();
  }
  return m_gapNode;
}
|
||||
|
||||
// Return the target phrase collection for rules whose target non-terminals
// carry the same labels as those of the given target phrase, creating an
// empty collection if there is none yet.
//
// Each aligned target non-terminal is translated into a small integer via
// InsertLabel() (its index in this node's label table for that slot); the
// resulting index vector is the key into the label map.
TargetPhraseCollection &UTrieNode::GetOrCreateTargetPhraseCollection(
  const TargetPhrase &target)
{
  const AlignmentInfo &alignments = target.GetAlignmentInfo();
  const size_t rank = alignments.GetSize();

  // One label list per alignment entry.
  m_labelTable.resize(rank);

  std::vector<int> labelKey;
  labelKey.reserve(rank);

  int slot = 0;
  AlignmentInfo::const_iterator align = alignments.begin();
  while (align != alignments.end()) {
    const Word &targetNonTerm = target.GetWord(align->second);
    labelKey.push_back(InsertLabel(slot, targetNonTerm));
    ++slot;
    ++align;
  }

  // operator[] default-constructs the collection on first use of the key.
  return m_labelMap[labelKey];
}
|
||||
|
||||
} // namespace Moses
|
103
moses/src/RuleTable/UTrieNode.h
Normal file
103
moses/src/RuleTable/UTrieNode.h
Normal file
@ -0,0 +1,103 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "NonTerminal.h"
|
||||
#include "RuleTable/Trie.h"
|
||||
#include "TargetPhrase.h"
|
||||
#include "TargetPhraseCollection.h"
|
||||
#include "Terminal.h"
|
||||
#include "Util.h"
|
||||
#include "Word.h"
|
||||
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/version.hpp>
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class RuleTableUTrie;
|
||||
|
||||
class UTrieNode
|
||||
{
|
||||
public:
|
||||
typedef std::vector<std::vector<Word> > LabelTable;
|
||||
#if defined(BOOST_VERSION) && (BOOST_VERSION >= 104200)
|
||||
typedef boost::unordered_map<Word,
|
||||
UTrieNode,
|
||||
TerminalHasher,
|
||||
TerminalEqualityPred> TerminalMap;
|
||||
|
||||
typedef boost::unordered_map<std::vector<int>,
|
||||
TargetPhraseCollection> LabelMap;
|
||||
#else
|
||||
typedef std::map<Word, UTrieNode> TerminalMap;
|
||||
typedef std::map<std::vector<int>, TargetPhraseCollection> LabelMap;
|
||||
#endif
|
||||
|
||||
~UTrieNode() { delete m_gapNode; }
|
||||
|
||||
const LabelTable &GetLabelTable() const { return m_labelTable; }
|
||||
const LabelMap &GetLabelMap() const { return m_labelMap; }
|
||||
const TerminalMap &GetTerminalMap() const { return m_terminalMap; }
|
||||
|
||||
const UTrieNode *GetNonTerminalChild() const { return m_gapNode; }
|
||||
|
||||
UTrieNode *GetOrCreateTerminalChild(const Word &sourceTerm);
|
||||
UTrieNode *GetOrCreateNonTerminalChild(const Word &targetNonTerm);
|
||||
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
const TargetPhrase &);
|
||||
|
||||
bool IsLeaf() const { return m_terminalMap.empty() && m_gapNode == NULL; }
|
||||
|
||||
bool HasRules() const { return !m_labelMap.empty(); }
|
||||
|
||||
void Prune(size_t tableLimit);
|
||||
void Sort(size_t tableLimit);
|
||||
|
||||
private:
|
||||
friend class RuleTableUTrie;
|
||||
|
||||
UTrieNode() : m_gapNode(NULL) {}
|
||||
|
||||
int InsertLabel(int i, const Word &w)
|
||||
{
|
||||
std::vector<Word> &inner = m_labelTable[i];
|
||||
for (size_t j = 0; j < inner.size(); ++j) {
|
||||
if (inner[j] == w) {
|
||||
return j;
|
||||
}
|
||||
}
|
||||
inner.push_back(w);
|
||||
return inner.size()-1;
|
||||
}
|
||||
|
||||
LabelTable m_labelTable;
|
||||
LabelMap m_labelMap;
|
||||
TerminalMap m_terminalMap;
|
||||
UTrieNode *m_gapNode;
|
||||
};
|
||||
|
||||
} // namespace Moses
|
60
moses/src/Scope3Parser/ApplicableRuleTrie.cpp
Normal file
60
moses/src/Scope3Parser/ApplicableRuleTrie.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "Scope3Parser/ApplicableRuleTrie.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
void ApplicableRuleTrie::Extend(const UTrieNode &root, int minPos,
|
||||
const SentenceMap &sentMap, bool followsGap)
|
||||
{
|
||||
const UTrieNode::TerminalMap &termMap = root.GetTerminalMap();
|
||||
for (UTrieNode::TerminalMap::const_iterator p = termMap.begin();
|
||||
p != termMap.end(); ++p) {
|
||||
const Word &word = p->first;
|
||||
const UTrieNode &child = p->second;
|
||||
SentenceMap::const_iterator q = sentMap.find(word);
|
||||
if (q == sentMap.end()) {
|
||||
continue;
|
||||
}
|
||||
for (std::vector<size_t>::const_iterator r = q->second.begin();
|
||||
r != q->second.end(); ++r) {
|
||||
size_t index = *r;
|
||||
if (index == minPos || (followsGap && index > minPos) || minPos == -1) {
|
||||
ApplicableRuleTrie *subTrie = new ApplicableRuleTrie(index, index,
|
||||
child);
|
||||
subTrie->Extend(child, index+1, sentMap, false);
|
||||
m_children.push_back(subTrie);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const UTrieNode *child = root.GetNonTerminalChild();
|
||||
if (!child) {
|
||||
return;
|
||||
}
|
||||
int start = followsGap ? -1 : minPos;
|
||||
ApplicableRuleTrie *subTrie = new ApplicableRuleTrie(start, -1, *child);
|
||||
int newMinPos = (minPos == -1 ? 1 : minPos+1);
|
||||
subTrie->Extend(*child, newMinPos, sentMap, true);
|
||||
m_children.push_back(subTrie);
|
||||
}
|
||||
|
||||
} // namespace Moses
|
57
moses/src/Scope3Parser/ApplicableRuleTrie.h
Normal file
57
moses/src/Scope3Parser/ApplicableRuleTrie.h
Normal file
@ -0,0 +1,57 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Scope3Parser/SentenceMap.h"
|
||||
#include "Scope3Parser/VarSpanNode.h"
|
||||
#include "RuleTable/UTrieNode.h"
|
||||
#include "Util.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
struct VarSpanNode;
|
||||
|
||||
struct ApplicableRuleTrie
|
||||
{
|
||||
public:
|
||||
ApplicableRuleTrie(int start, int end, const UTrieNode &node)
|
||||
: m_start(start)
|
||||
, m_end(end)
|
||||
, m_node(&node)
|
||||
, m_vstNode(NULL) {}
|
||||
|
||||
~ApplicableRuleTrie() {
|
||||
RemoveAllInColl(m_children);
|
||||
}
|
||||
|
||||
void Extend(const UTrieNode &root, int minPos, const SentenceMap &sentMap,
|
||||
bool followsGap);
|
||||
|
||||
int m_start;
|
||||
int m_end;
|
||||
const UTrieNode *m_node;
|
||||
const VarSpanNode *m_vstNode;
|
||||
std::vector<ApplicableRuleTrie*> m_children;
|
||||
};
|
||||
|
||||
}
|
50
moses/src/Scope3Parser/IntermediateVarSpanNode.h
Normal file
50
moses/src/Scope3Parser/IntermediateVarSpanNode.h
Normal file
@ -0,0 +1,50 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
// Start and end ranges for a non-terminal span, plus a count of split
// points.  Each range is a (first, second) pair of ints; -1 marks a value
// that has not been set.
struct IntermediateVarSpanNode
{
public:
  typedef std::pair<int, int> Range;

  // Creates a node with both ranges unset and no split points.
  IntermediateVarSpanNode()
    : m_start(Range(-1, -1))
    , m_end(Range(-1, -1))
    , m_numSplitPoints(0) {}

  // Creates a node with the given ranges and no split points.
  IntermediateVarSpanNode(const Range &start, const Range &end)
    : m_start(start)
    , m_end(end)
    , m_numSplitPoints(0) {}

  // A node is open while the upper bound of its end range is still unset.
  // Both queries only read state, so they are const and usable through
  // const objects and references.
  bool isOpen() const {
    return m_end.second == -1;
  }
  bool isClosed() const {
    return !isOpen();
  }

  Range m_start;
  Range m_end;
  int m_numSplitPoints;
};
|
||||
|
||||
}
|
1
moses/src/Scope3Parser/Jamfile
Normal file
1
moses/src/Scope3Parser/Jamfile
Normal file
@ -0,0 +1 @@
|
||||
lib Scope3Parser : [ glob *.cpp ] ..//headers ;
|
198
moses/src/Scope3Parser/Parser.cpp
Normal file
198
moses/src/Scope3Parser/Parser.cpp
Normal file
@ -0,0 +1,198 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "Scope3Parser/Parser.h"
|
||||
|
||||
#include "ChartTranslationOptionList.h"
|
||||
#include "InputType.h"
|
||||
#include "NonTerminal.h"
|
||||
#include "RuleTable/UTrieNode.h"
|
||||
#include "RuleTable/UTrie.h"
|
||||
#include "Scope3Parser/ApplicableRuleTrie.h"
|
||||
#include "Scope3Parser/StackLattice.h"
|
||||
#include "Scope3Parser/StackLatticeBuilder.h"
|
||||
#include "Scope3Parser/StackLatticeSearcher.h"
|
||||
#include "Scope3Parser/VarSpanTrieBuilder.h"
|
||||
#include "StaticData.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
void Scope3Parser::GetChartRuleCollection(
|
||||
const WordsRange &range,
|
||||
bool adhereTableLimit,
|
||||
ChartTranslationOptionList &outColl)
|
||||
{
|
||||
const size_t start = range.GetStartPos();
|
||||
const size_t end = range.GetEndPos();
|
||||
const size_t ruleLimit = StaticData::Instance().GetRuleLimit();
|
||||
|
||||
std::vector<std::pair<const UTrieNode *, const VarSpanNode *> > &pairVec = m_ruleApplications[start][end-start+1];
|
||||
|
||||
MatchCallback matchCB(ruleLimit, adhereTableLimit, outColl);
|
||||
for (std::vector<std::pair<const UTrieNode *, const VarSpanNode *> >::const_iterator p = pairVec.begin(); p != pairVec.end(); ++p) {
|
||||
const UTrieNode &ruleNode = *(p->first);
|
||||
const VarSpanNode &varSpanNode = *(p->second);
|
||||
|
||||
const UTrieNode::LabelMap &labelMap = ruleNode.GetLabelMap();
|
||||
|
||||
if (varSpanNode.m_rank == 0) { // Purely lexical rule.
|
||||
assert(labelMap.size() == 1);
|
||||
const TargetPhraseCollection &tpc = labelMap.begin()->second;
|
||||
matchCB.m_tpc = &tpc;
|
||||
matchCB(m_emptyStackVec);
|
||||
} else { // Rule has at least one non-terminal.
|
||||
varSpanNode.CalculateRanges(start, end, m_ranges);
|
||||
m_latticeBuilder.Build(start, end, ruleNode, varSpanNode, m_ranges,
|
||||
this->GetCellCollection(), m_lattice,
|
||||
m_quickCheckTable);
|
||||
StackLatticeSearcher<MatchCallback> searcher(m_lattice, m_ranges);
|
||||
UTrieNode::LabelMap::const_iterator p = labelMap.begin();
|
||||
for (; p != labelMap.end(); ++p) {
|
||||
const std::vector<int> &labels = p->first;
|
||||
const TargetPhraseCollection &tpc = p->second;
|
||||
assert(labels.size() == varSpanNode.m_rank);
|
||||
bool failCheck = false;
|
||||
for (int i = 0; i < varSpanNode.m_rank; ++i) {
|
||||
if (!m_quickCheckTable[i][labels[i]]) {
|
||||
failCheck = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (failCheck) {
|
||||
continue;
|
||||
}
|
||||
matchCB.m_tpc = &tpc;
|
||||
searcher.Search(labels, matchCB);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Scope3Parser::Init()
|
||||
{
|
||||
InitRuleApplicationVector();
|
||||
|
||||
const Sentence &sentence = dynamic_cast<const Sentence &>(GetSentence());
|
||||
|
||||
// Build a map from Words to index-sets.
|
||||
SentenceMap sentMap;
|
||||
FillSentenceMap(sentence, sentMap);
|
||||
|
||||
// Build a trie containing 'elastic' application contexts
|
||||
const UTrieNode &rootNode = m_ruleTable.GetRootNode();
|
||||
std::auto_ptr<ApplicableRuleTrie> art(new ApplicableRuleTrie(-1, -1, rootNode));
|
||||
art->Extend(rootNode, -1, sentMap, false);
|
||||
|
||||
// Build a trie containing just the non-terminal contexts and insert pointers
|
||||
// to its nodes back into the ART trie. Contiguous non-terminal contexts are
|
||||
// merged and the number of split points is recorded.
|
||||
VarSpanTrieBuilder vstBuilder;
|
||||
m_varSpanTrie = vstBuilder.Build(*art);
|
||||
|
||||
// Fill each cell with a list of pointers to relevant ART nodes.
|
||||
AddRulesToCells(*art, std::make_pair<int, int>(-1, -1), sentence.GetSize()-1, 0);
|
||||
}
|
||||
|
||||
void Scope3Parser::InitRuleApplicationVector()
|
||||
{
|
||||
const size_t sourceSize = GetSentence().GetSize();
|
||||
m_ruleApplications.resize(sourceSize);
|
||||
for (size_t start = 0; start < sourceSize; ++start) {
|
||||
size_t maxSpan = sourceSize-start+1;
|
||||
m_ruleApplications[start].resize(maxSpan+1);
|
||||
}
|
||||
}
|
||||
|
||||
void Scope3Parser::FillSentenceMap(
|
||||
const Sentence &sent, SentenceMap &sentMap)
|
||||
{
|
||||
for (size_t i = 0; i < sent.GetSize(); ++i) {
|
||||
sentMap[sent.GetWord(i)].push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
void Scope3Parser::AddRulesToCells(
|
||||
const ApplicableRuleTrie &node,
|
||||
std::pair<int, int> start,
|
||||
int maxPos,
|
||||
int depth)
|
||||
{
|
||||
if (depth > 0) {
|
||||
// Determine the start range for this path if not already known.
|
||||
if (start.first == -1 && start.second == -1) {
|
||||
assert(depth == 1);
|
||||
start.first = std::max(0, node.m_start);
|
||||
start.second = node.m_start;
|
||||
} else if (start.second < 0) {
|
||||
assert(depth > 1);
|
||||
if (node.m_start == -1) {
|
||||
--start.second; // Record split point
|
||||
} else {
|
||||
int numSplitPoints = -1 - start.second;
|
||||
start.second = node.m_start - (numSplitPoints+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (node.m_node->HasRules()) {
|
||||
assert(depth > 0);
|
||||
assert(node.m_vstNode);
|
||||
// Determine the end range for this path.
|
||||
std::pair<int, int> end;
|
||||
if (node.m_end == -1) {
|
||||
end.first = (*(node.m_vstNode->m_label))[2];
|
||||
end.second = (*(node.m_vstNode->m_label))[3];
|
||||
assert(end.first != -1);
|
||||
if (end.second == -1) {
|
||||
end.second = maxPos;
|
||||
}
|
||||
} else {
|
||||
assert(node.m_start == node.m_end); // Should be a terminal
|
||||
end.first = end.second = node.m_start;
|
||||
}
|
||||
// Add a (rule trie node, VST node) pair for each cell in the range.
|
||||
int s2 = start.second;
|
||||
if (s2 < 0) {
|
||||
int numSplitPoints = -1 - s2;
|
||||
s2 = maxPos - numSplitPoints;
|
||||
}
|
||||
for (int i = start.first; i <= s2; ++i) {
|
||||
int e1 = std::max(i+depth-1, end.first);
|
||||
for (int j = e1; j <= end.second; ++j) {
|
||||
size_t span = j-i+1;
|
||||
assert(span >= 1);
|
||||
if (m_maxChartSpan && span > m_maxChartSpan) {
|
||||
break;
|
||||
}
|
||||
m_ruleApplications[i][span].push_back(std::make_pair(node.m_node,
|
||||
node.m_vstNode));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (std::vector<ApplicableRuleTrie*>::const_iterator p = node.m_children.begin(); p != node.m_children.end(); ++p) {
|
||||
AddRulesToCells(**p, start, maxPos, depth+1);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Moses
|
104
moses/src/Scope3Parser/Parser.h
Normal file
104
moses/src/Scope3Parser/Parser.h
Normal file
@ -0,0 +1,104 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ChartRuleLookupManager.h"
|
||||
#include "ChartTranslationOptionList.h"
|
||||
#include "NonTerminal.h"
|
||||
#include "RuleTable/UTrieNode.h"
|
||||
#include "RuleTable/UTrie.h"
|
||||
#include "Scope3Parser/ApplicableRuleTrie.h"
|
||||
#include "Scope3Parser/StackLattice.h"
|
||||
#include "Scope3Parser/StackLatticeBuilder.h"
|
||||
#include "Scope3Parser/StackLatticeSearcher.h"
|
||||
#include "Scope3Parser/VarSpanTrieBuilder.h"
|
||||
#include "StaticData.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class InputType;
|
||||
class ChartCellCollection;
|
||||
class ChartHypothesisCollection;
|
||||
class WordsRange;
|
||||
|
||||
class Scope3Parser : public ChartRuleLookupManager
|
||||
{
|
||||
public:
|
||||
Scope3Parser(const InputType &sentence,
|
||||
const ChartCellCollection &cellColl,
|
||||
const RuleTableUTrie &ruleTable,
|
||||
size_t maxChartSpan)
|
||||
: ChartRuleLookupManager(sentence, cellColl)
|
||||
, m_ruleTable(ruleTable)
|
||||
, m_maxChartSpan(maxChartSpan)
|
||||
{
|
||||
Init();
|
||||
}
|
||||
|
||||
void GetChartRuleCollection(
|
||||
const WordsRange &range,
|
||||
bool adhereTableLimit,
|
||||
ChartTranslationOptionList &outColl);
|
||||
|
||||
private:
|
||||
// Define a callback type for use by StackLatticeSearcher.
|
||||
struct MatchCallback
|
||||
{
|
||||
public:
|
||||
MatchCallback(size_t ruleLimit,
|
||||
bool adhereTableLimit,
|
||||
ChartTranslationOptionList &out)
|
||||
: m_ruleLimit(ruleLimit)
|
||||
, m_adhereTableLimit(adhereTableLimit)
|
||||
, m_out(out)
|
||||
, m_tpc(NULL) {}
|
||||
void operator()(const StackVec &stackVec)
|
||||
{
|
||||
m_out.Add(*m_tpc, stackVec, m_adhereTableLimit, m_ruleLimit);
|
||||
}
|
||||
size_t m_ruleLimit;
|
||||
bool m_adhereTableLimit;
|
||||
ChartTranslationOptionList &m_out;
|
||||
const TargetPhraseCollection *m_tpc;
|
||||
};
|
||||
|
||||
void Init();
|
||||
void InitRuleApplicationVector();
|
||||
void FillSentenceMap(const Sentence &, SentenceMap &);
|
||||
void AddRulesToCells(const ApplicableRuleTrie &, std::pair<int, int>, int,
|
||||
int);
|
||||
|
||||
const RuleTableUTrie &m_ruleTable;
|
||||
std::vector<std::vector<std::vector<
|
||||
std::pair<const UTrieNode *, const VarSpanNode *> > > > m_ruleApplications;
|
||||
std::auto_ptr<VarSpanNode> m_varSpanTrie;
|
||||
StackVec m_emptyStackVec;
|
||||
const size_t m_maxChartSpan;
|
||||
StackLattice m_lattice;
|
||||
StackLatticeBuilder m_latticeBuilder;
|
||||
std::vector<VarSpanNode::NonTermRange> m_ranges;
|
||||
std::vector<std::vector<bool> > m_quickCheckTable;
|
||||
};
|
||||
|
||||
} // namespace Moses
|
35
moses/src/Scope3Parser/SentenceMap.h
Normal file
35
moses/src/Scope3Parser/SentenceMap.h
Normal file
@ -0,0 +1,35 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Terminal.h"
|
||||
#include "Word.h"
|
||||
|
||||
#include <boost/unordered_map.hpp>
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
// Hash map from a word to a list of positions.  Scope3Parser::FillSentenceMap
// fills it with the sentence positions at which each word occurs.
typedef boost::unordered_map<Word, std::vector<size_t>,
                             TerminalHasher, TerminalEqualityPred> SentenceMap;
|
||||
}
|
37
moses/src/Scope3Parser/StackLattice.h
Normal file
37
moses/src/Scope3Parser/StackLattice.h
Normal file
@ -0,0 +1,37 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "StackVec.h"
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
// For an entry, lattice[i][j][k][l]:
|
||||
// i = offset from span start
|
||||
// j = NT index (zero-based, from left of rule)
|
||||
// k = span
|
||||
// l = label index (as in UTrieNode)
|
||||
typedef std::vector<std::vector<std::vector<StackVec> > > StackLattice;
|
||||
|
||||
}
|
93
moses/src/Scope3Parser/StackLatticeBuilder.cpp
Normal file
93
moses/src/Scope3Parser/StackLatticeBuilder.cpp
Normal file
@ -0,0 +1,93 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "StackLatticeBuilder.h"
|
||||
|
||||
#include "ChartCell.h"
|
||||
#include "ChartCellCollection.h"
|
||||
#include "RuleTable/UTrieNode.h"
|
||||
#include "Scope3Parser/StackLattice.h"
|
||||
#include "Scope3Parser/VarSpanNode.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
void StackLatticeBuilder::Build(
|
||||
int start,
|
||||
int end,
|
||||
const UTrieNode &ruleNode,
|
||||
const VarSpanNode &varSpanNode,
|
||||
const std::vector<VarSpanNode::NonTermRange> &ranges,
|
||||
const ChartCellCollection &chartCellColl,
|
||||
StackLattice &lattice,
|
||||
std::vector<std::vector<bool> > &checkTable)
|
||||
{
|
||||
// Extend the lattice if necessary. Do not shrink it.
|
||||
const size_t span = end - start + 1;
|
||||
if (lattice.size() < span) {
|
||||
lattice.resize(span);
|
||||
}
|
||||
|
||||
// Extend the quick-check table if necessary. Do not shrink it.
|
||||
if (checkTable.size() < varSpanNode.m_rank) {
|
||||
checkTable.resize(varSpanNode.m_rank);
|
||||
}
|
||||
|
||||
const UTrieNode::LabelTable &labelTable = ruleNode.GetLabelTable();
|
||||
|
||||
for (int index = 0; index < ranges.size(); ++index) {
|
||||
const VarSpanNode::NonTermRange &range = ranges[index];
|
||||
const std::vector<Word> &labelVec = labelTable[index];
|
||||
checkTable[index].clear();
|
||||
checkTable[index].resize(labelVec.size(), false);
|
||||
// Note: values in range are offsets not absolute positions.
|
||||
for (size_t offset = range.s1; offset <= range.s2; ++offset) {
|
||||
// Allocate additional space if required.
|
||||
if (lattice[offset].size() < index+1) {
|
||||
lattice[offset].resize(index+1);
|
||||
}
|
||||
size_t e1 = std::max(offset, range.e1);
|
||||
const size_t maxSpan = range.e2-offset+1;
|
||||
if (lattice[offset][index].size() < maxSpan+1) {
|
||||
lattice[offset][index].resize(maxSpan+1);
|
||||
}
|
||||
for (size_t end = e1; end <= range.e2; ++end) {
|
||||
const size_t span = end-offset+1;
|
||||
// Fill the StackVec at lattice[offset][index][span] by iterating over
|
||||
// labelTable[index] and looking up each label over the span
|
||||
// [start, end]
|
||||
StackVec &stackVec = lattice[offset][index][span];
|
||||
stackVec.clear();
|
||||
stackVec.reserve(labelVec.size());
|
||||
const WordsRange range(start+offset, start+offset+span-1);
|
||||
const ChartCell &chartCell = chartCellColl.Get(range);
|
||||
std::vector<bool>::iterator q = checkTable[index].begin();
|
||||
for (std::vector<Word>::const_iterator p = labelVec.begin();
|
||||
p != labelVec.end(); ++p) {
|
||||
const Word &label = *p;
|
||||
const HypoList *stack = chartCell.GetSortedHypotheses(label);
|
||||
stackVec.push_back(stack);
|
||||
*q++ = *q || static_cast<bool>(stack);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
42
moses/src/Scope3Parser/StackLatticeBuilder.h
Normal file
42
moses/src/Scope3Parser/StackLatticeBuilder.h
Normal file
@ -0,0 +1,42 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Scope3Parser/StackLattice.h"
|
||||
#include "Scope3Parser/VarSpanNode.h"
|
||||
#include "RuleTable/UTrieNode.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class ChartCellCollection;
|
||||
|
||||
class StackLatticeBuilder
|
||||
{
|
||||
public:
|
||||
StackLatticeBuilder() {}
|
||||
|
||||
void Build(int, int, const UTrieNode &, const VarSpanNode &,
|
||||
const std::vector<VarSpanNode::NonTermRange> &,
|
||||
const ChartCellCollection &, StackLattice &,
|
||||
std::vector<std::vector<bool> > &);
|
||||
};
|
||||
|
||||
}
|
86
moses/src/Scope3Parser/StackLatticeSearcher.h
Normal file
86
moses/src/Scope3Parser/StackLatticeSearcher.h
Normal file
@ -0,0 +1,86 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Scope3Parser/StackLattice.h"
|
||||
#include "Scope3Parser/VarSpanNode.h"
|
||||
#include "StackVec.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class ChartHypothesisCollection;
|
||||
|
||||
template<typename MatchCallBackType>
|
||||
class StackLatticeSearcher
|
||||
{
|
||||
public:
|
||||
StackLatticeSearcher(const StackLattice &lattice,
|
||||
const std::vector<VarSpanNode::NonTermRange> &ranges)
|
||||
: m_lattice(lattice)
|
||||
, m_ranges(ranges) {}
|
||||
|
||||
void Search(const std::vector<int> &labels, MatchCallBackType &callback)
|
||||
{
|
||||
m_labels = &labels;
|
||||
m_matchCB = &callback;
|
||||
SearchInner(0, 0);
|
||||
}
|
||||
|
||||
private:
|
||||
void SearchInner(int start, size_t index)
|
||||
{
|
||||
assert(m_stackVec.size() == index);
|
||||
|
||||
const VarSpanNode::NonTermRange &range = m_ranges[index];
|
||||
|
||||
const size_t offset = (range.s1 == range.s2) ? range.s1 : start;
|
||||
|
||||
const size_t minSpan = std::max(offset, range.e1) - offset + 1;
|
||||
const size_t maxSpan = range.e2 - offset + 1;
|
||||
|
||||
// Loop over all possible spans for this offset and index.
|
||||
const std::vector<StackVec> &spanVec = m_lattice[offset][index];
|
||||
|
||||
for (size_t j = minSpan; j <= maxSpan; ++j) {
|
||||
const HypoList *stack = spanVec[j][(*m_labels)[index]];
|
||||
if (!stack) {
|
||||
continue;
|
||||
}
|
||||
m_stackVec.push_back(stack);
|
||||
if (index+1 == m_labels->size()) {
|
||||
(*m_matchCB)(m_stackVec);
|
||||
} else {
|
||||
SearchInner(offset+j, index+1);
|
||||
}
|
||||
m_stackVec.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
const StackLattice &m_lattice;
|
||||
const std::vector<VarSpanNode::NonTermRange> &m_ranges;
|
||||
const std::vector<int> *m_labels;
|
||||
MatchCallBackType *m_matchCB;
|
||||
StackVec m_stackVec;
|
||||
};
|
||||
|
||||
} // namespace Moses
|
132
moses/src/Scope3Parser/VarSpanNode.h
Normal file
132
moses/src/Scope3Parser/VarSpanNode.h
Normal file
@ -0,0 +1,132 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Scope3Parser/IntermediateVarSpanNode.h"
|
||||
#include "WordsRange.h"
|
||||
|
||||
#include <boost/array.hpp>
|
||||
|
||||
#include <map>
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
struct VarSpanNode
|
||||
{
|
||||
public:
|
||||
struct NonTermRange {
|
||||
size_t s1;
|
||||
size_t s2;
|
||||
size_t e1;
|
||||
size_t e2;
|
||||
};
|
||||
typedef std::vector<IntermediateVarSpanNode> NodeVec;
|
||||
typedef boost::array<int, 5> KeyType;
|
||||
typedef std::map<KeyType, VarSpanNode> MapType;
|
||||
|
||||
VarSpanNode() : m_parent(0), m_label(0), m_rank(0) {}
|
||||
|
||||
VarSpanNode &Insert(const NodeVec &vec)
|
||||
{
|
||||
if (vec.empty()) {
|
||||
return *this;
|
||||
}
|
||||
return Insert(vec.begin(), vec.end());
|
||||
}
|
||||
|
||||
// Given a span, determine the ranges of possible start and end offsets
|
||||
// for each non-terminal.
|
||||
void CalculateRanges(int start, int end,
|
||||
std::vector<NonTermRange> &ranges) const
|
||||
{
|
||||
ranges.resize(m_rank);
|
||||
const VarSpanNode *n = this;
|
||||
size_t firstIndex = m_rank;
|
||||
while (n->m_parent) {
|
||||
const KeyType &key = *(n->m_label);
|
||||
assert(key[0] == 0 || key[0] == key[1]);
|
||||
assert(key[3] == -1 || key[2] == key[3]);
|
||||
const int numSplitPoints = key[4];
|
||||
firstIndex -= numSplitPoints+1;
|
||||
const int vsn_start = key[0] == 0 ? start : key[0];
|
||||
const int vsn_end = key[3] == -1 ? end : key[3];
|
||||
// The start position of the first non-terminal is known.
|
||||
ranges[firstIndex].s1 = ranges[firstIndex].s2 = vsn_start - start;
|
||||
// The end range depends on the number of split points. If there are
|
||||
// no split points then the end position is fixed.
|
||||
if (numSplitPoints) {
|
||||
ranges[firstIndex].e1 = vsn_start - start;
|
||||
ranges[firstIndex].e2 = vsn_end - start - numSplitPoints;
|
||||
} else {
|
||||
ranges[firstIndex].e1 = ranges[firstIndex].e2 = vsn_end - start;
|
||||
}
|
||||
// For the remaining non-terminals, the start and end boundaries shift
|
||||
// by one position with each split point.
|
||||
for (int i = 1; i <= numSplitPoints; ++i) {
|
||||
ranges[firstIndex+i].s1 = ranges[firstIndex].s1+i;
|
||||
ranges[firstIndex+i].s2 = ranges[firstIndex].e2+i;
|
||||
ranges[firstIndex+i].e1 = ranges[firstIndex].s1+i;
|
||||
ranges[firstIndex+i].e2 = ranges[firstIndex].e2+i;
|
||||
}
|
||||
// Except that the end point of the final non-terminal is fixed.
|
||||
ranges[firstIndex+numSplitPoints].e1 = vsn_end - start;
|
||||
ranges[firstIndex+numSplitPoints].e2 = vsn_end - start;
|
||||
n = n->m_parent;
|
||||
}
|
||||
assert(firstIndex == 0);
|
||||
}
|
||||
|
||||
const VarSpanNode *m_parent;
|
||||
const KeyType *m_label;
|
||||
size_t m_rank;
|
||||
MapType m_children;
|
||||
|
||||
private:
|
||||
VarSpanNode &Insert(NodeVec::const_iterator first,
|
||||
NodeVec::const_iterator last)
|
||||
{
|
||||
assert(first != last);
|
||||
|
||||
KeyType key;
|
||||
key[0] = first->m_start.first;
|
||||
key[1] = first->m_start.second;
|
||||
key[2] = first->m_end.first;
|
||||
key[3] = first->m_end.second;
|
||||
key[4] = first->m_numSplitPoints;
|
||||
|
||||
std::pair<MapType::iterator, bool> result = m_children.insert(
|
||||
std::make_pair<KeyType, VarSpanNode>(key, VarSpanNode()));
|
||||
VarSpanNode &child = result.first->second;
|
||||
if (result.second) {
|
||||
child.m_parent = this;
|
||||
child.m_label = &(result.first->first);
|
||||
child.m_rank = m_rank + first->m_numSplitPoints + 1;
|
||||
}
|
||||
if (++first == last) {
|
||||
return child;
|
||||
}
|
||||
return child.Insert(first, last);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
109
moses/src/Scope3Parser/VarSpanTrieBuilder.cpp
Normal file
109
moses/src/Scope3Parser/VarSpanTrieBuilder.cpp
Normal file
@ -0,0 +1,109 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "Scope3Parser/VarSpanTrieBuilder.h"
|
||||
|
||||
#include "Scope3Parser/ApplicableRuleTrie.h"
|
||||
#include "Scope3Parser/IntermediateVarSpanNode.h"
|
||||
#include "Scope3Parser/VarSpanNode.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
// Build a variable span trie from the applicable-rule trie rooted at root and
// return its root node.  The root of the applicable-rule trie itself is not
// processed, only the subtrees below it.
std::auto_ptr<VarSpanNode> VarSpanTrieBuilder::Build(ApplicableRuleTrie &root)
{
  std::auto_ptr<VarSpanNode> trie(new VarSpanNode());
  NodeVec path;
  const std::vector<ApplicableRuleTrie*> &subtrees = root.m_children;
  for (size_t i = 0; i < subtrees.size(); ++i) {
    Build(*subtrees[i], path, *trie);
  }
  return trie;
}
|
||||
|
||||
// Recursive worker: update the path description in vec for artNode, attach a
// VST node to artNode if it has rules, recurse into the children, and finally
// put vec back into the state it had on entry.
void VarSpanTrieBuilder::Build(ApplicableRuleTrie &artNode,
                               NodeVec &vec,
                               VarSpanNode &vstRoot)
{
  typedef IntermediateVarSpanNode::Range Range;

  // Snapshot vec so that every change made below can be undone on exit.
  NodeVecState saved;
  RecordState(vec, saved);

  const bool endUnknown = (artNode.m_end == -1);
  const bool lastIsOpen = !vec.empty() && vec.back().isOpen();

  if (endUnknown && lastIsOpen) {
    // Extend the open node at the back of vec by one split point.
    IntermediateVarSpanNode &open = vec.back();
    ++(open.m_numSplitPoints);
    ++(open.m_end.first);
  } else if (endUnknown) {
    // Begin a new node; its start is only fixed when m_start is known.
    if (artNode.m_start != -1) {
      Range start(artNode.m_start, artNode.m_start);
      Range end(artNode.m_start, -1);
      vec.push_back(IntermediateVarSpanNode(start, end));
    } else {
      Range start(0, -1);
      Range end(0, -1);
      vec.push_back(IntermediateVarSpanNode(start, end));
    }
  } else if (lastIsOpen) {
    // artNode has a known position, which fixes the end of the open node
    // just before it and, if still undetermined, the upper bound of its
    // start range.
    IntermediateVarSpanNode &open = vec.back();
    open.m_end = Range(artNode.m_start-1, artNode.m_start-1);
    if (open.m_start.second == -1) {
      const size_t s = artNode.m_start - (open.m_numSplitPoints + 1);
      open.m_start.second = s;
    }
  }

  if (artNode.m_node->HasRules()) {
    artNode.m_vstNode = &(vstRoot.Insert(vec));
  }

  const std::vector<ApplicableRuleTrie*> &subtrees = artNode.m_children;
  for (size_t i = 0; i < subtrees.size(); ++i) {
    Build(*subtrees[i], vec, vstRoot);
  }

  // Return vec to its original value.
  RestoreState(saved, vec);
}
|
||||
|
||||
void VarSpanTrieBuilder::RecordState(const NodeVec &vec, NodeVecState &state)
|
||||
{
|
||||
state.m_size = vec.size();
|
||||
if (!vec.empty()) {
|
||||
state.m_lastNode = vec.back();
|
||||
}
|
||||
}
|
||||
|
||||
void VarSpanTrieBuilder::RestoreState(const NodeVecState &state, NodeVec &vec)
|
||||
{
|
||||
assert(state.m_size == vec.size() || state.m_size+1 == vec.size());
|
||||
if (state.m_size < vec.size()) {
|
||||
vec.resize(state.m_size);
|
||||
} else if (!vec.empty()) {
|
||||
vec.back() = state.m_lastNode;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
50
moses/src/Scope3Parser/VarSpanTrieBuilder.h
Normal file
50
moses/src/Scope3Parser/VarSpanTrieBuilder.h
Normal file
@ -0,0 +1,50 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Scope3Parser/IntermediateVarSpanNode.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class ApplicableRuleTrie;
|
||||
class VarSpanNode;
|
||||
|
||||
class VarSpanTrieBuilder
|
||||
{
|
||||
public:
|
||||
std::auto_ptr<VarSpanNode> Build(ApplicableRuleTrie &);
|
||||
|
||||
private:
|
||||
typedef std::vector<IntermediateVarSpanNode> NodeVec;
|
||||
struct NodeVecState
|
||||
{
|
||||
size_t m_size;
|
||||
IntermediateVarSpanNode m_lastNode;
|
||||
};
|
||||
void Build(ApplicableRuleTrie &, NodeVec &, VarSpanNode &);
|
||||
void RecordState(const NodeVec &, NodeVecState &);
|
||||
void RestoreState(const NodeVecState &, NodeVec &);
|
||||
};
|
||||
|
||||
}
|
@ -106,6 +106,9 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
m_verboseLevel = Scan<size_t>( m_parameter->GetParam("verbose")[0]);
|
||||
}
|
||||
|
||||
m_parsingAlgorithm = (m_parameter->GetParam("parsing-algorithm").size() > 0) ?
|
||||
(ParsingAlgorithm) Scan<size_t>(m_parameter->GetParam("parsing-algorithm")[0]) : ParseCYKPlus;
|
||||
|
||||
// to cube or not to cube
|
||||
m_searchAlgorithm = (m_parameter->GetParam("search-algorithm").size() > 0) ?
|
||||
(SearchAlgorithm) Scan<size_t>(m_parameter->GetParam("search-algorithm")[0]) : Normal;
|
||||
|
@ -129,6 +129,7 @@ protected:
|
||||
bool m_recoverPath;
|
||||
bool m_outputHypoScore;
|
||||
|
||||
ParsingAlgorithm m_parsingAlgorithm;
|
||||
SearchAlgorithm m_searchAlgorithm;
|
||||
InputTypeEnum m_inputType;
|
||||
size_t m_numInputScores;
|
||||
@ -431,6 +432,9 @@ public:
|
||||
// Accessor for m_inputType.
InputTypeEnum GetInputType() const { return m_inputType; }
|
||||
// Accessor for m_parsingAlgorithm.
ParsingAlgorithm GetParsingAlgorithm() const { return m_parsingAlgorithm; }
|
||||
// Accessor for m_searchAlgorithm.
SearchAlgorithm GetSearchAlgorithm() const { return m_searchAlgorithm; }
|
||||
|
@ -1,5 +1,3 @@
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
@ -160,6 +158,11 @@ enum DictionaryFind {
|
||||
,All = 1
|
||||
};
|
||||
|
||||
// Chart parsing algorithm.  The numeric values are the ones accepted by the
// "parsing-algorithm" parameter (0=CYK+, 1=scope-3).
enum ParsingAlgorithm {
  ParseCYKPlus = 0
  ,ParseScope3 = 1
};
|
||||
|
||||
enum SearchAlgorithm {
|
||||
Normal = 0
|
||||
,CubePruning = 1
|
||||
|
Loading…
Reference in New Issue
Block a user