add new phrase table for Scope-3 parsing. Doesn't quite work...

This commit is contained in:
Hieu Hoang 2013-11-22 15:26:14 +00:00
parent 0f3f675698
commit 5c6d88a463
6 changed files with 222 additions and 1 deletions

View File

@ -1631,6 +1631,16 @@
<type>1</type> <type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryNodeMemory.h</locationURI> <locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryNodeMemory.h</locationURI>
</link> </link>
<link>
<name>TranslationModel/PhraseDictionaryScope3.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryScope3.cpp</locationURI>
</link>
<link>
<name>TranslationModel/PhraseDictionaryScope3.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryScope3.h</locationURI>
</link>
<link> <link>
<name>TranslationModel/PhraseDictionaryTree.cpp</name> <name>TranslationModel/PhraseDictionaryTree.cpp</name>
<type>1</type> <type>1</type>

View File

@ -8,6 +8,7 @@
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h" #include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h" #include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h" #include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryScope3.h"
#include "moses/FF/LexicalReordering/LexicalReordering.h" #include "moses/FF/LexicalReordering/LexicalReordering.h"
@ -151,6 +152,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME2("PhraseDictionaryBinary", PhraseDictionaryTreeAdaptor); MOSES_FNAME2("PhraseDictionaryBinary", PhraseDictionaryTreeAdaptor);
MOSES_FNAME(PhraseDictionaryOnDisk); MOSES_FNAME(PhraseDictionaryOnDisk);
MOSES_FNAME(PhraseDictionaryMemory); MOSES_FNAME(PhraseDictionaryMemory);
MOSES_FNAME(PhraseDictionaryScope3);
MOSES_FNAME(PhraseDictionaryMultiModel); MOSES_FNAME(PhraseDictionaryMultiModel);
MOSES_FNAME(PhraseDictionaryMultiModelCounts); MOSES_FNAME(PhraseDictionaryMultiModelCounts);
MOSES_FNAME(PhraseDictionaryALSuffixArray); MOSES_FNAME(PhraseDictionaryALSuffixArray);

View File

@ -30,7 +30,7 @@ namespace Moses
{ {
class ChartParser; class ChartParser;
/** Implementation of a SCFG rule table in a trie. Looking up a rule of /** Implementation of an in-memory rule table in a trie. Looking up a rule of
* length n symbols requires n look-ups to find the TargetPhraseCollection. * length n symbols requires n look-ups to find the TargetPhraseCollection.
*/ */
class PhraseDictionaryMemory : public RuleTableTrie class PhraseDictionaryMemory : public RuleTableTrie

View File

@ -38,6 +38,7 @@ namespace Moses
{ {
class PhraseDictionaryMemory; class PhraseDictionaryMemory;
class PhraseDictionaryScope3;
class PhraseDictionaryFuzzyMatch; class PhraseDictionaryFuzzyMatch;
//! @todo why? //! @todo why?
@ -112,6 +113,7 @@ public:
private: private:
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryMemory&); friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryMemory&);
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryScope3&);
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryFuzzyMatch&); friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryFuzzyMatch&);
TerminalMap m_sourceTermMap; TerminalMap m_sourceTermMap;

View File

@ -0,0 +1,140 @@
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <fstream>
#include <string>
#include <iterator>
#include <algorithm>
#include "PhraseDictionaryScope3.h"
#include "moses/FactorCollection.h"
#include "moses/Word.h"
#include "moses/Util.h"
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/WordsRange.h"
#include "moses/UserMessage.h"
#include "moses/TranslationModel/RuleTable/LoaderFactory.h"
#include "moses/TranslationModel/RuleTable/Loader.h"
#include "moses/TranslationModel/Scope3Parser/Parser.h"
#include "moses/InputPath.h"
using namespace std;
namespace Moses
{
/** Build the phrase table from its configuration string.
 *
 *  \param line  configuration string, forwarded unchanged to the
 *               RuleTableUTrie base-class constructor
 */
PhraseDictionaryScope3::PhraseDictionaryScope3(const std::string &line)
  : RuleTableUTrie(line)
{
  ReadParameters();

  // The cache size is reset only after the parameters have been read, so a
  // zero-sized cache is always what this table ends up with: caching for a
  // memory pt is pointless.
  m_maxCacheSize = 0;
}
/** Return the target phrase collection held by the trie node for a rule.
 *
 *  The node is obtained from GetOrCreateNode(), which receives all three
 *  arguments unchanged.
 *
 *  \param source     source side of the rule
 *  \param target     target side of the rule
 *  \param sourceLHS  source left-hand side, passed straight through
 *  \return the collection returned by the node's GetTargetPhraseCollection()
 */
TargetPhraseCollection &PhraseDictionaryScope3::GetOrCreateTargetPhraseCollection(
  const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
{
  return GetOrCreateNode(source, target, sourceLHS).GetTargetPhraseCollection();
}
/** Walk the trie rooted at m_collection along the source side of a rule,
 *  asking each node for the child that matches the next source word.
 *
 *  Terminals are looked up by the source word alone. Non-terminals are looked
 *  up by the (source non-terminal, target non-terminal) pair, where the
 *  target word is located through the next entry of the target phrase's
 *  non-terminal alignment.
 *
 *  \param source     source side of the rule; one trie step per word
 *  \param target     target side of the rule; supplies the non-terminal
 *                    alignment and the aligned target non-terminals
 *  \param sourceLHS  currently not used by this function
 *  \return the node reached after the last source word (the root node when
 *          the source phrase is empty)
 *  \throws util::Exception if the alignment has no entry left for a source
 *          non-terminal, if the next alignment entry does not refer to the
 *          current source position, or if a child lookup yields NULL
 */
PhraseDictionaryNodeMemory &PhraseDictionaryScope3::GetOrCreateNode(
  const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
{
  const size_t numWords = source.GetSize();

  // Alignment entries are consumed one per source non-terminal, in order.
  const AlignmentInfo &nonTermAlign = target.GetAlignNonTerm();
  AlignmentInfo::const_iterator alignIt = nonTermAlign.begin();

  PhraseDictionaryNodeMemory *node = &m_collection;
  for (size_t pos = 0; pos != numWords; ++pos) {
    const Word &word = source.GetWord(pos);

    if (!word.IsNonTerminal()) {
      // terminal: keyed by the source word only
      node = node->GetOrCreateChild(word);
    } else {
      // non-terminal: keyed by source label first, then the aligned target label
      UTIL_THROW_IF(alignIt == nonTermAlign.end(), util::Exception,
                    "No alignment for non-term at position " << pos);
      UTIL_THROW_IF(alignIt->first != pos, util::Exception,
                    "Alignment info incorrect at position " << pos);

      const Word &targetNonTerm = target.GetWord(alignIt->second);
      ++alignIt;

      node = node->GetOrCreateChild(word, targetNonTerm);
    }

    UTIL_THROW_IF(node == NULL, util::Exception,
                  "Node not found at position " << pos);
  }

  // The source LHS is deliberately not added as a final trie step.
  return *node;
}
/** Create the rule lookup manager used to query this table.
 *
 *  \param parser          chart parser handed to the Scope3Parser
 *  \param cellCollection  chart cells handed to the Scope3Parser
 *  \return a Scope3Parser allocated with new; this function neither keeps
 *          nor frees the pointer
 */
ChartRuleLookupManager *PhraseDictionaryScope3::CreateRuleLookupManager(
  const ChartParser &parser, const ChartCellCollectionBase &cellCollection)
{
  // FIXME This should be a parameter to CreateRuleLookupManager;
  // until it is, 0 is always passed as the maximum chart span.
  size_t spanLimit = 0;

  ChartRuleLookupManager *manager =
    new Scope3Parser(parser, cellCollection, *this, spanLimit);
  return manager;
}
/** Sort the trie's contents using the table limit.
 *
 *  Nothing happens when GetTableLimit() is zero; otherwise the limit is
 *  passed to Sort() on the root node.
 */
void PhraseDictionaryScope3::SortAndPrune()
{
  if (!GetTableLimit()) {
    return;  // a table limit of zero leaves the collection untouched
  }

  m_collection.Sort(GetTableLimit());
}
// Invokes the TO_STRING_BODY macro (defined outside this file) for this class.
// NOTE(review): presumably this supplies the definition matching the
// TO_STRING() declaration in the class header - confirm against the macro.
TO_STRING_BODY(PhraseDictionaryScope3);
// friend
/** Stream the keys stored directly under the table's root node.
 *
 *  First the source side of every non-terminal key of the root is written,
 *  then every terminal key of the root. The words are written back to back,
 *  with no separator, and deeper trie levels are not visited.
 *
 *  \param out         stream to write to
 *  \param phraseDict  table whose root node is inspected
 *  \return out
 */
ostream& operator<<(ostream& out, const PhraseDictionaryScope3& phraseDict)
{
  typedef PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator NonTermIter;
  typedef PhraseDictionaryNodeMemory::TerminalMap::const_iterator TermIter;

  const PhraseDictionaryNodeMemory &root = phraseDict.m_collection;

  // Non-terminal keys are pairs; only the first (source) word is printed.
  for (NonTermIter it = root.m_nonTermMap.begin(), last = root.m_nonTermMap.end();
       it != last; ++it) {
    out << it->first.first;
  }

  // Terminal keys are the source words themselves.
  for (TermIter it = root.m_sourceTermMap.begin(), last = root.m_sourceTermMap.end();
       it != last; ++it) {
    out << it->first;
  }

  return out;
}
}

View File

@ -0,0 +1,67 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include "PhraseDictionaryNodeMemory.h"
#include "moses/TranslationModel/PhraseDictionary.h"
#include "moses/InputType.h"
#include "moses/NonTerminal.h"
#include "moses/TranslationModel/RuleTable/UTrie.h"
#include "util/check.hh"
namespace Moses
{
class ChartParser;
/** Scope-3 only.
 *
 *  Rule table derived from RuleTableUTrie that keeps its rules in a trie of
 *  PhraseDictionaryNodeMemory nodes rooted at m_collection, and hands out a
 *  Scope3Parser as its rule lookup manager.
 */
class PhraseDictionaryScope3 : public RuleTableUTrie
{
  friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryScope3&);
  friend class RuleTableLoader;

public:
  /** \param line configuration string, forwarded to RuleTableUTrie */
  PhraseDictionaryScope3(const std::string &line);

  /** Read-only access to the root node of the trie. */
  const PhraseDictionaryNodeMemory &GetRootNode() const {
    return m_collection;
  }

  /** Allocate a Scope3Parser bound to this table. */
  ChartRuleLookupManager *CreateRuleLookupManager(
    const ChartParser &parser,
    const ChartCellCollectionBase &cellCollection);

  TO_STRING();

protected:
  /** Target phrase collection of the node returned by GetOrCreateNode(). */
  TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
    const Phrase &source,
    const TargetPhrase &target,
    const Word *sourceLHS);

  /** Follow the source phrase through the trie, one child lookup per word. */
  PhraseDictionaryNodeMemory &GetOrCreateNode(
    const Phrase &source,
    const TargetPhrase &target,
    const Word *sourceLHS);

  /** Sort the trie with the table limit; does nothing when the limit is 0. */
  void SortAndPrune();

  //! root node of the trie holding the rules
  PhraseDictionaryNodeMemory m_collection;
};
} // namespace Moses