This commit is contained in:
Hieu Hoang 2011-10-30 19:17:15 +07:00
parent fdd6c9795d
commit b6b697c545
11 changed files with 198 additions and 15 deletions

View File

@ -48,6 +48,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "Sentence.h"
#include "ConfusionNet.h"
#include "WordLattice.h"
#include "WordLattice2.h"
#include "TreeInput.h"
#include "TranslationAnalysis.h"
#include "mbr.h"
@ -158,6 +159,9 @@ bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source
case TreeInputType:
source = ioWrapper.GetInput(new TreeInput(Input));
break;
case WordLatticeInput2:
source = ioWrapper.GetInput(new WordLattice2);
break;
default:
TRACE_ERR("Unknown input type: " << inputType << "\n");
}

View File

@ -60,6 +60,10 @@
1E2E163D132A892800ED4085 /* RuleCubeQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2E1636132A892800ED4085 /* RuleCubeQueue.h */; };
1E2E163E132A892800ED4085 /* ThreadPool.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2E1637132A892800ED4085 /* ThreadPool.cpp */; };
1E2E163F132A892800ED4085 /* ThreadPool.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2E1638132A892800ED4085 /* ThreadPool.h */; };
1E3CBD3A145D755D00AF72FC /* WordLattice2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E3CBD38145D755D00AF72FC /* WordLattice2.cpp */; };
1E3CBD3B145D755D00AF72FC /* WordLattice2.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E3CBD39145D755D00AF72FC /* WordLattice2.h */; };
1E3CBD3F145D75F300AF72FC /* WeightedDirectedGraph.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E3CBD3D145D75F300AF72FC /* WeightedDirectedGraph.cpp */; };
1E3CBD40145D75F300AF72FC /* WeightedDirectedGraph.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E3CBD3E145D75F300AF72FC /* WeightedDirectedGraph.h */; };
1E46B5A613BA5C7F0084F898 /* RuleCubeItem.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E46B5A413BA5C7F0084F898 /* RuleCubeItem.cpp */; };
1E46B5A713BA5C7F0084F898 /* RuleCubeItem.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E46B5A513BA5C7F0084F898 /* RuleCubeItem.h */; };
1E474E12145575CA00178AD5 /* RuleTableLoader.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E474E11145575CA00178AD5 /* RuleTableLoader.h */; };
@ -322,6 +326,10 @@
1E2E1636132A892800ED4085 /* RuleCubeQueue.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RuleCubeQueue.h; path = src/RuleCubeQueue.h; sourceTree = "<group>"; };
1E2E1637132A892800ED4085 /* ThreadPool.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ThreadPool.cpp; path = src/ThreadPool.cpp; sourceTree = "<group>"; };
1E2E1638132A892800ED4085 /* ThreadPool.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ThreadPool.h; path = src/ThreadPool.h; sourceTree = "<group>"; };
1E3CBD38145D755D00AF72FC /* WordLattice2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = WordLattice2.cpp; path = src/WordLattice2.cpp; sourceTree = "<group>"; };
1E3CBD39145D755D00AF72FC /* WordLattice2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = WordLattice2.h; path = src/WordLattice2.h; sourceTree = "<group>"; };
1E3CBD3D145D75F300AF72FC /* WeightedDirectedGraph.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = WeightedDirectedGraph.cpp; path = src/WeightedDirectedGraph.cpp; sourceTree = "<group>"; };
1E3CBD3E145D75F300AF72FC /* WeightedDirectedGraph.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = WeightedDirectedGraph.h; path = src/WeightedDirectedGraph.h; sourceTree = "<group>"; };
1E46B5A413BA5C7F0084F898 /* RuleCubeItem.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = RuleCubeItem.cpp; path = src/RuleCubeItem.cpp; sourceTree = "<group>"; };
1E46B5A513BA5C7F0084F898 /* RuleCubeItem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RuleCubeItem.h; path = src/RuleCubeItem.h; sourceTree = "<group>"; };
1E474E11145575CA00178AD5 /* RuleTableLoader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RuleTableLoader.h; path = src/RuleTableLoader.h; sourceTree = "<group>"; };
@ -557,6 +565,10 @@
08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup;
children = (
1E3CBD3D145D75F300AF72FC /* WeightedDirectedGraph.cpp */,
1E3CBD3E145D75F300AF72FC /* WeightedDirectedGraph.h */,
1E3CBD38145D755D00AF72FC /* WordLattice2.cpp */,
1E3CBD39145D755D00AF72FC /* WordLattice2.h */,
1E474E11145575CA00178AD5 /* RuleTableLoader.h */,
1E16D086144DAA3F00B60B4F /* LM */,
1ED0FD4C124BB9380029177F /* AlignmentInfo.cpp */,
@ -1000,6 +1012,8 @@
1E16D0BC144DAA6C00B60B4F /* SingleFactor.h in Headers */,
1E16D0BE144DAA6C00B60B4F /* SRI.h in Headers */,
1E474E12145575CA00178AD5 /* RuleTableLoader.h in Headers */,
1E3CBD3B145D755D00AF72FC /* WordLattice2.h in Headers */,
1E3CBD40145D75F300AF72FC /* WeightedDirectedGraph.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -1175,6 +1189,8 @@
1E16D0B5144DAA6C00B60B4F /* ParallelBackoff.cpp in Sources */,
1E16D0BB144DAA6C00B60B4F /* SingleFactor.cpp in Sources */,
1E16D0BD144DAA6C00B60B4F /* SRI.cpp in Sources */,
1E3CBD3A145D755D00AF72FC /* WordLattice2.cpp in Sources */,
1E3CBD3F145D75F300AF72FC /* WeightedDirectedGraph.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};

View File

@ -125,8 +125,10 @@ libmoses_la_HEADERS = \
UniqueObject.h \
UserMessage.h \
Util.h \
WeightedDirectedGraph.h \
Word.h \
WordLattice.h \
WordLattice2.h \
WordsBitmap.h \
WordsRange.h \
XmlOption.h
@ -270,8 +272,10 @@ libmoses_la_SOURCES = \
TrellisPathCollection.cpp \
UserMessage.cpp \
Util.cpp \
WeightedDirectedGraph.cpp \
Word.cpp \
WordLattice.cpp \
WordLattice2.cpp \
WordsBitmap.cpp \
WordsRange.cpp \
XmlOption.cpp

View File

@ -165,5 +165,11 @@ CN parsePCN(const std::string& in)
return res;
}
/** Build a weighted directed graph from the string form of a lattice.
 *  Placeholder implementation: the input string is not examined yet and a
 *  default-constructed graph is handed back.
 *  @param in string representation of the lattice (currently ignored)
 *  @return a default-constructed Moses::WeightedDirectedGraph
 */
Moses::WeightedDirectedGraph parseLattice(const std::string& in)
{
Moses::WeightedDirectedGraph graph;
return graph;
}
}

View File

@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <string>
#include <utility>
#include <cstdlib>
#include "WeightedDirectedGraph.h"
/** A couple of utilities to read .pcn files. A python-compatible format
* for encoding confusion networks and word lattices.
@ -36,11 +37,12 @@ namespace PCN
typedef std::pair<std::pair<std::string, std::vector<float> >, size_t> CNAlt;
typedef std::vector<CNAlt> CNCol;
typedef std::vector<CNCol> CN;
/** Given a string ((('foo',0.1,1),('bar',0.9,2)),...) representation of a
* word lattice in PCN format, return a CN object representing the lattice
*/
CN parsePCN(const std::string& in);
Moses::WeightedDirectedGraph parseLattice(const std::string& in);
};

View File

@ -991,25 +991,31 @@ bool StaticData::LoadPhraseTables()
// it only works with binary files. This is a hack
m_numInputScores=m_parameter->GetParam("weight-i").size();
for(unsigned k=0; k<m_numInputScores; ++k)
weight.push_back(Scan<float>(m_parameter->GetParam("weight-i")[k]));
if (implementation == Binary)
{
for(unsigned k=0; k<m_numInputScores; ++k)
weight.push_back(Scan<float>(m_parameter->GetParam("weight-i")[k]));
}
if(m_parameter->GetParam("link-param-count").size())
m_numLinkParams = Scan<size_t>(m_parameter->GetParam("link-param-count")[0]);
//print some info about this interaction:
if (m_numLinkParams == m_numInputScores) {
VERBOSE(1,"specified equal numbers of link parameters and insertion weights, not using non-epsilon 'real' word link count.\n");
} else if ((m_numLinkParams + 1) == m_numInputScores) {
VERBOSE(1,"WARN: "<< m_numInputScores << " insertion weights found and only "<< m_numLinkParams << " link parameters specified, applying non-epsilon 'real' word link count for last feature weight.\n");
} else {
stringstream strme;
strme << "You specified " << m_numInputScores
<< " input weights (weight-i), but you specified " << m_numLinkParams << " link parameters (link-param-count)!";
UserMessage::Add(strme.str());
return false;
if (implementation == Binary) {
if (m_numLinkParams == m_numInputScores) {
VERBOSE(1,"specified equal numbers of link parameters and insertion weights, not using non-epsilon 'real' word link count.\n");
} else if ((m_numLinkParams + 1) == m_numInputScores) {
VERBOSE(1,"WARN: "<< m_numInputScores << " insertion weights found and only "<< m_numLinkParams << " link parameters specified, applying non-epsilon 'real' word link count for last feature weight.\n");
} else {
stringstream strme;
strme << "You specified " << m_numInputScores
<< " input weights (weight-i), but you specified " << m_numLinkParams << " link parameters (link-param-count)!";
UserMessage::Add(strme.str());
return false;
}
}
}
if (!m_inputType) {
m_numInputScores=0;

View File

@ -181,6 +181,8 @@ enum InputTypeEnum {
,ConfusionNetworkInput = 1
,WordLatticeInput = 2
,TreeInputType = 3
,WordLatticeInput2 = 4
};
enum XmlInputType {

View File

@ -0,0 +1,9 @@
//
// WeightedDirectedGraph.cpp
// moses
//
// Created by Hieu Hoang on 30/10/2011.
// Copyright 2011 __MyCompanyName__. All rights reserved.
//
#include <iostream>

View File

@ -0,0 +1,26 @@
//
// WeightedDirectedGraph.h
// moses
//
// Created by Hieu Hoang on 30/10/2011.
// Copyright 2011 __MyCompanyName__. All rights reserved.
//
#ifndef moses_WeightedDirectedGraph_h
#define moses_WeightedDirectedGraph_h
namespace Moses
{

//! Presumably a single node of a WeightedDirectedGraph - TODO confirm.
//! Placeholder: the class has no members yet.
class WeightedDirectedGraphNode {};

//! Graph type returned by PCN::parseLattice().
//! Placeholder: the class has no members yet.
class WeightedDirectedGraph {};

} // namespace Moses
#endif

View File

@ -0,0 +1,60 @@
//
// WordLattice2.cpp
// moses
//
// Created by Hieu Hoang on 30/10/2011.
// Copyright 2011 __MyCompanyName__. All rights reserved.
//
#include <cstdlib>
#include <iostream>
#include "WordLattice2.h"
#include "PCNTools.h"
using namespace std;
namespace Moses
{
/** Size of this input.
 *  Read() does not keep the parsed lattice yet, so there is nothing to
 *  measure and the input is reported as empty. An explicit value is returned
 *  because flowing off the end of a non-void function is undefined behaviour.
 *  @return always 0 for now
 */
size_t WordLattice2::GetSize() const
{
// TODO: report the real size once the parsed graph is stored in this class
return 0;
}
/** Populate this InputType with one line of data from the stream.
 *  @param in stream from which a single line is read
 *  @param factorOrder not used yet
 *  @return 0 if no line could be read; 1 after a line has been read and
 *          passed to PCN::parseLattice()
 */
int WordLattice2::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
{
std::string line;
if(!getline(in,line)) return 0;

// TODO: the parsed graph is discarded for now; keep it once this class has storage for it
PCN::parseLattice(line);

// Every path must return a value: falling off the end of an int function is
// undefined behaviour. Non-zero complements the 0 returned on failure above.
// NOTE(review): confirm that callers treat 1 as "input was read".
return 1;
}
//! Output debugging info to a stream.
//! Not implemented yet: the stream argument is ignored and nothing is written.
void WordLattice2::Print(std::ostream&) const
{
}
//! create trans options specific to this InputType
//! Not implemented yet: no collection is built and a null pointer is returned,
//! so the result must be checked before it is dereferenced. The explicit return
//! replaces falling off the end of the function, which is undefined behaviour.
TranslationOptionCollection* WordLattice2::CreateTranslationOptionCollection(const TranslationSystem* system) const
{
// TODO: build a collection for this input type from 'system'
return NULL;
}
//! return substring. Only valid for Sentence class. TODO - get rid of this fn
//! A call on this class cannot produce a value, so it is reported on stderr and
//! the program is aborted instead of flowing off the end of the function
//! (undefined behaviour for a function that returns a value).
Phrase WordLattice2::GetSubString(const WordsRange&) const
{
std::cerr << "ERROR: call to WordLattice2::GetSubString\n";
std::abort();
}
//! return substring at a particular position. Only valid for Sentence class. TODO - get rid of this fn
//! A call on this class has no word to refer to, so it is reported on stderr and
//! the program is aborted instead of flowing off the end of the function
//! (undefined behaviour, and the caller would receive a garbage reference).
const Word& WordLattice2::GetWord(size_t pos) const
{
std::cerr << "ERROR: call to WordLattice2::GetWord\n";
std::abort();
}
//! Label set for the span [startPos, endPos].
//! Not implemented yet: this class stores no label sets, so there is no object
//! to return a reference to. The call is reported on stderr and the program is
//! aborted instead of flowing off the end of the function (undefined behaviour,
//! and the caller would receive a garbage reference).
const NonTerminalSet &WordLattice2::GetLabelSet(size_t startPos, size_t endPos) const
{
std::cerr << "ERROR: call to WordLattice2::GetLabelSet (not implemented)\n";
std::abort();
}
}

48
moses/src/WordLattice2.h Normal file
View File

@ -0,0 +1,48 @@
//
// WordLattice2.h
// moses
//
// Created by Hieu Hoang on 30/10/2011.
// Copyright 2011 __MyCompanyName__. All rights reserved.
//
#ifndef moses_WordLattice2_h
#define moses_WordLattice2_h
#include "InputType.h"
namespace Moses
{
/** Input type that identifies itself as WordLatticeInput2.
 *  Apart from GetType(), the member functions are defined in WordLattice2.cpp.
 */
class WordLattice2 : public InputType
{
public:
//! identifies this input as WordLatticeInput2
InputTypeEnum GetType() const
{
return WordLatticeInput2;
}

//! size of this input
virtual size_t GetSize() const;

//! fill this input with data taken from the stream 'in'
virtual int Read(std::istream& in, const std::vector<FactorType>& factorOrder);

//! write debugging info to the stream 'out'
virtual void Print(std::ostream& out) const;

//! create the translation options specific to this input type
virtual TranslationOptionCollection* CreateTranslationOptionCollection(const TranslationSystem* system) const;

//! substring covering 'range'. Only valid for Sentence class. TODO - get rid of this fn
virtual Phrase GetSubString(const WordsRange& range) const;

//! word at position 'pos'. Only valid for Sentence class. TODO - get rid of this fn
virtual const Word& GetWord(size_t pos) const;

//! label set for the span from startPos to endPos
virtual const NonTerminalSet& GetLabelSet(size_t startPos, size_t endPos) const;
};
}
#endif