load hiero format for reading suffix arrays

This commit is contained in:
Hieu Hoang 2011-11-06 15:31:11 +07:00
parent a257803610
commit 311b5a6244
7 changed files with 70 additions and 12 deletions

View File

@ -37,6 +37,8 @@
1E1C589612F310A70067DEB7 /* ChartRuleLookupManagerMemory.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E1C589112F310A70067DEB7 /* ChartRuleLookupManagerMemory.h */; };
1E1C589712F310A70067DEB7 /* ChartRuleLookupManagerOnDisk.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E1C589212F310A70067DEB7 /* ChartRuleLookupManagerOnDisk.cpp */; };
1E1C589812F310A70067DEB7 /* ChartRuleLookupManagerOnDisk.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E1C589312F310A70067DEB7 /* ChartRuleLookupManagerOnDisk.h */; };
1E2755B314667CA4009D1DF9 /* PhraseDictionaryALSuffixArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2755B214667CA3009D1DF9 /* PhraseDictionaryALSuffixArray.cpp */; };
1E2755B614667CC3009D1DF9 /* PhraseDictionaryALSuffixArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2755B514667CC2009D1DF9 /* PhraseDictionaryALSuffixArray.h */; };
1E2E161A132A890D00ED4085 /* ChartCell.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2E1604132A890D00ED4085 /* ChartCell.cpp */; };
1E2E161B132A890D00ED4085 /* ChartCell.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2E1605132A890D00ED4085 /* ChartCell.h */; };
1E2E161C132A890D00ED4085 /* ChartCellCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2E1606132A890D00ED4085 /* ChartCellCollection.cpp */; };
@ -307,6 +309,8 @@
1E1C589112F310A70067DEB7 /* ChartRuleLookupManagerMemory.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartRuleLookupManagerMemory.h; path = src/ChartRuleLookupManagerMemory.h; sourceTree = "<group>"; };
1E1C589212F310A70067DEB7 /* ChartRuleLookupManagerOnDisk.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartRuleLookupManagerOnDisk.cpp; path = src/ChartRuleLookupManagerOnDisk.cpp; sourceTree = "<group>"; };
1E1C589312F310A70067DEB7 /* ChartRuleLookupManagerOnDisk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartRuleLookupManagerOnDisk.h; path = src/ChartRuleLookupManagerOnDisk.h; sourceTree = "<group>"; };
1E2755B214667CA3009D1DF9 /* PhraseDictionaryALSuffixArray.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryALSuffixArray.cpp; path = src/PhraseDictionaryALSuffixArray.cpp; sourceTree = "<group>"; };
1E2755B514667CC2009D1DF9 /* PhraseDictionaryALSuffixArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryALSuffixArray.h; path = src/PhraseDictionaryALSuffixArray.h; sourceTree = "<group>"; };
1E2E1604132A890D00ED4085 /* ChartCell.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartCell.cpp; path = src/ChartCell.cpp; sourceTree = "<group>"; };
1E2E1605132A890D00ED4085 /* ChartCell.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartCell.h; path = src/ChartCell.h; sourceTree = "<group>"; };
1E2E1606132A890D00ED4085 /* ChartCellCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartCellCollection.cpp; path = src/ChartCellCollection.cpp; sourceTree = "<group>"; };
@ -573,11 +577,6 @@
08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup;
children = (
1E3CBD3D145D75F300AF72FC /* WeightedDirectedGraph.cpp */,
1E3CBD3E145D75F300AF72FC /* WeightedDirectedGraph.h */,
1E3CBD38145D755D00AF72FC /* WordLattice2.cpp */,
1E3CBD39145D755D00AF72FC /* WordLattice2.h */,
1E474E11145575CA00178AD5 /* RuleTableLoader.h */,
1E16D086144DAA3F00B60B4F /* LM */,
1ED0FD4C124BB9380029177F /* AlignmentInfo.cpp */,
1ED0FD4D124BB9380029177F /* AlignmentInfo.h */,
@ -719,6 +718,8 @@
1ED0FDE2124BB9380029177F /* PhraseDictionaryTreeAdaptor.h */,
1E078C1E14643C2000A707F4 /* PhraseDictionaryHiero.cpp */,
1E078C1B14643AED00A707F4 /* PhraseDictionaryHiero.h */,
1E2755B214667CA3009D1DF9 /* PhraseDictionaryALSuffixArray.cpp */,
1E2755B514667CC2009D1DF9 /* PhraseDictionaryALSuffixArray.h */,
1ED0FDE3124BB9380029177F /* PrefixTree.h */,
1ED0FDE4124BB9380029177F /* PrefixTreeMap.cpp */,
1ED0FDE5124BB9380029177F /* PrefixTreeMap.h */,
@ -742,6 +743,7 @@
1ED0DE261432A0D100C20FBE /* RuleTableLoaderStandard.h */,
1E078C20146440A900A707F4 /* RuleTableLoaderHiero.h */,
1E078C22146440F700A707F4 /* RuleTableLoaderHiero.cpp */,
1E474E11145575CA00178AD5 /* RuleTableLoader.h */,
1ED0FDEB124BB9380029177F /* ScoreComponentCollection.cpp */,
1ED0FDEC124BB9380029177F /* ScoreComponentCollection.h */,
1ED0FDED124BB9380029177F /* ScoreIndexManager.cpp */,
@ -804,6 +806,10 @@
1ED0FE25124BB9380029177F /* WordsBitmap.h */,
1ED0FE26124BB9380029177F /* WordsRange.cpp */,
1ED0FE27124BB9380029177F /* WordsRange.h */,
1E3CBD3D145D75F300AF72FC /* WeightedDirectedGraph.cpp */,
1E3CBD3E145D75F300AF72FC /* WeightedDirectedGraph.h */,
1E3CBD38145D755D00AF72FC /* WordLattice2.cpp */,
1E3CBD39145D755D00AF72FC /* WordLattice2.h */,
1ED0FE28124BB9380029177F /* XmlOption.cpp */,
1ED0FE29124BB9380029177F /* XmlOption.h */,
1ED0FD88124BB9380029177F /* gzfilebuf.h */,
@ -1028,6 +1034,7 @@
1E3CBD40145D75F300AF72FC /* WeightedDirectedGraph.h in Headers */,
1E078C1C14643AED00A707F4 /* PhraseDictionaryHiero.h in Headers */,
1E078C21146440A900A707F4 /* RuleTableLoaderHiero.h in Headers */,
1E2755B614667CC3009D1DF9 /* PhraseDictionaryALSuffixArray.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -1207,6 +1214,7 @@
1E3CBD3F145D75F300AF72FC /* WeightedDirectedGraph.cpp in Sources */,
1E078C1F14643C2000A707F4 /* PhraseDictionaryHiero.cpp in Sources */,
1E078C23146440F700A707F4 /* RuleTableLoaderHiero.cpp in Sources */,
1E2755B314667CA4009D1DF9 /* PhraseDictionaryALSuffixArray.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};

View File

@ -0,0 +1,9 @@
//
// PhraseDictionaryALSuffixArray.cpp
// moses
//
// Created by Hieu Hoang on 06/11/2011.
// Copyright 2011 __MyCompanyName__. All rights reserved.
//
#include <iostream>

View File

@ -0,0 +1,25 @@
//
// PhraseDictionaryALSuffixArray.h
// moses
//
// Created by Hieu Hoang on 06/11/2011.
// Copyright 2011 __MyCompanyName__. All rights reserved.
//
#ifndef moses_PhraseDictionaryALSuffixArray_h
#define moses_PhraseDictionaryALSuffixArray_h
#include "PhraseDictionaryHiero.h"
namespace Moses {
// Thin subclass of PhraseDictionaryHiero. It adds no members of its own;
// the only thing it defines is a constructor that forwards its arguments
// to the base class.
class PhraseDictionaryALSuffixArray : public PhraseDictionaryHiero
{
public:
  // numScoreComponent and feature are passed through unchanged to the
  // PhraseDictionaryHiero constructor.
  // The constructor must carry this class's own name; declaring it under
  // the base-class name is not a constructor and does not compile.
  PhraseDictionaryALSuffixArray(size_t numScoreComponent, PhraseDictionaryFeature* feature)
    : PhraseDictionaryHiero(numScoreComponent, feature) {}
};
}
#endif

View File

@ -8,6 +8,10 @@
#include <iostream>
#include "PhraseDictionaryHiero.h"
#include "PhraseDictionarySCFG.h"
#include "InputFileStream.h"
#include "RuleTableLoader.h"
#include "RuleTableLoaderFactory.h"
using namespace std;
@ -21,9 +25,20 @@ bool PhraseDictionaryHiero::Load(const std::vector<FactorType> &input
, const LMList &languageModels
, const WordPenaltyProducer* wpProducer)
{
}
m_filePath = filePath;
m_tableLimit = tableLimit;
// data from file
InputFileStream inFile(filePath);
std::auto_ptr<RuleTableLoader> loader =
RuleTableLoaderFactory::Create(filePath);
bool ret = loader->Load(input, output, inFile, weight, tableLimit,
languageModels, wpProducer, *this);
return ret;
}
} // namespace

View File

@ -9,15 +9,15 @@
#ifndef moses_PhraseDictionaryHiero_h
#define moses_PhraseDictionaryHiero_h
#include "PhraseDictionaryMemory.h"
#include "PhraseDictionarySCFG.h"
namespace Moses {
class PhraseDictionaryHiero : public PhraseDictionaryMemory
class PhraseDictionaryHiero : public PhraseDictionarySCFG
{
public:
PhraseDictionaryHiero(size_t numScoreComponent, PhraseDictionaryFeature* feature)
: PhraseDictionaryMemory(numScoreComponent,feature) {}
: PhraseDictionarySCFG(numScoreComponent,feature) {}
bool Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output

View File

@ -76,7 +76,7 @@ class PhraseDictionarySCFG : public PhraseDictionary
TO_STRING();
private:
protected:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target, const Word &sourceLHS);

View File

@ -175,6 +175,7 @@ enum PhraseTableImplementation {
//,BerkeleyDb = 7
,SuffixArray = 8
,Hiero = 9
,ALSuffixArray = 10
};
enum InputTypeEnum {