mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-07-14 14:50:41 +03:00
beautify
This commit is contained in:
parent
91cb549ccf
commit
05ead45e71
@ -153,19 +153,19 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
// store only the 3rd one (rule count)
|
||||
float val = Moses::Scan<float>(tok);
|
||||
misc[0] = val;
|
||||
break;
|
||||
// store only the 3rd one (rule count)
|
||||
float val = Moses::Scan<float>(tok);
|
||||
misc[0] = val;
|
||||
break;
|
||||
}
|
||||
case 5: {
|
||||
// sparse features
|
||||
sparseFeatures << tok << " ";
|
||||
// sparse features
|
||||
sparseFeatures << tok << " ";
|
||||
break;
|
||||
}
|
||||
case 6: {
|
||||
property << tok << " ";
|
||||
break;
|
||||
property << tok << " ";
|
||||
break;
|
||||
}
|
||||
default:
|
||||
cerr << "ERROR in line " << line << endl;
|
||||
|
@ -166,10 +166,10 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
|
||||
size_t propSize = m_property.size();
|
||||
|
||||
size_t memNeeded = sizeof(UINT64) // file pos (phrase id)
|
||||
+ sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
|
||||
+ sizeof(float) * numScores // scores
|
||||
+ sizeof(UINT64) + sparseFeatureSize // sparse features string
|
||||
+ sizeof(UINT64) + propSize; // property string
|
||||
+ sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
|
||||
+ sizeof(float) * numScores // scores
|
||||
+ sizeof(UINT64) + sparseFeatureSize // sparse features string
|
||||
+ sizeof(UINT64) + propSize; // property string
|
||||
|
||||
char *mem = (char*) malloc(memNeeded);
|
||||
//memset(mem, 0, memNeeded);
|
||||
@ -350,13 +350,13 @@ UINT64 TargetPhrase::ReadStringFromFile(std::fstream &fileTPColl, std::string &o
|
||||
bytesRead += sizeof(UINT64);
|
||||
|
||||
if (strSize) {
|
||||
char *mem = (char*) malloc(strSize + 1);
|
||||
mem[strSize] = '\0';
|
||||
fileTPColl.read(mem, strSize);
|
||||
outStr = string(mem);
|
||||
free(mem);
|
||||
char *mem = (char*) malloc(strSize + 1);
|
||||
mem[strSize] = '\0';
|
||||
fileTPColl.read(mem, strSize);
|
||||
outStr = string(mem);
|
||||
free(mem);
|
||||
|
||||
bytesRead += strSize;
|
||||
bytesRead += strSize;
|
||||
}
|
||||
|
||||
return bytesRead;
|
||||
|
@ -113,14 +113,12 @@ public:
|
||||
|
||||
virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const;
|
||||
|
||||
void SetProperty(const std::string &value)
|
||||
{
|
||||
m_property = value;
|
||||
void SetProperty(const std::string &value) {
|
||||
m_property = value;
|
||||
}
|
||||
|
||||
void SetSparseFeatures(const std::string &value)
|
||||
{
|
||||
m_sparseFeatures = value;
|
||||
void SetSparseFeatures(const std::string &value) {
|
||||
m_sparseFeatures = value;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -105,18 +105,17 @@ void Word::ConvertToMoses(
|
||||
overwrite = Moses::Word(m_isNonTerminal);
|
||||
|
||||
if (m_isNonTerminal) {
|
||||
const std::string &tok = vocab.GetString(m_vocabId);
|
||||
overwrite.SetFactor(0, factorColl.AddFactor(tok, m_isNonTerminal));
|
||||
}
|
||||
else {
|
||||
// TODO: this conversion should have been done at load time.
|
||||
util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
|
||||
const std::string &tok = vocab.GetString(m_vocabId);
|
||||
overwrite.SetFactor(0, factorColl.AddFactor(tok, m_isNonTerminal));
|
||||
} else {
|
||||
// TODO: this conversion should have been done at load time.
|
||||
util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
|
||||
|
||||
for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
|
||||
UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
|
||||
overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
|
||||
}
|
||||
UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
|
||||
for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
|
||||
UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
|
||||
overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
|
||||
}
|
||||
UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,7 +7,8 @@ size_t lookup( string );
|
||||
vector<string> tokenize( const char input[] );
|
||||
SuffixArray suffixArray;
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
// handle parameters
|
||||
string query;
|
||||
string fileNameSuffix;
|
||||
@ -95,14 +96,14 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
cout << lookup( query ) << endl;
|
||||
}
|
||||
}
|
||||
else if (queryFlag) {
|
||||
} else if (queryFlag) {
|
||||
cout << lookup( query ) << endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t lookup( string query ) {
|
||||
size_t lookup( string query )
|
||||
{
|
||||
cerr << "query is " << query << endl;
|
||||
vector< string > queryString = tokenize( query.c_str() );
|
||||
return suffixArray.Count( queryString );
|
||||
|
@ -61,7 +61,8 @@ void SparseVector::set(const string& name, FeatureStatsType value)
|
||||
m_fvector[id] = value;
|
||||
}
|
||||
|
||||
void SparseVector::set(size_t id, FeatureStatsType value) {
|
||||
void SparseVector::set(size_t id, FeatureStatsType value)
|
||||
{
|
||||
assert(m_id_to_name.size() > id);
|
||||
m_fvector[id] = value;
|
||||
}
|
||||
@ -204,7 +205,7 @@ FeatureStats::FeatureStats(const size_t size)
|
||||
|
||||
FeatureStats::~FeatureStats()
|
||||
{
|
||||
delete [] m_array;
|
||||
delete [] m_array;
|
||||
}
|
||||
|
||||
void FeatureStats::Copy(const FeatureStats &stats)
|
||||
|
@ -31,9 +31,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace MosesTuning {
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
|
||||
std::ostream& operator<<(std::ostream& out, const WordVec& wordVec)
|
||||
{
|
||||
out << "[";
|
||||
for (size_t i = 0; i < wordVec.size(); ++i) {
|
||||
out << wordVec[i]->first;
|
||||
@ -44,7 +46,8 @@ std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
|
||||
}
|
||||
|
||||
|
||||
void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
|
||||
void ReferenceSet::Load(const vector<string>& files, Vocab& vocab)
|
||||
{
|
||||
for (size_t i = 0; i < files.size(); ++i) {
|
||||
util::FilePiece fh(files[i].c_str());
|
||||
size_t sentenceId = 0;
|
||||
@ -55,14 +58,15 @@ void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
|
||||
} catch (util::EndOfFileException &e) {
|
||||
break;
|
||||
}
|
||||
AddLine(sentenceId, line, vocab);
|
||||
++sentenceId;
|
||||
AddLine(sentenceId, line, vocab);
|
||||
++sentenceId;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab) {
|
||||
void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab)
|
||||
{
|
||||
//cerr << line << endl;
|
||||
NgramCounter ngramCounts;
|
||||
list<WordVec> openNgrams;
|
||||
@ -74,14 +78,14 @@ void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vo
|
||||
openNgrams.push_front(WordVec());
|
||||
for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end(); ++k) {
|
||||
k->push_back(nextTok);
|
||||
++ngramCounts[*k];
|
||||
++ngramCounts[*k];
|
||||
}
|
||||
if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
|
||||
}
|
||||
|
||||
//merge into overall ngram map
|
||||
for (NgramCounter::const_iterator ni = ngramCounts.begin();
|
||||
ni != ngramCounts.end(); ++ni) {
|
||||
ni != ngramCounts.end(); ++ni) {
|
||||
size_t count = ni->second;
|
||||
//cerr << *ni << " " << count << endl;
|
||||
if (ngramCounts_.size() <= sentenceId) ngramCounts_.resize(sentenceId+1);
|
||||
@ -104,8 +108,9 @@ void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vo
|
||||
//cerr << endl;
|
||||
|
||||
}
|
||||
|
||||
size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const {
|
||||
|
||||
size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const
|
||||
{
|
||||
const NgramMap& ngramCounts = ngramCounts_.at(sentenceId);
|
||||
NgramMap::const_iterator ngi = ngramCounts.find(ngram);
|
||||
if (ngi == ngramCounts.end()) return 0;
|
||||
@ -114,7 +119,8 @@ size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool
|
||||
|
||||
VertexState::VertexState(): bleuStats(kBleuNgramOrder), targetLength(0) {}
|
||||
|
||||
void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats ) const {
|
||||
void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats ) const
|
||||
{
|
||||
for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
|
||||
//cerr << "Checking: " << *ngi << " matches " << references_.NgramMatches(sentenceId_,*ngi,false) << endl;
|
||||
size_t order = ngi->first.size();
|
||||
@ -124,7 +130,8 @@ void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStats
|
||||
}
|
||||
}
|
||||
|
||||
size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
|
||||
size_t HgBleuScorer::GetTargetLength(const Edge& edge) const
|
||||
{
|
||||
size_t targetLength = 0;
|
||||
for (size_t i = 0; i < edge.Words().size(); ++i) {
|
||||
const Vocab::Entry* word = edge.Words()[i];
|
||||
@ -137,7 +144,8 @@ size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
|
||||
return targetLength;
|
||||
}
|
||||
|
||||
FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats) {
|
||||
FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats)
|
||||
{
|
||||
NgramCounter ngramCounts;
|
||||
size_t childId = 0;
|
||||
size_t wordId = 0;
|
||||
@ -147,7 +155,7 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
|
||||
bool inRightContext = false;
|
||||
list<WordVec> openNgrams;
|
||||
const Vocab::Entry* currentWord = NULL;
|
||||
while (wordId < edge.Words().size()) {
|
||||
while (wordId < edge.Words().size()) {
|
||||
currentWord = edge.Words()[wordId];
|
||||
if (currentWord != NULL) {
|
||||
++wordId;
|
||||
@ -214,7 +222,7 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
|
||||
}
|
||||
if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
|
||||
}
|
||||
|
||||
|
||||
//Collect matches
|
||||
//This edge
|
||||
//cerr << "edge ngrams" << endl;
|
||||
@ -227,26 +235,27 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
|
||||
bleuStats[j] += vertexStates_[edge.Children()[i]].bleuStats[j];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
FeatureStatsType sourceLength = head.SourceCovered();
|
||||
size_t referenceLength = references_.Length(sentenceId_);
|
||||
FeatureStatsType effectiveReferenceLength =
|
||||
FeatureStatsType effectiveReferenceLength =
|
||||
sourceLength / totalSourceLength_ * referenceLength;
|
||||
|
||||
bleuStats[bleuStats.size()-1] = effectiveReferenceLength;
|
||||
//backgroundBleu_[backgroundBleu_.size()-1] =
|
||||
//backgroundBleu_[backgroundBleu_.size()-1] =
|
||||
// backgroundRefLength_ * sourceLength / totalSourceLength_;
|
||||
FeatureStatsType bleu = sentenceLevelBackgroundBleu(bleuStats, backgroundBleu_);
|
||||
|
||||
return bleu;
|
||||
}
|
||||
|
||||
void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats) {
|
||||
void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats)
|
||||
{
|
||||
//TODO: Maybe more efficient to absorb into the Score() method
|
||||
VertexState& vertexState = vertexStates_[vertexId];
|
||||
//cerr << "Updating state for " << vertexId << endl;
|
||||
|
||||
|
||||
//leftContext
|
||||
int wi = 0;
|
||||
const VertexState* childState = NULL;
|
||||
@ -263,9 +272,9 @@ void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const ve
|
||||
//start of child state
|
||||
childState = &(vertexStates_[winnerEdge.Children()[childi++]]);
|
||||
contexti = 0;
|
||||
}
|
||||
}
|
||||
if ((size_t)contexti < childState->leftContext.size()) {
|
||||
vertexState.leftContext.push_back(childState->leftContext[contexti++]);
|
||||
vertexState.leftContext.push_back(childState->leftContext[contexti++]);
|
||||
} else {
|
||||
//end of child context
|
||||
childState = NULL;
|
||||
@ -314,7 +323,8 @@ typedef pair<const Edge*,FeatureStatsType> BackPointer;
|
||||
* Recurse through back pointers
|
||||
**/
|
||||
static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<BackPointer>& bps,
|
||||
HgHypothesis* bestHypo) {
|
||||
HgHypothesis* bestHypo)
|
||||
{
|
||||
//cerr << "Expanding " << vertexId << " Score: " << bps[vertexId].second << endl;
|
||||
//UTIL_THROW_IF(bps[vertexId].second == kMinScore+1, HypergraphException, "Landed at vertex " << vertexId << " which is a dead end");
|
||||
if (!bps[vertexId].first) return;
|
||||
@ -334,7 +344,7 @@ static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<
|
||||
}
|
||||
}
|
||||
|
||||
void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
|
||||
void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
|
||||
{
|
||||
BackPointer init(NULL,kMinScore);
|
||||
vector<BackPointer> backPointers(graph.VertexSize(),init);
|
||||
@ -349,7 +359,7 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
|
||||
//UTIL_THROW(HypergraphException, "Vertex " << vi << " has no incoming edges");
|
||||
//If no incoming edges, vertex is a dead end
|
||||
backPointers[vi].first = NULL;
|
||||
backPointers[vi].second = kMinScore;
|
||||
backPointers[vi].second = kMinScore;
|
||||
} else {
|
||||
//cerr << "\nVertex: " << vi << endl;
|
||||
for (size_t ei = 0; ei < incoming.size(); ++ei) {
|
||||
@ -362,10 +372,10 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
|
||||
incomingScore = max(incomingScore + backPointers[childId].second, kMinScore);
|
||||
}
|
||||
vector<FeatureStatsType> bleuStats(kBleuNgramOrder*2+1);
|
||||
// cerr << "Score: " << incomingScore << " Bleu: ";
|
||||
// if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
|
||||
// cerr << "Score: " << incomingScore << " Bleu: ";
|
||||
// if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
|
||||
FeatureStatsType totalScore = incomingScore;
|
||||
if (bleuWeight) {
|
||||
if (bleuWeight) {
|
||||
FeatureStatsType bleuScore = bleuScorer.Score(*(incoming[ei]), vertex, bleuStats);
|
||||
if (isnan(bleuScore)) {
|
||||
cerr << "WARN: bleu score undefined" << endl;
|
||||
@ -379,7 +389,7 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
|
||||
}
|
||||
//UTIL_THROW_IF(isnan(bleuScore), util::Exception, "Bleu score undefined, smoothing problem?");
|
||||
totalScore += bleuWeight * bleuScore;
|
||||
// cerr << bleuScore << " Total: " << incomingScore << endl << endl;
|
||||
// cerr << bleuScore << " Total: " << incomingScore << endl << endl;
|
||||
//cerr << "is " << incomingScore << " bs " << bleuScore << endl;
|
||||
}
|
||||
if (totalScore >= winnerScore) {
|
||||
|
@ -27,7 +27,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "BleuScorer.h"
|
||||
#include "Hypergraph.h"
|
||||
|
||||
namespace MosesTuning {
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const WordVec& wordVec);
|
||||
|
||||
@ -47,18 +48,21 @@ struct NgramEquals : public std::binary_function<const WordVec&, const WordVec&,
|
||||
typedef boost::unordered_map<WordVec, size_t, NgramHash, NgramEquals> NgramCounter;
|
||||
|
||||
|
||||
class ReferenceSet {
|
||||
class ReferenceSet
|
||||
{
|
||||
|
||||
|
||||
public:
|
||||
|
||||
|
||||
void AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab);
|
||||
|
||||
void Load(const std::vector<std::string>& files, Vocab& vocab);
|
||||
|
||||
size_t NgramMatches(size_t sentenceId, const WordVec&, bool clip) const;
|
||||
|
||||
size_t Length(size_t sentenceId) const {return lengths_[sentenceId];}
|
||||
size_t Length(size_t sentenceId) const {
|
||||
return lengths_[sentenceId];
|
||||
}
|
||||
|
||||
private:
|
||||
//ngrams to (clipped,unclipped) counts
|
||||
@ -80,31 +84,32 @@ struct VertexState {
|
||||
/**
|
||||
* Used to score an rule (ie edge) when we are applying it.
|
||||
**/
|
||||
class HgBleuScorer {
|
||||
public:
|
||||
HgBleuScorer(const ReferenceSet& references, const Graph& graph, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu):
|
||||
class HgBleuScorer
|
||||
{
|
||||
public:
|
||||
HgBleuScorer(const ReferenceSet& references, const Graph& graph, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu):
|
||||
references_(references), sentenceId_(sentenceId), graph_(graph), backgroundBleu_(backgroundBleu),
|
||||
backgroundRefLength_(backgroundBleu[kBleuNgramOrder*2]) {
|
||||
vertexStates_.resize(graph.VertexSize());
|
||||
totalSourceLength_ = graph.GetVertex(graph.VertexSize()-1).SourceCovered();
|
||||
}
|
||||
backgroundRefLength_(backgroundBleu[kBleuNgramOrder*2]) {
|
||||
vertexStates_.resize(graph.VertexSize());
|
||||
totalSourceLength_ = graph.GetVertex(graph.VertexSize()-1).SourceCovered();
|
||||
}
|
||||
|
||||
FeatureStatsType Score(const Edge& edge, const Vertex& head, std::vector<FeatureStatsType>& bleuStats) ;
|
||||
FeatureStatsType Score(const Edge& edge, const Vertex& head, std::vector<FeatureStatsType>& bleuStats) ;
|
||||
|
||||
void UpdateState(const Edge& winnerEdge, size_t vertexId, const std::vector<FeatureStatsType>& bleuStats);
|
||||
void UpdateState(const Edge& winnerEdge, size_t vertexId, const std::vector<FeatureStatsType>& bleuStats);
|
||||
|
||||
|
||||
private:
|
||||
const ReferenceSet& references_;
|
||||
std::vector<VertexState> vertexStates_;
|
||||
size_t sentenceId_;
|
||||
size_t totalSourceLength_;
|
||||
const Graph& graph_;
|
||||
std::vector<FeatureStatsType> backgroundBleu_;
|
||||
FeatureStatsType backgroundRefLength_;
|
||||
private:
|
||||
const ReferenceSet& references_;
|
||||
std::vector<VertexState> vertexStates_;
|
||||
size_t sentenceId_;
|
||||
size_t totalSourceLength_;
|
||||
const Graph& graph_;
|
||||
std::vector<FeatureStatsType> backgroundBleu_;
|
||||
FeatureStatsType backgroundRefLength_;
|
||||
|
||||
void UpdateMatches(const NgramCounter& counter, std::vector<FeatureStatsType>& bleuStats) const;
|
||||
size_t GetTargetLength(const Edge& edge) const;
|
||||
void UpdateMatches(const NgramCounter& counter, std::vector<FeatureStatsType>& bleuStats) const;
|
||||
size_t GetTargetLength(const Edge& edge) const;
|
||||
};
|
||||
|
||||
struct HgHypothesis {
|
||||
|
@ -15,7 +15,7 @@ BOOST_AUTO_TEST_CASE(viterbi_simple_lattice)
|
||||
Vocab vocab;
|
||||
WordVec words;
|
||||
string wordStrings[] =
|
||||
{"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g"};
|
||||
{"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g"};
|
||||
for (size_t i = 0; i < 9; ++i) {
|
||||
words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
|
||||
}
|
||||
@ -102,7 +102,7 @@ BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
|
||||
Vocab vocab;
|
||||
WordVec words;
|
||||
string wordStrings[] =
|
||||
{"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
|
||||
{"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
|
||||
for (size_t i = 0; i < 13; ++i) {
|
||||
words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
|
||||
}
|
||||
|
@ -34,11 +34,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
using namespace std;
|
||||
namespace fs = boost::filesystem;
|
||||
|
||||
namespace MosesTuning {
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
static const ValType BLEU_RATIO = 5;
|
||||
|
||||
ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
|
||||
ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv)
|
||||
{
|
||||
vector<ValType> stats(scorer_->NumberOfScores(),0);
|
||||
for(reset(); !finished(); next()) {
|
||||
vector<ValType> sent;
|
||||
@ -51,13 +53,14 @@ ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
|
||||
}
|
||||
|
||||
NbestHopeFearDecoder::NbestHopeFearDecoder(
|
||||
const vector<string>& featureFiles,
|
||||
const vector<string>& scoreFiles,
|
||||
bool streaming,
|
||||
bool no_shuffle,
|
||||
bool safe_hope,
|
||||
Scorer* scorer
|
||||
) : safe_hope_(safe_hope) {
|
||||
const vector<string>& featureFiles,
|
||||
const vector<string>& scoreFiles,
|
||||
bool streaming,
|
||||
bool no_shuffle,
|
||||
bool safe_hope,
|
||||
Scorer* scorer
|
||||
) : safe_hope_(safe_hope)
|
||||
{
|
||||
scorer_ = scorer;
|
||||
if (streaming) {
|
||||
train_.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
|
||||
@ -67,25 +70,29 @@ NbestHopeFearDecoder::NbestHopeFearDecoder(
|
||||
}
|
||||
|
||||
|
||||
void NbestHopeFearDecoder::next() {
|
||||
void NbestHopeFearDecoder::next()
|
||||
{
|
||||
train_->next();
|
||||
}
|
||||
|
||||
bool NbestHopeFearDecoder::finished() {
|
||||
bool NbestHopeFearDecoder::finished()
|
||||
{
|
||||
return train_->finished();
|
||||
}
|
||||
|
||||
void NbestHopeFearDecoder::reset() {
|
||||
void NbestHopeFearDecoder::reset()
|
||||
{
|
||||
train_->reset();
|
||||
}
|
||||
|
||||
void NbestHopeFearDecoder::HopeFear(
|
||||
const std::vector<ValType>& backgroundBleu,
|
||||
const MiraWeightVector& wv,
|
||||
HopeFearData* hopeFear
|
||||
) {
|
||||
const std::vector<ValType>& backgroundBleu,
|
||||
const MiraWeightVector& wv,
|
||||
HopeFearData* hopeFear
|
||||
)
|
||||
{
|
||||
|
||||
|
||||
|
||||
// Hope / fear decode
|
||||
ValType hope_scale = 1.0;
|
||||
size_t hope_index=0, fear_index=0, model_index=0;
|
||||
@ -134,7 +141,8 @@ void NbestHopeFearDecoder::HopeFear(
|
||||
hopeFear->hopeFearEqual = (hope_index == fear_index);
|
||||
}
|
||||
|
||||
void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats) {
|
||||
void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
|
||||
{
|
||||
// Find max model
|
||||
size_t max_index=0;
|
||||
ValType max_score=0;
|
||||
@ -152,18 +160,19 @@ void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValTy
|
||||
|
||||
|
||||
HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
|
||||
(
|
||||
const string& hypergraphDir,
|
||||
const vector<string>& referenceFiles,
|
||||
size_t num_dense,
|
||||
bool streaming,
|
||||
bool no_shuffle,
|
||||
bool safe_hope,
|
||||
size_t hg_pruning,
|
||||
const MiraWeightVector& wv,
|
||||
Scorer* scorer
|
||||
) :
|
||||
num_dense_(num_dense) {
|
||||
(
|
||||
const string& hypergraphDir,
|
||||
const vector<string>& referenceFiles,
|
||||
size_t num_dense,
|
||||
bool streaming,
|
||||
bool no_shuffle,
|
||||
bool safe_hope,
|
||||
size_t hg_pruning,
|
||||
const MiraWeightVector& wv,
|
||||
Scorer* scorer
|
||||
) :
|
||||
num_dense_(num_dense)
|
||||
{
|
||||
|
||||
UTIL_THROW_IF(streaming, util::Exception, "Streaming not currently supported for hypergraphs");
|
||||
UTIL_THROW_IF(!fs::exists(hypergraphDir), HypergraphException, "Directory '" << hypergraphDir << "' does not exist");
|
||||
@ -177,17 +186,17 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
|
||||
static const string kWeights = "weights";
|
||||
fs::directory_iterator dend;
|
||||
size_t fileCount = 0;
|
||||
|
||||
|
||||
cerr << "Reading hypergraphs" << endl;
|
||||
for (fs::directory_iterator di(hypergraphDir); di != dend; ++di) {
|
||||
const fs::path& hgpath = di->path();
|
||||
if (hgpath.filename() == kWeights) continue;
|
||||
// cerr << "Reading " << hgpath.filename() << endl;
|
||||
// cerr << "Reading " << hgpath.filename() << endl;
|
||||
Graph graph(vocab_);
|
||||
size_t id = boost::lexical_cast<size_t>(hgpath.stem().string());
|
||||
util::scoped_fd fd(util::OpenReadOrThrow(hgpath.string().c_str()));
|
||||
//util::FilePiece file(di->path().string().c_str());
|
||||
util::FilePiece file(fd.release());
|
||||
util::FilePiece file(fd.release());
|
||||
ReadGraph(file,graph);
|
||||
|
||||
//cerr << "ref length " << references_.Length(id) << endl;
|
||||
@ -196,7 +205,7 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
|
||||
prunedGraph.reset(new Graph(vocab_));
|
||||
graph.Prune(prunedGraph.get(), weights, edgeCount);
|
||||
graphs_[id] = prunedGraph;
|
||||
// cerr << "Pruning to v=" << graphs_[id]->VertexSize() << " e=" << graphs_[id]->EdgeSize() << endl;
|
||||
// cerr << "Pruning to v=" << graphs_[id]->VertexSize() << " e=" << graphs_[id]->EdgeSize() << endl;
|
||||
++fileCount;
|
||||
if (fileCount % 10 == 0) cerr << ".";
|
||||
if (fileCount % 400 == 0) cerr << " [count=" << fileCount << "]\n";
|
||||
@ -211,23 +220,27 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
|
||||
|
||||
}
|
||||
|
||||
void HypergraphHopeFearDecoder::reset() {
|
||||
void HypergraphHopeFearDecoder::reset()
|
||||
{
|
||||
sentenceIdIter_ = sentenceIds_.begin();
|
||||
}
|
||||
|
||||
void HypergraphHopeFearDecoder::next() {
|
||||
void HypergraphHopeFearDecoder::next()
|
||||
{
|
||||
sentenceIdIter_++;
|
||||
}
|
||||
|
||||
bool HypergraphHopeFearDecoder::finished() {
|
||||
bool HypergraphHopeFearDecoder::finished()
|
||||
{
|
||||
return sentenceIdIter_ == sentenceIds_.end();
|
||||
}
|
||||
|
||||
void HypergraphHopeFearDecoder::HopeFear(
|
||||
const vector<ValType>& backgroundBleu,
|
||||
const MiraWeightVector& wv,
|
||||
HopeFearData* hopeFear
|
||||
) {
|
||||
const vector<ValType>& backgroundBleu,
|
||||
const MiraWeightVector& wv,
|
||||
HopeFearData* hopeFear
|
||||
)
|
||||
{
|
||||
size_t sentenceId = *sentenceIdIter_;
|
||||
SparseVector weights;
|
||||
wv.ToSparse(&weights);
|
||||
@ -247,12 +260,12 @@ void HypergraphHopeFearDecoder::HopeFear(
|
||||
Viterbi(graph, weights, 0, references_, sentenceId, backgroundBleu, &modelHypo);
|
||||
|
||||
|
||||
// Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
|
||||
// Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
|
||||
// where model score is having far more influence than BLEU
|
||||
// hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
|
||||
// if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
|
||||
// hope_scale = abs(hope_bleu) / abs(hope_model);
|
||||
// else break;
|
||||
// hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
|
||||
// if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
|
||||
// hope_scale = abs(hope_bleu) / abs(hope_model);
|
||||
// else break;
|
||||
//TODO: Don't currently get model and bleu so commented this out for now.
|
||||
break;
|
||||
}
|
||||
@ -311,15 +324,16 @@ void HypergraphHopeFearDecoder::HopeFear(
|
||||
if (hopeFear->hopeFearEqual) {
|
||||
for (size_t i = 0; i < fearStats.size(); ++i) {
|
||||
if (fearStats[i] != hopeFear->hopeStats[i]) {
|
||||
hopeFear->hopeFearEqual = false;
|
||||
break;
|
||||
hopeFear->hopeFearEqual = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
hopeFear->hopeFearEqual = hopeFear->hopeFearEqual && (hopeFear->fearFeatures == hopeFear->hopeFeatures);
|
||||
}
|
||||
|
||||
void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats) {
|
||||
void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats)
|
||||
{
|
||||
assert(!finished());
|
||||
HgHypothesis bestHypo;
|
||||
size_t sentenceId = *sentenceIdIter_;
|
||||
|
@ -35,7 +35,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
// the n-best list and lattice/hypergraph implementations
|
||||
//
|
||||
|
||||
namespace MosesTuning {
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
class Scorer;
|
||||
|
||||
@ -44,7 +45,7 @@ struct HopeFearData {
|
||||
MiraFeatureVector modelFeatures;
|
||||
MiraFeatureVector hopeFeatures;
|
||||
MiraFeatureVector fearFeatures;
|
||||
|
||||
|
||||
std::vector<float> modelStats;
|
||||
std::vector<float> hopeStats;
|
||||
|
||||
@ -55,7 +56,8 @@ struct HopeFearData {
|
||||
};
|
||||
|
||||
//Abstract base class
|
||||
class HopeFearDecoder {
|
||||
class HopeFearDecoder
|
||||
{
|
||||
public:
|
||||
//iterator methods
|
||||
virtual void reset() = 0;
|
||||
@ -68,10 +70,10 @@ public:
|
||||
* Calculate hope, fear and model hypotheses
|
||||
**/
|
||||
virtual void HopeFear(
|
||||
const std::vector<ValType>& backgroundBleu,
|
||||
const MiraWeightVector& wv,
|
||||
HopeFearData* hopeFear
|
||||
) = 0;
|
||||
const std::vector<ValType>& backgroundBleu,
|
||||
const MiraWeightVector& wv,
|
||||
HopeFearData* hopeFear
|
||||
) = 0;
|
||||
|
||||
/** Max score decoding */
|
||||
virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
|
||||
@ -86,25 +88,26 @@ protected:
|
||||
|
||||
|
||||
/** Gets hope-fear from nbest lists */
|
||||
class NbestHopeFearDecoder : public virtual HopeFearDecoder {
|
||||
class NbestHopeFearDecoder : public virtual HopeFearDecoder
|
||||
{
|
||||
public:
|
||||
NbestHopeFearDecoder(const std::vector<std::string>& featureFiles,
|
||||
const std::vector<std::string>& scoreFiles,
|
||||
bool streaming,
|
||||
bool no_shuffle,
|
||||
bool safe_hope,
|
||||
Scorer* scorer
|
||||
);
|
||||
const std::vector<std::string>& scoreFiles,
|
||||
bool streaming,
|
||||
bool no_shuffle,
|
||||
bool safe_hope,
|
||||
Scorer* scorer
|
||||
);
|
||||
|
||||
virtual void reset();
|
||||
virtual void next();
|
||||
virtual bool finished();
|
||||
|
||||
virtual void HopeFear(
|
||||
const std::vector<ValType>& backgroundBleu,
|
||||
const MiraWeightVector& wv,
|
||||
HopeFearData* hopeFear
|
||||
);
|
||||
const std::vector<ValType>& backgroundBleu,
|
||||
const MiraWeightVector& wv,
|
||||
HopeFearData* hopeFear
|
||||
);
|
||||
|
||||
virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
|
||||
|
||||
@ -117,29 +120,30 @@ private:
|
||||
|
||||
|
||||
/** Gets hope-fear from hypergraphs */
|
||||
class HypergraphHopeFearDecoder : public virtual HopeFearDecoder {
|
||||
class HypergraphHopeFearDecoder : public virtual HopeFearDecoder
|
||||
{
|
||||
public:
|
||||
HypergraphHopeFearDecoder(
|
||||
const std::string& hypergraphDir,
|
||||
const std::vector<std::string>& referenceFiles,
|
||||
size_t num_dense,
|
||||
bool streaming,
|
||||
bool no_shuffle,
|
||||
bool safe_hope,
|
||||
size_t hg_pruning,
|
||||
const MiraWeightVector& wv,
|
||||
Scorer* scorer_
|
||||
);
|
||||
const std::string& hypergraphDir,
|
||||
const std::vector<std::string>& referenceFiles,
|
||||
size_t num_dense,
|
||||
bool streaming,
|
||||
bool no_shuffle,
|
||||
bool safe_hope,
|
||||
size_t hg_pruning,
|
||||
const MiraWeightVector& wv,
|
||||
Scorer* scorer_
|
||||
);
|
||||
|
||||
virtual void reset();
|
||||
virtual void next();
|
||||
virtual bool finished();
|
||||
|
||||
virtual void HopeFear(
|
||||
const std::vector<ValType>& backgroundBleu,
|
||||
const MiraWeightVector& wv,
|
||||
HopeFearData* hopeFear
|
||||
);
|
||||
const std::vector<ValType>& backgroundBleu,
|
||||
const MiraWeightVector& wv,
|
||||
HopeFearData* hopeFear
|
||||
);
|
||||
|
||||
virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
|
||||
|
||||
|
@ -55,7 +55,8 @@ void HwcmScorer::setReferenceFiles(const vector<string>& referenceFiles)
|
||||
|
||||
}
|
||||
|
||||
void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history, vector<map<string, int> > & hwc) {
|
||||
void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history, vector<map<string, int> > & hwc)
|
||||
{
|
||||
|
||||
if (tree->GetLength() > 0) {
|
||||
string head = getHead(tree);
|
||||
@ -64,8 +65,7 @@ void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history
|
||||
for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it) {
|
||||
extractHeadWordChain(*it, history, hwc);
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
vector<string> new_history(kHwcmOrder);
|
||||
new_history[0] = head;
|
||||
hwc[0][head]++;
|
||||
@ -85,11 +85,11 @@ void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history
|
||||
}
|
||||
}
|
||||
|
||||
string HwcmScorer::getHead(TreePointer tree) {
|
||||
string HwcmScorer::getHead(TreePointer tree)
|
||||
{
|
||||
// assumption (only true for dependency parse: each constituent has a preterminal label, and corresponding terminal is head)
|
||||
// if constituent has multiple preterminals, first one is picked; if it has no preterminals, empty string is returned
|
||||
for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it)
|
||||
{
|
||||
for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it) {
|
||||
TreePointer child = *it;
|
||||
|
||||
if (child->GetLength() == 1 && child->GetChildren()[0]->IsTerminal()) {
|
||||
|
@ -31,18 +31,22 @@ using namespace std;
|
||||
static const string kBOS = "<s>";
|
||||
static const string kEOS = "</s>";
|
||||
|
||||
namespace MosesTuning {
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
StringPiece NextLine(util::FilePiece& from) {
|
||||
StringPiece NextLine(util::FilePiece& from)
|
||||
{
|
||||
StringPiece line;
|
||||
while ((line = from.ReadLine()).starts_with("#"));
|
||||
return line;
|
||||
}
|
||||
|
||||
Vocab::Vocab() : eos_( FindOrAdd(kEOS)), bos_(FindOrAdd(kBOS)){
|
||||
Vocab::Vocab() : eos_( FindOrAdd(kEOS)), bos_(FindOrAdd(kBOS))
|
||||
{
|
||||
}
|
||||
|
||||
const Vocab::Entry &Vocab::FindOrAdd(const StringPiece &str) {
|
||||
const Vocab::Entry &Vocab::FindOrAdd(const StringPiece &str)
|
||||
{
|
||||
#if BOOST_VERSION >= 104200
|
||||
Map::const_iterator i= map_.find(str, Hash(), Equals());
|
||||
#else
|
||||
@ -62,7 +66,8 @@ double_conversion::StringToDoubleConverter converter(double_conversion::StringTo
|
||||
/**
|
||||
* Reads an incoming edge. Returns edge and source words covered.
|
||||
**/
|
||||
static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
|
||||
static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph)
|
||||
{
|
||||
Edge* edge = graph.NewEdge();
|
||||
StringPiece line = from.ReadLine(); //Don't allow comments within edge lists
|
||||
util::TokenIter<util::MultiCharacter> pipes(line, util::MultiCharacter(" ||| "));
|
||||
@ -82,7 +87,7 @@ static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
|
||||
edge->AddWord(&found);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//Features
|
||||
++pipes;
|
||||
for (util::TokenIter<util::SingleCharacter, true> i(*pipes, util::SingleCharacter(' ')); i; ++i) {
|
||||
@ -100,17 +105,18 @@ static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
|
||||
//Covered words
|
||||
++pipes;
|
||||
size_t sourceCovered = boost::lexical_cast<size_t>(*pipes);
|
||||
return pair<Edge*,size_t>(edge,sourceCovered);
|
||||
return pair<Edge*,size_t>(edge,sourceCovered);
|
||||
}
|
||||
|
||||
void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeCount) const {
|
||||
void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeCount) const
|
||||
{
|
||||
|
||||
Graph& newGraph = *pNewGraph;
|
||||
//TODO: Optimise case where no pruning required
|
||||
|
||||
//For debug
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
map<const Edge*, string> edgeIds;
|
||||
for (size_t i = 0; i < edges_.Size(); ++i) {
|
||||
@ -136,7 +142,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
|
||||
|
||||
//Compute backward scores
|
||||
for (size_t vi = 0; vi < vertices_.Size(); ++vi) {
|
||||
// cerr << "Vertex " << vi << endl;
|
||||
// cerr << "Vertex " << vi << endl;
|
||||
const Vertex& vertex = vertices_[vi];
|
||||
const vector<const Edge*>& incoming = vertex.GetIncoming();
|
||||
if (!incoming.size()) {
|
||||
@ -150,7 +156,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
|
||||
//cerr << "\tChild " << incoming[ei]->Children()[i] << endl;
|
||||
size_t childId = incoming[ei]->Children()[i];
|
||||
UTIL_THROW_IF(vertexBackwardScores[childId] == kMinScore,
|
||||
HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
|
||||
HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
|
||||
outgoing[childId].push_back(incoming[ei]);
|
||||
incomingScore += vertexBackwardScores[childId];
|
||||
}
|
||||
@ -172,7 +178,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
|
||||
} else {
|
||||
for (size_t ei = 0; ei < outgoing[vi].size(); ++ei) {
|
||||
//cerr << "Edge " << edgeIds[outgoing[vi][ei]] << endl;
|
||||
FeatureStatsType outgoingScore = 0;
|
||||
FeatureStatsType outgoingScore = 0;
|
||||
//add score of head
|
||||
outgoingScore += vertexForwardScores[edgeHeads[outgoing[vi][ei]]];
|
||||
//cerr << "Forward score " << outgoingScore << endl;
|
||||
@ -204,11 +210,11 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
|
||||
}
|
||||
FeatureStatsType score = edgeForwardScores[edge] + edgeBackwardScores[edge];
|
||||
edgeScores.insert(pair<FeatureStatsType, const Edge*>(score,edge));
|
||||
// cerr << edgeIds[edge] << " " << score << endl;
|
||||
// cerr << edgeIds[edge] << " " << score << endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
multimap<FeatureStatsType, const Edge*>::const_reverse_iterator ei = edgeScores.rbegin();
|
||||
size_t edgeCount = 1;
|
||||
while(edgeCount < minEdgeCount && ei != edgeScores.rend()) {
|
||||
@ -235,10 +241,10 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
|
||||
map<size_t,size_t> oldIdToNew;
|
||||
size_t vi = 0;
|
||||
for (set<size_t>::const_iterator i = retainedVertices.begin(); i != retainedVertices.end(); ++i, ++vi) {
|
||||
// cerr << *i << " New: " << vi << endl;
|
||||
// cerr << *i << " New: " << vi << endl;
|
||||
oldIdToNew[*i] = vi;
|
||||
Vertex* vertex = newGraph.NewVertex();
|
||||
vertex->SetSourceCovered(vertices_[*i].SourceCovered());
|
||||
vertex->SetSourceCovered(vertices_[*i].SourceCovered());
|
||||
}
|
||||
|
||||
for (set<const Edge*>::const_iterator i = retainedEdges.begin(); i != retainedEdges.end(); ++i) {
|
||||
@ -255,7 +261,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
|
||||
newHead.AddEdge(newEdge);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
cerr << "New graph" << endl;
|
||||
for (size_t vi = 0; vi < newGraph.VertexSize(); ++vi) {
|
||||
@ -275,21 +281,22 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
|
||||
}
|
||||
cerr << endl;
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Read from "Kenneth's hypergraph" aka cdec target_graph format (with comments)
|
||||
**/
|
||||
void ReadGraph(util::FilePiece &from, Graph &graph) {
|
||||
void ReadGraph(util::FilePiece &from, Graph &graph)
|
||||
{
|
||||
|
||||
//First line should contain field names
|
||||
StringPiece line = from.ReadLine();
|
||||
UTIL_THROW_IF(line.compare("# target ||| features ||| source-covered") != 0, HypergraphException, "Incorrect format spec on first line: '" << line << "'");
|
||||
line = NextLine(from);
|
||||
|
||||
|
||||
//Then expect numbers of vertices
|
||||
util::TokenIter<util::SingleCharacter, false> i(line, util::SingleCharacter(' '));
|
||||
unsigned long int vertices = boost::lexical_cast<unsigned long int>(*i);
|
||||
@ -304,9 +311,11 @@ void ReadGraph(util::FilePiece &from, Graph &graph) {
|
||||
for (unsigned long int e = 0; e < edge_count; ++e) {
|
||||
pair<Edge*,size_t> edge = ReadEdge(from, graph);
|
||||
vertex->AddEdge(edge.first);
|
||||
//Note: the file format attaches this to the edge, but it's really a property
|
||||
//Note: the file format attaches this to the edge, but it's really a property
|
||||
//of the vertex.
|
||||
if (!e) {vertex->SetSourceCovered(edge.second);}
|
||||
if (!e) {
|
||||
vertex->SetSourceCovered(edge.second);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -37,81 +37,88 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
#include "FeatureStats.h"
|
||||
|
||||
namespace MosesTuning {
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
typedef unsigned int WordIndex;
|
||||
const WordIndex kMaxWordIndex = UINT_MAX;
|
||||
const FeatureStatsType kMinScore = -1e10;
|
||||
|
||||
template <class T> class FixedAllocator : boost::noncopyable {
|
||||
public:
|
||||
FixedAllocator() : current_(NULL), end_(NULL) {}
|
||||
template <class T> class FixedAllocator : boost::noncopyable
|
||||
{
|
||||
public:
|
||||
FixedAllocator() : current_(NULL), end_(NULL) {}
|
||||
|
||||
void Init(std::size_t count) {
|
||||
assert(!current_);
|
||||
array_.reset(new T[count]);
|
||||
current_ = array_.get();
|
||||
end_ = current_ + count;
|
||||
}
|
||||
void Init(std::size_t count) {
|
||||
assert(!current_);
|
||||
array_.reset(new T[count]);
|
||||
current_ = array_.get();
|
||||
end_ = current_ + count;
|
||||
}
|
||||
|
||||
T &operator[](std::size_t idx) {
|
||||
return array_.get()[idx];
|
||||
}
|
||||
const T &operator[](std::size_t idx) const {
|
||||
return array_.get()[idx];
|
||||
}
|
||||
T &operator[](std::size_t idx) {
|
||||
return array_.get()[idx];
|
||||
}
|
||||
const T &operator[](std::size_t idx) const {
|
||||
return array_.get()[idx];
|
||||
}
|
||||
|
||||
T *New() {
|
||||
T *ret = current_++;
|
||||
UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end");
|
||||
return ret;
|
||||
}
|
||||
T *New() {
|
||||
T *ret = current_++;
|
||||
UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end");
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::size_t Capacity() const {
|
||||
return end_ - array_.get();
|
||||
}
|
||||
std::size_t Capacity() const {
|
||||
return end_ - array_.get();
|
||||
}
|
||||
|
||||
std::size_t Size() const {
|
||||
return current_ - array_.get();
|
||||
}
|
||||
std::size_t Size() const {
|
||||
return current_ - array_.get();
|
||||
}
|
||||
|
||||
private:
|
||||
boost::scoped_array<T> array_;
|
||||
T *current_, *end_;
|
||||
private:
|
||||
boost::scoped_array<T> array_;
|
||||
T *current_, *end_;
|
||||
};
|
||||
|
||||
|
||||
class Vocab {
|
||||
public:
|
||||
Vocab();
|
||||
class Vocab
|
||||
{
|
||||
public:
|
||||
Vocab();
|
||||
|
||||
typedef std::pair<const char *const, WordIndex> Entry;
|
||||
typedef std::pair<const char *const, WordIndex> Entry;
|
||||
|
||||
const Entry &FindOrAdd(const StringPiece &str);
|
||||
const Entry &FindOrAdd(const StringPiece &str);
|
||||
|
||||
const Entry& Bos() const {return bos_;}
|
||||
const Entry& Bos() const {
|
||||
return bos_;
|
||||
}
|
||||
|
||||
const Entry& Eos() const {return eos_;}
|
||||
const Entry& Eos() const {
|
||||
return eos_;
|
||||
}
|
||||
|
||||
private:
|
||||
util::Pool piece_backing_;
|
||||
private:
|
||||
util::Pool piece_backing_;
|
||||
|
||||
struct Hash : public std::unary_function<const char *, std::size_t> {
|
||||
std::size_t operator()(StringPiece str) const {
|
||||
return util::MurmurHashNative(str.data(), str.size());
|
||||
}
|
||||
};
|
||||
struct Hash : public std::unary_function<const char *, std::size_t> {
|
||||
std::size_t operator()(StringPiece str) const {
|
||||
return util::MurmurHashNative(str.data(), str.size());
|
||||
}
|
||||
};
|
||||
|
||||
struct Equals : public std::binary_function<const char *, const char *, bool> {
|
||||
bool operator()(StringPiece first, StringPiece second) const {
|
||||
return first == second;
|
||||
}
|
||||
};
|
||||
struct Equals : public std::binary_function<const char *, const char *, bool> {
|
||||
bool operator()(StringPiece first, StringPiece second) const {
|
||||
return first == second;
|
||||
}
|
||||
};
|
||||
|
||||
typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map;
|
||||
Map map_;
|
||||
Entry eos_;
|
||||
Entry bos_;
|
||||
typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map;
|
||||
Map map_;
|
||||
Entry eos_;
|
||||
Entry bos_;
|
||||
|
||||
};
|
||||
|
||||
@ -125,121 +132,141 @@ typedef boost::shared_ptr<SparseVector> FeaturePtr;
|
||||
/**
|
||||
* An edge has 1 head vertex, 0..n child (tail) vertices, a list of words and a feature vector.
|
||||
**/
|
||||
class Edge {
|
||||
public:
|
||||
Edge() {features_.reset(new SparseVector());}
|
||||
class Edge
|
||||
{
|
||||
public:
|
||||
Edge() {
|
||||
features_.reset(new SparseVector());
|
||||
}
|
||||
|
||||
void AddWord(const Vocab::Entry *word) {
|
||||
words_.push_back(word);
|
||||
}
|
||||
void AddWord(const Vocab::Entry *word) {
|
||||
words_.push_back(word);
|
||||
}
|
||||
|
||||
void AddChild(size_t child) {
|
||||
children_.push_back(child);
|
||||
}
|
||||
void AddChild(size_t child) {
|
||||
children_.push_back(child);
|
||||
}
|
||||
|
||||
void AddFeature(const StringPiece& name, FeatureStatsType value) {
|
||||
//TODO StringPiece interface
|
||||
features_->set(name.as_string(),value);
|
||||
}
|
||||
void AddFeature(const StringPiece& name, FeatureStatsType value) {
|
||||
//TODO StringPiece interface
|
||||
features_->set(name.as_string(),value);
|
||||
}
|
||||
|
||||
|
||||
const WordVec &Words() const {
|
||||
return words_;
|
||||
}
|
||||
|
||||
const FeaturePtr& Features() const {
|
||||
return features_;
|
||||
}
|
||||
const WordVec &Words() const {
|
||||
return words_;
|
||||
}
|
||||
|
||||
void SetFeatures(const FeaturePtr& features) {
|
||||
features_ = features;
|
||||
}
|
||||
const FeaturePtr& Features() const {
|
||||
return features_;
|
||||
}
|
||||
|
||||
const std::vector<size_t>& Children() const {
|
||||
return children_;
|
||||
}
|
||||
void SetFeatures(const FeaturePtr& features) {
|
||||
features_ = features;
|
||||
}
|
||||
|
||||
FeatureStatsType GetScore(const SparseVector& weights) const {
|
||||
return inner_product(*(features_.get()), weights);
|
||||
}
|
||||
const std::vector<size_t>& Children() const {
|
||||
return children_;
|
||||
}
|
||||
|
||||
private:
|
||||
// NULL for non-terminals.
|
||||
std::vector<const Vocab::Entry*> words_;
|
||||
std::vector<size_t> children_;
|
||||
boost::shared_ptr<SparseVector> features_;
|
||||
FeatureStatsType GetScore(const SparseVector& weights) const {
|
||||
return inner_product(*(features_.get()), weights);
|
||||
}
|
||||
|
||||
private:
|
||||
// NULL for non-terminals.
|
||||
std::vector<const Vocab::Entry*> words_;
|
||||
std::vector<size_t> children_;
|
||||
boost::shared_ptr<SparseVector> features_;
|
||||
};
|
||||
|
||||
/*
|
||||
* A vertex has 0..n incoming edges
|
||||
**/
|
||||
class Vertex {
|
||||
public:
|
||||
Vertex() : sourceCovered_(0) {}
|
||||
class Vertex
|
||||
{
|
||||
public:
|
||||
Vertex() : sourceCovered_(0) {}
|
||||
|
||||
void AddEdge(const Edge* edge) {incoming_.push_back(edge);}
|
||||
void AddEdge(const Edge* edge) {
|
||||
incoming_.push_back(edge);
|
||||
}
|
||||
|
||||
void SetSourceCovered(size_t sourceCovered) {sourceCovered_ = sourceCovered;}
|
||||
void SetSourceCovered(size_t sourceCovered) {
|
||||
sourceCovered_ = sourceCovered;
|
||||
}
|
||||
|
||||
const std::vector<const Edge*>& GetIncoming() const {return incoming_;}
|
||||
const std::vector<const Edge*>& GetIncoming() const {
|
||||
return incoming_;
|
||||
}
|
||||
|
||||
size_t SourceCovered() const {return sourceCovered_;}
|
||||
size_t SourceCovered() const {
|
||||
return sourceCovered_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<const Edge*> incoming_;
|
||||
size_t sourceCovered_;
|
||||
private:
|
||||
std::vector<const Edge*> incoming_;
|
||||
size_t sourceCovered_;
|
||||
};
|
||||
|
||||
|
||||
class Graph : boost::noncopyable {
|
||||
public:
|
||||
Graph(Vocab& vocab) : vocab_(vocab) {}
|
||||
class Graph : boost::noncopyable
|
||||
{
|
||||
public:
|
||||
Graph(Vocab& vocab) : vocab_(vocab) {}
|
||||
|
||||
void SetCounts(std::size_t vertices, std::size_t edges) {
|
||||
vertices_.Init(vertices);
|
||||
edges_.Init(edges);
|
||||
}
|
||||
void SetCounts(std::size_t vertices, std::size_t edges) {
|
||||
vertices_.Init(vertices);
|
||||
edges_.Init(edges);
|
||||
}
|
||||
|
||||
Vocab &MutableVocab() { return vocab_; }
|
||||
Vocab &MutableVocab() {
|
||||
return vocab_;
|
||||
}
|
||||
|
||||
Edge *NewEdge() {
|
||||
return edges_.New();
|
||||
}
|
||||
Edge *NewEdge() {
|
||||
return edges_.New();
|
||||
}
|
||||
|
||||
Vertex *NewVertex() {
|
||||
return vertices_.New();
|
||||
}
|
||||
Vertex *NewVertex() {
|
||||
return vertices_.New();
|
||||
}
|
||||
|
||||
const Vertex &GetVertex(std::size_t index) const {
|
||||
return vertices_[index];
|
||||
}
|
||||
const Vertex &GetVertex(std::size_t index) const {
|
||||
return vertices_[index];
|
||||
}
|
||||
|
||||
Edge &GetEdge(std::size_t index) {
|
||||
return edges_[index];
|
||||
}
|
||||
Edge &GetEdge(std::size_t index) {
|
||||
return edges_[index];
|
||||
}
|
||||
|
||||
/* Created a pruned copy of this graph with minEdgeCount edges. Uses
|
||||
the scores in the max-product semiring to rank edges, as suggested by
|
||||
Colin Cherry */
|
||||
void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const;
|
||||
/* Created a pruned copy of this graph with minEdgeCount edges. Uses
|
||||
the scores in the max-product semiring to rank edges, as suggested by
|
||||
Colin Cherry */
|
||||
void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const;
|
||||
|
||||
std::size_t VertexSize() const { return vertices_.Size(); }
|
||||
std::size_t EdgeSize() const { return edges_.Size(); }
|
||||
std::size_t VertexSize() const {
|
||||
return vertices_.Size();
|
||||
}
|
||||
std::size_t EdgeSize() const {
|
||||
return edges_.Size();
|
||||
}
|
||||
|
||||
bool IsBoundary(const Vocab::Entry* word) const {
|
||||
return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second;
|
||||
}
|
||||
bool IsBoundary(const Vocab::Entry* word) const {
|
||||
return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second;
|
||||
}
|
||||
|
||||
private:
|
||||
FixedAllocator<Edge> edges_;
|
||||
FixedAllocator<Vertex> vertices_;
|
||||
Vocab& vocab_;
|
||||
private:
|
||||
FixedAllocator<Edge> edges_;
|
||||
FixedAllocator<Vertex> vertices_;
|
||||
Vocab& vocab_;
|
||||
};
|
||||
|
||||
class HypergraphException : public util::Exception {
|
||||
public:
|
||||
HypergraphException() {}
|
||||
~HypergraphException() throw() {}
|
||||
class HypergraphException : public util::Exception
|
||||
{
|
||||
public:
|
||||
HypergraphException() {}
|
||||
~HypergraphException() throw() {}
|
||||
};
|
||||
|
||||
|
||||
|
@ -8,12 +8,12 @@
|
||||
using namespace std;
|
||||
using namespace MosesTuning;
|
||||
|
||||
BOOST_AUTO_TEST_CASE(prune)
|
||||
BOOST_AUTO_TEST_CASE(prune)
|
||||
{
|
||||
Vocab vocab;
|
||||
WordVec words;
|
||||
string wordStrings[] =
|
||||
{"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
|
||||
{"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
|
||||
for (size_t i = 0; i < 13; ++i) {
|
||||
words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
|
||||
}
|
||||
@ -105,7 +105,7 @@ BOOST_AUTO_TEST_CASE(prune)
|
||||
|
||||
BOOST_CHECK_EQUAL(5, pruned.EdgeSize());
|
||||
BOOST_CHECK_EQUAL(4, pruned.VertexSize());
|
||||
|
||||
|
||||
//edges retained should be best path (<s> ab jk </s>) and hi
|
||||
BOOST_CHECK_EQUAL(1, pruned.GetVertex(0).GetIncoming().size());
|
||||
BOOST_CHECK_EQUAL(2, pruned.GetVertex(1).GetIncoming().size());
|
||||
@ -115,37 +115,37 @@ BOOST_AUTO_TEST_CASE(prune)
|
||||
const Edge* edge;
|
||||
|
||||
edge = pruned.GetVertex(0).GetIncoming()[0];
|
||||
BOOST_CHECK_EQUAL(1, edge->Words().size());
|
||||
BOOST_CHECK_EQUAL(words[0], edge->Words()[0]);
|
||||
BOOST_CHECK_EQUAL(1, edge->Words().size());
|
||||
BOOST_CHECK_EQUAL(words[0], edge->Words()[0]);
|
||||
|
||||
edge = pruned.GetVertex(1).GetIncoming()[0];
|
||||
BOOST_CHECK_EQUAL(3, edge->Words().size());
|
||||
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
|
||||
BOOST_CHECK_EQUAL(words[2]->first, edge->Words()[1]->first);
|
||||
BOOST_CHECK_EQUAL(words[3]->first, edge->Words()[2]->first);
|
||||
BOOST_CHECK_EQUAL(3, edge->Words().size());
|
||||
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
|
||||
BOOST_CHECK_EQUAL(words[2]->first, edge->Words()[1]->first);
|
||||
BOOST_CHECK_EQUAL(words[3]->first, edge->Words()[2]->first);
|
||||
|
||||
edge = pruned.GetVertex(1).GetIncoming()[1];
|
||||
BOOST_CHECK_EQUAL(3, edge->Words().size());
|
||||
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
|
||||
BOOST_CHECK_EQUAL(words[9]->first, edge->Words()[1]->first);
|
||||
BOOST_CHECK_EQUAL(words[9]->first, edge->Words()[1]->first);
|
||||
BOOST_CHECK_EQUAL(words[10]->first, edge->Words()[2]->first);
|
||||
|
||||
edge = pruned.GetVertex(2).GetIncoming()[0];
|
||||
BOOST_CHECK_EQUAL(3, edge->Words().size());
|
||||
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
|
||||
BOOST_CHECK_EQUAL(words[11]->first, edge->Words()[1]->first);
|
||||
BOOST_CHECK_EQUAL(words[11]->first, edge->Words()[1]->first);
|
||||
BOOST_CHECK_EQUAL(words[12]->first, edge->Words()[2]->first);
|
||||
|
||||
edge = pruned.GetVertex(3).GetIncoming()[0];
|
||||
BOOST_CHECK_EQUAL(2, edge->Words().size());
|
||||
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
|
||||
BOOST_CHECK_EQUAL(words[1]->first, edge->Words()[1]->first);
|
||||
BOOST_CHECK_EQUAL(words[1]->first, edge->Words()[1]->first);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// BOOST_CHECK_EQUAL(words[0], pruned.GetVertex(0).GetIncoming()[0].Words()[0]);
|
||||
|
||||
|
||||
// BOOST_CHECK_EQUAL(words[0], pruned.GetVertex(0).GetIncoming()[0].Words()[0]);
|
||||
|
||||
|
||||
}
|
||||
|
@ -174,19 +174,19 @@ float InterpolatedScorer::calculateScore(const std::vector<ScoreStatsType>& tota
|
||||
|
||||
float InterpolatedScorer::getReferenceLength(const std::vector<ScoreStatsType>& totals) const
|
||||
{
|
||||
size_t scorerNum = 0;
|
||||
size_t last = 0;
|
||||
float refLen = 0;
|
||||
for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
|
||||
itsc != m_scorers.end(); ++itsc) {
|
||||
int numScoresScorer = (*itsc)->NumberOfScores();
|
||||
std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer);
|
||||
refLen += (*itsc)->getReferenceLength(totals_scorer) * m_scorer_weights[scorerNum];
|
||||
last += numScoresScorer;
|
||||
scorerNum++;
|
||||
}
|
||||
return refLen;
|
||||
size_t scorerNum = 0;
|
||||
size_t last = 0;
|
||||
float refLen = 0;
|
||||
for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
|
||||
itsc != m_scorers.end(); ++itsc) {
|
||||
int numScoresScorer = (*itsc)->NumberOfScores();
|
||||
std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer);
|
||||
refLen += (*itsc)->getReferenceLength(totals_scorer) * m_scorer_weights[scorerNum];
|
||||
last += numScoresScorer;
|
||||
scorerNum++;
|
||||
}
|
||||
return refLen;
|
||||
}
|
||||
|
||||
void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
|
||||
{
|
||||
|
@ -9,7 +9,8 @@ namespace MosesTuning
|
||||
{
|
||||
|
||||
|
||||
void MiraFeatureVector::InitSparse(const SparseVector& sparse, size_t ignoreLimit) {
|
||||
void MiraFeatureVector::InitSparse(const SparseVector& sparse, size_t ignoreLimit)
|
||||
{
|
||||
vector<size_t> sparseFeats = sparse.feats();
|
||||
bool bFirst = true;
|
||||
size_t lastFeat = 0;
|
||||
@ -40,7 +41,8 @@ MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
|
||||
InitSparse(vec.sparse);
|
||||
}
|
||||
|
||||
MiraFeatureVector::MiraFeatureVector(const SparseVector& sparse, size_t num_dense) {
|
||||
MiraFeatureVector::MiraFeatureVector(const SparseVector& sparse, size_t num_dense)
|
||||
{
|
||||
m_dense.resize(num_dense);
|
||||
//Assume that features with id [0,num_dense) are the dense features
|
||||
for (size_t id = 0; id < num_dense; ++id) {
|
||||
@ -162,7 +164,8 @@ MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector&
|
||||
return MiraFeatureVector(dense,sparseFeats,sparseVals);
|
||||
}
|
||||
|
||||
bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b) {
|
||||
bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b)
|
||||
{
|
||||
ValType eps = 1e-8;
|
||||
//dense features
|
||||
if (a.m_dense.size() != b.m_dense.size()) return false;
|
||||
|
@ -93,7 +93,8 @@ void MiraWeightVector::update(size_t index, ValType delta)
|
||||
m_lastUpdated[index] = m_numUpdates;
|
||||
}
|
||||
|
||||
void MiraWeightVector::ToSparse(SparseVector* sparse) const {
|
||||
void MiraWeightVector::ToSparse(SparseVector* sparse) const
|
||||
{
|
||||
for (size_t i = 0; i < m_weights.size(); ++i) {
|
||||
if(abs(m_weights[i])>1e-8) {
|
||||
sparse->set(i,m_weights[i]);
|
||||
@ -171,7 +172,8 @@ size_t AvgWeightVector::size() const
|
||||
return m_wv.m_weights.size();
|
||||
}
|
||||
|
||||
void AvgWeightVector::ToSparse(SparseVector* sparse) const {
|
||||
void AvgWeightVector::ToSparse(SparseVector* sparse) const
|
||||
{
|
||||
for (size_t i = 0; i < size(); ++i) {
|
||||
ValType w = weight(i);
|
||||
if(abs(w)>1e-8) {
|
||||
|
@ -23,7 +23,7 @@ namespace MosesTuning
|
||||
*/
|
||||
class StatisticsBasedScorer : public Scorer
|
||||
{
|
||||
friend class HopeFearDecoder;
|
||||
friend class HopeFearDecoder;
|
||||
|
||||
public:
|
||||
StatisticsBasedScorer(const std::string& name, const std::string& config);
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -23,15 +23,15 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
using namespace std;
|
||||
namespace TERCpp
|
||||
{
|
||||
string alignmentStruct::toString()
|
||||
{
|
||||
stringstream s;
|
||||
string alignmentStruct::toString()
|
||||
{
|
||||
stringstream s;
|
||||
// s << "nword : " << vectorToString(nwords)<<endl;
|
||||
// s << "alignment" << vectorToString(alignment)<<endl;
|
||||
// s << "afterShift" << vectorToString(alignment)<<endl;
|
||||
s << "Nothing to be printed" <<endl;
|
||||
return s.str();
|
||||
}
|
||||
s << "Nothing to be printed" <<endl;
|
||||
return s.str();
|
||||
}
|
||||
|
||||
// alignmentStruct::alignmentStruct()
|
||||
// {
|
||||
@ -99,7 +99,7 @@ namespace TERCpp
|
||||
// return s.str();
|
||||
// }
|
||||
|
||||
/* The distance of the shift. */
|
||||
/* The distance of the shift. */
|
||||
// int alignmentStruct::distance()
|
||||
// {
|
||||
// if (moveto < start)
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -34,10 +34,10 @@ using namespace Tools;
|
||||
|
||||
namespace TERCpp
|
||||
{
|
||||
class alignmentStruct
|
||||
{
|
||||
private:
|
||||
public:
|
||||
class alignmentStruct
|
||||
{
|
||||
private:
|
||||
public:
|
||||
|
||||
// alignmentStruct();
|
||||
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
|
||||
@ -53,14 +53,14 @@ namespace TERCpp
|
||||
// int end;
|
||||
// int moveto;
|
||||
// int newloc;
|
||||
vector<string> nwords; // The words we shifted
|
||||
vector<char> alignment ; // for pra_more output
|
||||
vector<vecInt> aftershift; // for pra_more output
|
||||
// This is used to store the cost of a shift, so we don't have to
|
||||
// calculate it multiple times.
|
||||
double cost;
|
||||
string toString();
|
||||
};
|
||||
vector<string> nwords; // The words we shifted
|
||||
vector<char> alignment ; // for pra_more output
|
||||
vector<vecInt> aftershift; // for pra_more output
|
||||
// This is used to store the cost of a shift, so we don't have to
|
||||
// calculate it multiple times.
|
||||
double cost;
|
||||
string toString();
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -36,10 +36,10 @@ using namespace Tools;
|
||||
|
||||
namespace TERCpp
|
||||
{
|
||||
class bestShiftStruct
|
||||
{
|
||||
private:
|
||||
public:
|
||||
class bestShiftStruct
|
||||
{
|
||||
private:
|
||||
public:
|
||||
|
||||
// alignmentStruct();
|
||||
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
|
||||
@ -55,16 +55,16 @@ namespace TERCpp
|
||||
// int end;
|
||||
// int moveto;
|
||||
// int newloc;
|
||||
terShift m_best_shift;
|
||||
terAlignment m_best_align;
|
||||
bool m_empty;
|
||||
terShift m_best_shift;
|
||||
terAlignment m_best_align;
|
||||
bool m_empty;
|
||||
// vector<string> nwords; // The words we shifted
|
||||
// char* alignment ; // for pra_more output
|
||||
// vector<vecInt> aftershift; // for pra_more output
|
||||
// This is used to store the cost of a shift, so we don't have to
|
||||
// calculate it multiple times.
|
||||
// This is used to store the cost of a shift, so we don't have to
|
||||
// calculate it multiple times.
|
||||
// double cost;
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -28,156 +28,142 @@ using namespace std;
|
||||
namespace HashMapSpace
|
||||
{
|
||||
// hashMap::hashMap();
|
||||
/* hashMap::~hashMap()
|
||||
{
|
||||
// vector<stringHasher>::const_iterator del = m_hasher.begin();
|
||||
for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
|
||||
{
|
||||
delete(*del);
|
||||
}
|
||||
}*/
|
||||
/**
|
||||
* int hashMap::trouve ( long searchKey )
|
||||
* @param searchKey
|
||||
* @return
|
||||
*/
|
||||
int hashMap::trouve ( long searchKey )
|
||||
/* hashMap::~hashMap()
|
||||
{
|
||||
long foundKey;
|
||||
// vector<stringHasher>::const_iterator del = m_hasher.begin();
|
||||
for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
|
||||
{
|
||||
delete(*del);
|
||||
}
|
||||
}*/
|
||||
/**
|
||||
* int hashMap::trouve ( long searchKey )
|
||||
* @param searchKey
|
||||
* @return
|
||||
*/
|
||||
int hashMap::trouve ( long searchKey )
|
||||
{
|
||||
long foundKey;
|
||||
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey ) {
|
||||
return 1;
|
||||
}
|
||||
int hashMap::trouve ( string key )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
int hashMap::trouve ( string key )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;;
|
||||
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey ) {
|
||||
return 1;
|
||||
}
|
||||
/**
|
||||
* long hashMap::hashValue ( string key )
|
||||
* @param key
|
||||
* @return
|
||||
*/
|
||||
long hashMap::hashValue ( string key )
|
||||
{
|
||||
locale loc; // the "C" locale
|
||||
const collate<char>& coll = use_facet<collate<char> >(loc);
|
||||
return coll.hash(key.data(),key.data()+key.length());
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
/**
|
||||
* long hashMap::hashValue ( string key )
|
||||
* @param key
|
||||
* @return
|
||||
*/
|
||||
long hashMap::hashValue ( string key )
|
||||
{
|
||||
locale loc; // the "C" locale
|
||||
const collate<char>& coll = use_facet<collate<char> >(loc);
|
||||
return coll.hash(key.data(),key.data()+key.length());
|
||||
// boost::hash<string> hasher;
|
||||
// return hasher ( key );
|
||||
}
|
||||
/**
|
||||
* void hashMap::addHasher ( string key, string value )
|
||||
* @param key
|
||||
* @param value
|
||||
*/
|
||||
void hashMap::addHasher ( string key, string value )
|
||||
{
|
||||
if ( trouve ( hashValue ( key ) ) ==0 )
|
||||
{
|
||||
}
|
||||
/**
|
||||
* void hashMap::addHasher ( string key, string value )
|
||||
* @param key
|
||||
* @param value
|
||||
*/
|
||||
void hashMap::addHasher ( string key, string value )
|
||||
{
|
||||
if ( trouve ( hashValue ( key ) ) ==0 ) {
|
||||
// cerr << "ICI1" <<endl;
|
||||
stringHasher H ( hashValue ( key ),key,value );
|
||||
stringHasher H ( hashValue ( key ),key,value );
|
||||
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
|
||||
// cerr << "ICI2" <<endl;
|
||||
|
||||
m_hasher.push_back ( H );
|
||||
}
|
||||
}
|
||||
stringHasher hashMap::getHasher ( string key )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;
|
||||
stringHasher defaut(0,"","");
|
||||
m_hasher.push_back ( H );
|
||||
}
|
||||
}
|
||||
stringHasher hashMap::getHasher ( string key )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;
|
||||
stringHasher defaut(0,"","");
|
||||
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
return ( *l_hasher );
|
||||
}
|
||||
}
|
||||
return defaut;
|
||||
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey ) {
|
||||
return ( *l_hasher );
|
||||
}
|
||||
string hashMap::getValue ( string key )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;
|
||||
}
|
||||
return defaut;
|
||||
}
|
||||
string hashMap::getValue ( string key )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;
|
||||
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey ) {
|
||||
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
|
||||
return ( *l_hasher ).getValue();
|
||||
}
|
||||
}
|
||||
return "";
|
||||
return ( *l_hasher ).getValue();
|
||||
}
|
||||
string hashMap::searchValue ( string value )
|
||||
{
|
||||
}
|
||||
return "";
|
||||
}
|
||||
string hashMap::searchValue ( string value )
|
||||
{
|
||||
// long searchKey=hashValue ( key );
|
||||
// long foundKey;
|
||||
string foundValue;
|
||||
string foundValue;
|
||||
|
||||
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundValue= ( *l_hasher ).getValue();
|
||||
if ( foundValue.compare ( value ) == 0 )
|
||||
{
|
||||
return ( *l_hasher ).getKey();
|
||||
}
|
||||
}
|
||||
return "";
|
||||
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
|
||||
foundValue= ( *l_hasher ).getValue();
|
||||
if ( foundValue.compare ( value ) == 0 ) {
|
||||
return ( *l_hasher ).getKey();
|
||||
}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
void hashMap::setValue ( string key , string value )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;
|
||||
void hashMap::setValue ( string key , string value )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;
|
||||
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
( *l_hasher ).setValue ( value );
|
||||
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey ) {
|
||||
( *l_hasher ).setValue ( value );
|
||||
// return ( *l_hasher ).getValue();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
void hashMap::printHash()
|
||||
{
|
||||
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
|
||||
}
|
||||
}
|
||||
/**
|
||||
*
|
||||
*/
|
||||
void hashMap::printHash()
|
||||
{
|
||||
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
|
||||
cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -35,27 +35,27 @@ using namespace std;
|
||||
|
||||
namespace HashMapSpace
|
||||
{
|
||||
class hashMap
|
||||
{
|
||||
private:
|
||||
vector<stringHasher> m_hasher;
|
||||
class hashMap
|
||||
{
|
||||
private:
|
||||
vector<stringHasher> m_hasher;
|
||||
|
||||
public:
|
||||
public:
|
||||
// ~hashMap();
|
||||
long hashValue ( string key );
|
||||
int trouve ( long searchKey );
|
||||
int trouve ( string key );
|
||||
void addHasher ( string key, string value );
|
||||
stringHasher getHasher ( string key );
|
||||
string getValue ( string key );
|
||||
string searchValue ( string key );
|
||||
void setValue ( string key , string value );
|
||||
void printHash();
|
||||
vector<stringHasher> getHashMap();
|
||||
string printStringHash();
|
||||
string printStringHash2();
|
||||
string printStringHashForLexicon();
|
||||
};
|
||||
long hashValue ( string key );
|
||||
int trouve ( long searchKey );
|
||||
int trouve ( string key );
|
||||
void addHasher ( string key, string value );
|
||||
stringHasher getHasher ( string key );
|
||||
string getValue ( string key );
|
||||
string searchValue ( string key );
|
||||
void setValue ( string key , string value );
|
||||
void printHash();
|
||||
vector<stringHasher> getHashMap();
|
||||
string printStringHash();
|
||||
string printStringHash2();
|
||||
string printStringHashForLexicon();
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -28,117 +28,108 @@ using namespace std;
|
||||
namespace HashMapSpace
|
||||
{
|
||||
// hashMapInfos::hashMap();
|
||||
/* hashMapInfos::~hashMap()
|
||||
{
|
||||
// vector<infosHasher>::const_iterator del = m_hasher.begin();
|
||||
for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
|
||||
{
|
||||
delete(*del);
|
||||
}
|
||||
}*/
|
||||
/**
|
||||
* int hashMapInfos::trouve ( long searchKey )
|
||||
* @param searchKey
|
||||
* @return
|
||||
*/
|
||||
int hashMapInfos::trouve ( long searchKey )
|
||||
/* hashMapInfos::~hashMap()
|
||||
{
|
||||
long foundKey;
|
||||
// vector<infosHasher>::const_iterator del = m_hasher.begin();
|
||||
for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
|
||||
{
|
||||
delete(*del);
|
||||
}
|
||||
}*/
|
||||
/**
|
||||
* int hashMapInfos::trouve ( long searchKey )
|
||||
* @param searchKey
|
||||
* @return
|
||||
*/
|
||||
int hashMapInfos::trouve ( long searchKey )
|
||||
{
|
||||
long foundKey;
|
||||
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey ) {
|
||||
return 1;
|
||||
}
|
||||
int hashMapInfos::trouve ( string key )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
int hashMapInfos::trouve ( string key )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;;
|
||||
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey ) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* long hashMapInfos::hashValue ( string key )
|
||||
* @param key
|
||||
* @return
|
||||
*/
|
||||
long hashMapInfos::hashValue ( string key )
|
||||
{
|
||||
locale loc; // the "C" locale
|
||||
const collate<char>& coll = use_facet<collate<char> >(loc);
|
||||
return coll.hash(key.data(),key.data()+key.length());
|
||||
/**
|
||||
* long hashMapInfos::hashValue ( string key )
|
||||
* @param key
|
||||
* @return
|
||||
*/
|
||||
long hashMapInfos::hashValue ( string key )
|
||||
{
|
||||
locale loc; // the "C" locale
|
||||
const collate<char>& coll = use_facet<collate<char> >(loc);
|
||||
return coll.hash(key.data(),key.data()+key.length());
|
||||
// boost::hash<string> hasher;
|
||||
// return hasher ( key );
|
||||
}
|
||||
/**
|
||||
* void hashMapInfos::addHasher ( string key, string value )
|
||||
* @param key
|
||||
* @param value
|
||||
*/
|
||||
void hashMapInfos::addHasher ( string key, vector<int> value )
|
||||
{
|
||||
if ( trouve ( hashValue ( key ) ) ==0 )
|
||||
{
|
||||
}
|
||||
/**
|
||||
* void hashMapInfos::addHasher ( string key, string value )
|
||||
* @param key
|
||||
* @param value
|
||||
*/
|
||||
void hashMapInfos::addHasher ( string key, vector<int> value )
|
||||
{
|
||||
if ( trouve ( hashValue ( key ) ) ==0 ) {
|
||||
// cerr << "ICI1" <<endl;
|
||||
infosHasher H ( hashValue ( key ),key,value );
|
||||
infosHasher H ( hashValue ( key ),key,value );
|
||||
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
|
||||
// cerr << "ICI2" <<endl;
|
||||
|
||||
m_hasher.push_back ( H );
|
||||
}
|
||||
}
|
||||
void hashMapInfos::addValue ( string key, vector<int> value )
|
||||
{
|
||||
addHasher ( key, value );
|
||||
}
|
||||
infosHasher hashMapInfos::getHasher ( string key )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;
|
||||
m_hasher.push_back ( H );
|
||||
}
|
||||
}
|
||||
void hashMapInfos::addValue ( string key, vector<int> value )
|
||||
{
|
||||
addHasher ( key, value );
|
||||
}
|
||||
infosHasher hashMapInfos::getHasher ( string key )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;
|
||||
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
return ( *l_hasher );
|
||||
}
|
||||
}
|
||||
vector<int> temp;
|
||||
infosHasher defaut(0,"",temp);
|
||||
return defaut;
|
||||
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey ) {
|
||||
return ( *l_hasher );
|
||||
}
|
||||
vector<int> hashMapInfos::getValue ( string key )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;
|
||||
vector<int> retour;
|
||||
}
|
||||
vector<int> temp;
|
||||
infosHasher defaut(0,"",temp);
|
||||
return defaut;
|
||||
}
|
||||
vector<int> hashMapInfos::getValue ( string key )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;
|
||||
vector<int> retour;
|
||||
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey ) {
|
||||
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
|
||||
return ( *l_hasher ).getValue();
|
||||
}
|
||||
}
|
||||
return retour;
|
||||
return ( *l_hasher ).getValue();
|
||||
}
|
||||
}
|
||||
return retour;
|
||||
}
|
||||
// string hashMapInfos::searchValue ( string value )
|
||||
// {
|
||||
// // long searchKey=hashValue ( key );
|
||||
@ -158,42 +149,38 @@ namespace HashMapSpace
|
||||
// }
|
||||
//
|
||||
|
||||
void hashMapInfos::setValue ( string key , vector<int> value )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;
|
||||
void hashMapInfos::setValue ( string key , vector<int> value )
|
||||
{
|
||||
long searchKey=hashValue ( key );
|
||||
long foundKey;
|
||||
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
( *l_hasher ).setValue ( value );
|
||||
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
|
||||
foundKey= ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey ) {
|
||||
( *l_hasher ).setValue ( value );
|
||||
// return ( *l_hasher ).getValue();
|
||||
}
|
||||
}
|
||||
}
|
||||
string hashMapInfos::toString ()
|
||||
{
|
||||
stringstream to_return;
|
||||
for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
to_return << (*l_hasher).toString();
|
||||
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
|
||||
}
|
||||
return to_return.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
string hashMapInfos::toString ()
|
||||
{
|
||||
stringstream to_return;
|
||||
for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
|
||||
to_return << (*l_hasher).toString();
|
||||
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
|
||||
}
|
||||
return to_return.str();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
void hashMapInfos::printHash()
|
||||
{
|
||||
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
/**
|
||||
*
|
||||
*/
|
||||
void hashMapInfos::printHash()
|
||||
{
|
||||
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
|
||||
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -34,29 +34,29 @@ using namespace std;
|
||||
|
||||
namespace HashMapSpace
|
||||
{
|
||||
class hashMapInfos
|
||||
{
|
||||
private:
|
||||
vector<infosHasher> m_hasher;
|
||||
class hashMapInfos
|
||||
{
|
||||
private:
|
||||
vector<infosHasher> m_hasher;
|
||||
|
||||
public:
|
||||
public:
|
||||
// ~hashMap();
|
||||
long hashValue ( string key );
|
||||
int trouve ( long searchKey );
|
||||
int trouve ( string key );
|
||||
void addHasher ( string key, vector<int> value );
|
||||
void addValue ( string key, vector<int> value );
|
||||
infosHasher getHasher ( string key );
|
||||
vector<int> getValue ( string key );
|
||||
long hashValue ( string key );
|
||||
int trouve ( long searchKey );
|
||||
int trouve ( string key );
|
||||
void addHasher ( string key, vector<int> value );
|
||||
void addValue ( string key, vector<int> value );
|
||||
infosHasher getHasher ( string key );
|
||||
vector<int> getValue ( string key );
|
||||
// string searchValue ( string key );
|
||||
void setValue ( string key , vector<int> value );
|
||||
void printHash();
|
||||
string toString();
|
||||
vector<infosHasher> getHashMap();
|
||||
string printStringHash();
|
||||
string printStringHash2();
|
||||
string printStringHashForLexicon();
|
||||
};
|
||||
void setValue ( string key , vector<int> value );
|
||||
void printHash();
|
||||
string toString();
|
||||
vector<infosHasher> getHashMap();
|
||||
string printStringHash();
|
||||
string printStringHash2();
|
||||
string printStringHashForLexicon();
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -27,179 +27,166 @@ using namespace std;
|
||||
|
||||
namespace HashMapSpace
|
||||
{
|
||||
// hashMapStringInfos::hashMap();
|
||||
/* hashMapStringInfos::~hashMap()
|
||||
{
|
||||
// vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
|
||||
for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
|
||||
{
|
||||
delete(*del);
|
||||
}
|
||||
}*/
|
||||
/**
|
||||
* int hashMapStringInfos::trouve ( long searchKey )
|
||||
* @param searchKey
|
||||
* @return
|
||||
*/
|
||||
int hashMapStringInfos::trouve ( long searchKey )
|
||||
{
|
||||
long foundKey;
|
||||
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey = ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
// hashMapStringInfos::hashMap();
|
||||
/* hashMapStringInfos::~hashMap()
|
||||
{
|
||||
// vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
|
||||
for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
|
||||
{
|
||||
delete(*del);
|
||||
}
|
||||
}*/
|
||||
/**
|
||||
* int hashMapStringInfos::trouve ( long searchKey )
|
||||
* @param searchKey
|
||||
* @return
|
||||
*/
|
||||
int hashMapStringInfos::trouve ( long searchKey )
|
||||
{
|
||||
long foundKey;
|
||||
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
|
||||
foundKey = ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey ) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hashMapStringInfos::trouve ( string key )
|
||||
{
|
||||
long searchKey = hashValue ( key );
|
||||
long foundKey;;
|
||||
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey = ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
int hashMapStringInfos::trouve ( string key )
|
||||
{
|
||||
long searchKey = hashValue ( key );
|
||||
long foundKey;;
|
||||
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
|
||||
foundKey = ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey ) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* long hashMapStringInfos::hashValue ( string key )
|
||||
* @param key
|
||||
* @return
|
||||
*/
|
||||
long hashMapStringInfos::hashValue ( string key )
|
||||
{
|
||||
locale loc; // the "C" locale
|
||||
const collate<char>& coll = use_facet<collate<char> > ( loc );
|
||||
return coll.hash ( key.data(), key.data() + key.length() );
|
||||
/**
|
||||
* long hashMapStringInfos::hashValue ( string key )
|
||||
* @param key
|
||||
* @return
|
||||
*/
|
||||
long hashMapStringInfos::hashValue ( string key )
|
||||
{
|
||||
locale loc; // the "C" locale
|
||||
const collate<char>& coll = use_facet<collate<char> > ( loc );
|
||||
return coll.hash ( key.data(), key.data() + key.length() );
|
||||
// boost::hash<string> hasher;
|
||||
// return hasher ( key );
|
||||
}
|
||||
/**
|
||||
* void hashMapStringInfos::addHasher ( string key, string value )
|
||||
* @param key
|
||||
* @param value
|
||||
*/
|
||||
void hashMapStringInfos::addHasher ( string key, vector<string> value )
|
||||
{
|
||||
if ( trouve ( hashValue ( key ) ) == 0 )
|
||||
{
|
||||
// cerr << "ICI1" <<endl;
|
||||
stringInfosHasher H ( hashValue ( key ), key, value );
|
||||
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
|
||||
// cerr << "ICI2" <<endl;
|
||||
}
|
||||
/**
|
||||
* void hashMapStringInfos::addHasher ( string key, string value )
|
||||
* @param key
|
||||
* @param value
|
||||
*/
|
||||
void hashMapStringInfos::addHasher ( string key, vector<string> value )
|
||||
{
|
||||
if ( trouve ( hashValue ( key ) ) == 0 ) {
|
||||
// cerr << "ICI1" <<endl;
|
||||
stringInfosHasher H ( hashValue ( key ), key, value );
|
||||
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
|
||||
// cerr << "ICI2" <<endl;
|
||||
|
||||
m_hasher.push_back ( H );
|
||||
}
|
||||
m_hasher.push_back ( H );
|
||||
}
|
||||
}
|
||||
void hashMapStringInfos::addValue ( string key, vector<string> value )
|
||||
{
|
||||
addHasher ( key, value );
|
||||
}
|
||||
stringInfosHasher hashMapStringInfos::getHasher ( string key )
|
||||
{
|
||||
long searchKey = hashValue ( key );
|
||||
long foundKey;
|
||||
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
|
||||
foundKey = ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey ) {
|
||||
return ( *l_hasher );
|
||||
}
|
||||
void hashMapStringInfos::addValue ( string key, vector<string> value )
|
||||
{
|
||||
addHasher ( key, value );
|
||||
}
|
||||
vector<string> tmp;
|
||||
stringInfosHasher defaut ( 0, "", tmp );
|
||||
return defaut;
|
||||
}
|
||||
vector<string> hashMapStringInfos::getValue ( string key )
|
||||
{
|
||||
long searchKey = hashValue ( key );
|
||||
long foundKey;
|
||||
vector<string> retour;
|
||||
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
|
||||
foundKey = ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey ) {
|
||||
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
|
||||
return ( *l_hasher ).getValue();
|
||||
}
|
||||
stringInfosHasher hashMapStringInfos::getHasher ( string key )
|
||||
{
|
||||
long searchKey = hashValue ( key );
|
||||
long foundKey;
|
||||
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey = ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
return ( *l_hasher );
|
||||
}
|
||||
}
|
||||
vector<string> tmp;
|
||||
stringInfosHasher defaut ( 0, "", tmp );
|
||||
return defaut;
|
||||
}
|
||||
vector<string> hashMapStringInfos::getValue ( string key )
|
||||
{
|
||||
long searchKey = hashValue ( key );
|
||||
long foundKey;
|
||||
vector<string> retour;
|
||||
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey = ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
|
||||
return ( *l_hasher ).getValue();
|
||||
}
|
||||
}
|
||||
return retour;
|
||||
}
|
||||
// string hashMapStringInfos::searchValue ( string value )
|
||||
// {
|
||||
// // long searchKey=hashValue ( key );
|
||||
// // long foundKey;
|
||||
// vector<int> foundValue;
|
||||
//
|
||||
// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
// {
|
||||
// foundValue= ( *l_hasher ).getValue();
|
||||
// /* if ( foundValue.compare ( value ) == 0 )
|
||||
// {
|
||||
// return ( *l_hasher ).getKey();
|
||||
// }*/
|
||||
// }
|
||||
// return "";
|
||||
// }
|
||||
//
|
||||
}
|
||||
return retour;
|
||||
}
|
||||
// string hashMapStringInfos::searchValue ( string value )
|
||||
// {
|
||||
// // long searchKey=hashValue ( key );
|
||||
// // long foundKey;
|
||||
// vector<int> foundValue;
|
||||
//
|
||||
// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
|
||||
// {
|
||||
// foundValue= ( *l_hasher ).getValue();
|
||||
// /* if ( foundValue.compare ( value ) == 0 )
|
||||
// {
|
||||
// return ( *l_hasher ).getKey();
|
||||
// }*/
|
||||
// }
|
||||
// return "";
|
||||
// }
|
||||
//
|
||||
|
||||
void hashMapStringInfos::setValue ( string key , vector<string> value )
|
||||
{
|
||||
long searchKey = hashValue ( key );
|
||||
long foundKey;
|
||||
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
|
||||
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
foundKey = ( *l_hasher ).getHashKey();
|
||||
if ( searchKey == foundKey )
|
||||
{
|
||||
( *l_hasher ).setValue ( value );
|
||||
// return ( *l_hasher ).getValue();
|
||||
}
|
||||
}
|
||||
// Replaces the stored value of every entry whose hash key equals the hash of
// `key`. The scan covers the whole container (it does not stop at the first
// match) and does nothing when no entry matches.
void hashMapStringInfos::setValue ( string key , vector<string> value )
{
  const long wantedHash = hashValue ( key );
  vector<stringInfosHasher>::iterator entry = m_hasher.begin();
  while ( entry != m_hasher.end() ) {
    if ( entry->getHashKey() == wantedHash ) {
      entry->setValue ( value );
    }
    ++entry;
  }
}
|
||||
|
||||
string hashMapStringInfos::toString ()
|
||||
{
|
||||
stringstream to_return;
|
||||
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
to_return << (*l_hasher).toString();
|
||||
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
|
||||
}
|
||||
return to_return.str();
|
||||
}
|
||||
// Concatenates the toString() text of every stored entry, in container order.
string hashMapStringInfos::toString ()
{
  stringstream dump;
  vector<stringInfosHasher>::iterator entry = m_hasher.begin();
  while ( entry != m_hasher.end() ) {
    dump << entry->toString();
    ++entry;
  }
  return dump.str();
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
void hashMapStringInfos::printHash()
|
||||
{
|
||||
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
|
||||
{
|
||||
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
|
||||
}
|
||||
}
|
||||
vector< stringInfosHasher > hashMapStringInfos::getHashMap()
|
||||
{
|
||||
return m_hasher;
|
||||
}
|
||||
/**
|
||||
*
|
||||
*/
|
||||
void hashMapStringInfos::printHash()
|
||||
{
|
||||
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
|
||||
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
|
||||
}
|
||||
}
|
||||
// Returns a copy of the underlying list of entries.
vector< stringInfosHasher > hashMapStringInfos::getHashMap()
{
  return this->m_hasher;
}
|
||||
|
||||
|
||||
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -34,29 +34,29 @@ using namespace std;
|
||||
|
||||
namespace HashMapSpace
|
||||
{
|
||||
class hashMapStringInfos
|
||||
{
|
||||
private:
|
||||
vector<stringInfosHasher> m_hasher;
|
||||
class hashMapStringInfos
|
||||
{
|
||||
private:
|
||||
vector<stringInfosHasher> m_hasher;
|
||||
|
||||
public:
|
||||
public:
|
||||
// ~hashMap();
|
||||
long hashValue ( string key );
|
||||
int trouve ( long searchKey );
|
||||
int trouve ( string key );
|
||||
void addHasher ( string key, vector<string> value );
|
||||
void addValue ( string key, vector<string> value );
|
||||
stringInfosHasher getHasher ( string key );
|
||||
vector<string> getValue ( string key );
|
||||
long hashValue ( string key );
|
||||
int trouve ( long searchKey );
|
||||
int trouve ( string key );
|
||||
void addHasher ( string key, vector<string> value );
|
||||
void addValue ( string key, vector<string> value );
|
||||
stringInfosHasher getHasher ( string key );
|
||||
vector<string> getValue ( string key );
|
||||
// string searchValue ( string key );
|
||||
void setValue ( string key , vector<string> value );
|
||||
void printHash();
|
||||
string toString();
|
||||
vector<stringInfosHasher> getHashMap();
|
||||
string printStringHash();
|
||||
string printStringHash2();
|
||||
string printStringHashForLexicon();
|
||||
};
|
||||
void setValue ( string key , vector<string> value );
|
||||
void printHash();
|
||||
string toString();
|
||||
vector<stringInfosHasher> getHashMap();
|
||||
string printStringHash();
|
||||
string printStringHash2();
|
||||
string printStringHashForLexicon();
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -27,35 +27,35 @@ using namespace Tools;
|
||||
|
||||
namespace HashMapSpace
|
||||
{
|
||||
infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
|
||||
{
|
||||
m_hashKey=cle;
|
||||
m_key=cleTxt;
|
||||
m_value=valueVecInt;
|
||||
}
|
||||
// Builds an entry from its hash key (cle), its text key (cleTxt) and its
// integer-vector value.
infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
  : m_hashKey ( cle ),
    m_key ( cleTxt ),
    m_value ( valueVecInt )
{
}
|
||||
// infosHasher::~infosHasher(){};*/
|
||||
long infosHasher::getHashKey()
|
||||
{
|
||||
return m_hashKey;
|
||||
}
|
||||
string infosHasher::getKey()
|
||||
{
|
||||
return m_key;
|
||||
}
|
||||
vector<int> infosHasher::getValue()
|
||||
{
|
||||
return m_value;
|
||||
}
|
||||
void infosHasher::setValue ( vector<int> value )
|
||||
{
|
||||
m_value=value;
|
||||
}
|
||||
string infosHasher::toString()
|
||||
{
|
||||
stringstream to_return;
|
||||
to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
|
||||
return to_return.str();
|
||||
}
|
||||
long infosHasher::getHashKey()
|
||||
{
|
||||
return m_hashKey;
|
||||
}
|
||||
// Returns a copy of the text key stored in this entry.
string infosHasher::getKey()
{
  return this->m_key;
}
|
||||
// Returns a copy of the integer vector stored in this entry.
vector<int> infosHasher::getValue()
{
  return this->m_value;
}
|
||||
// Replaces the stored integer vector with `value`.
void infosHasher::setValue ( vector<int> value )
{
  this->m_value = value;
}
|
||||
// Formats the entry as one line: hash key, text key and the value (as rendered
// by vectorToString with a tab separator), tab-separated and newline-terminated.
string infosHasher::toString()
{
  stringstream line;
  line << m_hashKey << "\t";
  line << m_key << "\t";
  line << vectorToString(m_value,"\t") << endl;
  return line.str();
}
|
||||
|
||||
|
||||
// typedef stdext::hash_map<std::string,string, stringhasher> HASH_S_S;
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -31,23 +31,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
using namespace std;
|
||||
namespace HashMapSpace
|
||||
{
|
||||
class infosHasher
|
||||
{
|
||||
private:
|
||||
long m_hashKey;
|
||||
string m_key;
|
||||
vector<int> m_value;
|
||||
class infosHasher
|
||||
{
|
||||
private:
|
||||
long m_hashKey;
|
||||
string m_key;
|
||||
vector<int> m_value;
|
||||
|
||||
public:
|
||||
infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
|
||||
long getHashKey();
|
||||
string getKey();
|
||||
vector<int> getValue();
|
||||
void setValue ( vector<int> value );
|
||||
string toString();
|
||||
public:
|
||||
infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
|
||||
long getHashKey();
|
||||
string getKey();
|
||||
vector<int> getValue();
|
||||
void setValue ( vector<int> value );
|
||||
string toString();
|
||||
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -26,29 +26,29 @@ using namespace std;
|
||||
|
||||
namespace HashMapSpace
|
||||
{
|
||||
stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
|
||||
{
|
||||
m_hashKey=cle;
|
||||
m_key=cleTxt;
|
||||
m_value=valueTxt;
|
||||
}
|
||||
// Builds an entry from its hash key (cle), its text key (cleTxt) and its
// string value.
stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
  : m_hashKey ( cle ),
    m_key ( cleTxt ),
    m_value ( valueTxt )
{
}
|
||||
// stringHasher::~stringHasher(){};*/
|
||||
long stringHasher::getHashKey()
|
||||
{
|
||||
return m_hashKey;
|
||||
}
|
||||
string stringHasher::getKey()
|
||||
{
|
||||
return m_key;
|
||||
}
|
||||
string stringHasher::getValue()
|
||||
{
|
||||
return m_value;
|
||||
}
|
||||
void stringHasher::setValue ( string value )
|
||||
{
|
||||
m_value=value;
|
||||
}
|
||||
long stringHasher::getHashKey()
|
||||
{
|
||||
return m_hashKey;
|
||||
}
|
||||
// Returns a copy of the text key stored in this entry.
string stringHasher::getKey()
{
  return this->m_key;
}
|
||||
// Returns a copy of the string value stored in this entry.
string stringHasher::getValue()
{
  return this->m_value;
}
|
||||
// Replaces the stored string value with `value`.
void stringHasher::setValue ( string value )
{
  this->m_value = value;
}
|
||||
|
||||
|
||||
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -28,22 +28,22 @@ using namespace std;
|
||||
namespace HashMapSpace
|
||||
{
|
||||
|
||||
class stringHasher
|
||||
{
|
||||
private:
|
||||
long m_hashKey;
|
||||
string m_key;
|
||||
string m_value;
|
||||
class stringHasher
|
||||
{
|
||||
private:
|
||||
long m_hashKey;
|
||||
string m_key;
|
||||
string m_value;
|
||||
|
||||
public:
|
||||
stringHasher ( long cle, string cleTxt, string valueTxt );
|
||||
long getHashKey();
|
||||
string getKey();
|
||||
string getValue();
|
||||
void setValue ( string value );
|
||||
public:
|
||||
stringHasher ( long cle, string cleTxt, string valueTxt );
|
||||
long getHashKey();
|
||||
string getKey();
|
||||
string getValue();
|
||||
void setValue ( string value );
|
||||
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -27,35 +27,35 @@ using namespace Tools;
|
||||
|
||||
namespace HashMapSpace
|
||||
{
|
||||
stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
|
||||
{
|
||||
m_hashKey=cle;
|
||||
m_key=cleTxt;
|
||||
m_value=valueVecInt;
|
||||
}
|
||||
// Builds an entry from its hash key (cle), its text key (cleTxt) and its
// string-vector value.
stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
  : m_hashKey ( cle ),
    m_key ( cleTxt ),
    m_value ( valueVecInt )
{
}
|
||||
// stringInfosHasher::~stringInfosHasher(){};*/
|
||||
long stringInfosHasher::getHashKey()
|
||||
{
|
||||
return m_hashKey;
|
||||
}
|
||||
string stringInfosHasher::getKey()
|
||||
{
|
||||
return m_key;
|
||||
}
|
||||
vector<string> stringInfosHasher::getValue()
|
||||
{
|
||||
return m_value;
|
||||
}
|
||||
void stringInfosHasher::setValue ( vector<string> value )
|
||||
{
|
||||
m_value=value;
|
||||
}
|
||||
string stringInfosHasher::toString()
|
||||
{
|
||||
stringstream to_return;
|
||||
to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
|
||||
return to_return.str();
|
||||
}
|
||||
long stringInfosHasher::getHashKey()
|
||||
{
|
||||
return m_hashKey;
|
||||
}
|
||||
// Returns a copy of the text key stored in this entry.
string stringInfosHasher::getKey()
{
  return this->m_key;
}
|
||||
// Returns a copy of the string vector stored in this entry.
vector<string> stringInfosHasher::getValue()
{
  return this->m_value;
}
|
||||
// Replaces the stored string vector with `value`.
void stringInfosHasher::setValue ( vector<string> value )
{
  this->m_value = value;
}
|
||||
// Formats the entry as one line: hash key, text key and the value (as rendered
// by vectorToString with a tab separator), tab-separated and newline-terminated.
string stringInfosHasher::toString()
{
  stringstream line;
  line << m_hashKey << "\t";
  line << m_key << "\t";
  line << vectorToString(m_value,"\t") << endl;
  return line.str();
}
|
||||
|
||||
|
||||
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -29,23 +29,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
using namespace std;
|
||||
namespace HashMapSpace
|
||||
{
|
||||
class stringInfosHasher
|
||||
{
|
||||
private:
|
||||
long m_hashKey;
|
||||
string m_key;
|
||||
vector<string> m_value;
|
||||
class stringInfosHasher
|
||||
{
|
||||
private:
|
||||
long m_hashKey;
|
||||
string m_key;
|
||||
vector<string> m_value;
|
||||
|
||||
public:
|
||||
stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
|
||||
long getHashKey();
|
||||
string getKey();
|
||||
vector<string> getValue();
|
||||
void setValue ( vector<string> value );
|
||||
string toString();
|
||||
public:
|
||||
stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
|
||||
long getHashKey();
|
||||
string getKey();
|
||||
vector<string> getValue();
|
||||
void setValue ( vector<string> value );
|
||||
string toString();
|
||||
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -24,191 +24,163 @@ using namespace std;
|
||||
namespace TERCpp
|
||||
{
|
||||
|
||||
terAlignment::terAlignment()
|
||||
{
|
||||
// Creates an empty alignment: all containers are empty and every counter is
// zero. averageWords is zeroed as well so that scoreAv() and toString() never
// read an indeterminate value on an alignment whose average was not set.
terAlignment::terAlignment()
  : numEdits ( 0 ),
    numWords ( 0 ),
    averageWords ( 0 ),
    bestRef ( "" ),
    numIns ( 0 ),
    numDel ( 0 ),
    numSub ( 0 ),
    numSft ( 0 ),
    numWsf ( 0 )
{
  // ref, hyp, aftershift, allshifts, hyp_int, aftershift_int and alignment
  // are default-constructed (empty).
}
|
||||
// Renders a multi-line, human-readable report of this alignment:
//   - the reference, the hypothesis and the hypothesis after shifting,
//   - the alignment characters between parentheses (only when there are any),
//   - the number of shifts followed by one line per shift,
//   - the averaged score with the numEdits/averageWords ratio.
string terAlignment::toString()
{
  stringstream s;
  s << "Original Ref: \t" << join ( " ", ref ) << endl;
  s << "Original Hyp: \t" << join ( " ", hyp ) << endl;
  s << "Hyp After Shift:\t" << join ( " ", aftershift ) << endl;

  // Test the number of stored alignment characters. sizeof(alignment) would be
  // the size of the vector object itself, which is never zero.
  if ( !alignment.empty() ) {
    s << "Alignment: (";
    for ( size_t i = 0; i < alignment.size(); ++i ) {
      s << alignment[i];
    }
    s << ")";
  }
  s << endl;

  // With no shifts this prints "NumShifts: 0" and the loop does not run.
  s << "NumShifts: " << static_cast<int> ( allshifts.size() );
  for ( size_t i = 0; i < allshifts.size(); ++i ) {
    s << endl << " " ;
    s << allshifts[i].toString();
  }

  s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
  return s.str();
}
|
||||
// Joins the elements of `arr` into one string, inserting `delim` between
// consecutive elements. An empty vector yields an empty string.
string terAlignment::join ( string delim, vector<string> arr )
{
  vector<string>::const_iterator word = arr.begin();
  if ( word == arr.end() ) {
    return "";
  }
  stringstream joined;
  joined << *word;
  for ( ++word; word != arr.end(); ++word ) {
    joined << delim << *word;
  }
  return joined.str();
}
|
||||
double terAlignment::score()
|
||||
{
|
||||
if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
|
||||
return 1.0;
|
||||
}
|
||||
if ( numWords <= 0.0 ) {
|
||||
return 0.0;
|
||||
}
|
||||
return ( double ) numEdits / numWords;
|
||||
}
|
||||
double terAlignment::scoreAv()
|
||||
{
|
||||
if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
|
||||
return 1.0;
|
||||
}
|
||||
if ( averageWords <= 0.0 ) {
|
||||
return 0.0;
|
||||
}
|
||||
return ( double ) numEdits / averageWords;
|
||||
}
|
||||
|
||||
void terAlignment::scoreDetails()
|
||||
{
|
||||
numIns = numDel = numSub = numWsf = numSft = 0;
|
||||
if((int)allshifts.size()>0) {
|
||||
for(int i = 0; i < (int)allshifts.size(); ++i) {
|
||||
numWsf += allshifts[i].size();
|
||||
}
|
||||
double terAlignment::score()
|
||||
{
|
||||
if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) )
|
||||
{
|
||||
return 1.0;
|
||||
}
|
||||
if ( numWords <= 0.0 )
|
||||
{
|
||||
return 0.0;
|
||||
}
|
||||
return ( double ) numEdits / numWords;
|
||||
numSft = allshifts.size();
|
||||
}
|
||||
|
||||
if((int)alignment.size()>0 ) {
|
||||
for(int i = 0; i < (int)alignment.size(); ++i) {
|
||||
switch (alignment[i]) {
|
||||
case 'S':
|
||||
case 'T':
|
||||
numSub++;
|
||||
break;
|
||||
case 'D':
|
||||
numDel++;
|
||||
break;
|
||||
case 'I':
|
||||
numIns++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
double terAlignment::scoreAv()
|
||||
{
|
||||
if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) )
|
||||
{
|
||||
return 1.0;
|
||||
}
|
||||
if ( averageWords <= 0.0 )
|
||||
{
|
||||
return 0.0;
|
||||
}
|
||||
return ( double ) numEdits / averageWords;
|
||||
}
|
||||
// if(numEdits != numSft + numDel + numIns + numSub)
|
||||
// System.out.println("** Error, unmatch edit erros " + numEdits +
|
||||
// " vs " + (numSft + numDel + numIns + numSub));
|
||||
}
|
||||
// Returns the alignment characters separated by single spaces, with no
// leading or trailing separator. An empty alignment yields an empty string.
string terAlignment::printAlignments()
{
  stringstream to_return;
  vector<char>::const_iterator op = alignment.begin();
  if ( op != alignment.end() ) {
    to_return << *op;
    for ( ++op; op != alignment.end(); ++op ) {
      to_return << " " << *op;
    }
  }
  return to_return.str();
}
|
||||
// Returns "NbrShifts: <count>" followed, for every shift, by a tab and the
// shift's toString() text. With no shifts the result is "NbrShifts: 0".
string terAlignment::printAllShifts()
{
  stringstream to_return;
  to_return << "NbrShifts: " << static_cast<int> ( allshifts.size() );
  for ( vector<terShift>::iterator shift = allshifts.begin(); shift != allshifts.end(); ++shift ) {
    to_return << "\t" << shift->toString();
  }
  return to_return.str();
}
|
||||
|
||||
}
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -34,41 +34,41 @@ using namespace std;
|
||||
namespace TERCpp
|
||||
{
|
||||
|
||||
class terAlignment
|
||||
{
|
||||
private:
|
||||
public:
|
||||
class terAlignment
|
||||
{
|
||||
private:
|
||||
public:
|
||||
|
||||
terAlignment();
|
||||
string toString();
|
||||
void scoreDetails();
|
||||
terAlignment();
|
||||
string toString();
|
||||
void scoreDetails();
|
||||
|
||||
vector<string> ref;
|
||||
vector<string> hyp;
|
||||
vector<string> aftershift;
|
||||
vector<terShift> allshifts;
|
||||
vector<int> hyp_int;
|
||||
vector<int> aftershift_int;
|
||||
vector<string> ref;
|
||||
vector<string> hyp;
|
||||
vector<string> aftershift;
|
||||
vector<terShift> allshifts;
|
||||
vector<int> hyp_int;
|
||||
vector<int> aftershift_int;
|
||||
|
||||
double numEdits;
|
||||
double numWords;
|
||||
double averageWords;
|
||||
vector<char> alignment;
|
||||
string bestRef;
|
||||
double numEdits;
|
||||
double numWords;
|
||||
double averageWords;
|
||||
vector<char> alignment;
|
||||
string bestRef;
|
||||
|
||||
int numIns;
|
||||
int numDel;
|
||||
int numSub;
|
||||
int numSft;
|
||||
int numWsf;
|
||||
int numIns;
|
||||
int numDel;
|
||||
int numSub;
|
||||
int numSft;
|
||||
int numWsf;
|
||||
|
||||
|
||||
string join ( string delim, vector<string> arr );
|
||||
double score();
|
||||
double scoreAv();
|
||||
string printAlignments();
|
||||
string printAllShifts();
|
||||
};
|
||||
string join ( string delim, vector<string> arr );
|
||||
double score();
|
||||
double scoreAv();
|
||||
string printAlignments();
|
||||
string printAllShifts();
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -42,32 +42,32 @@ namespace TERCpp
|
||||
// numSft=0;
|
||||
// numWsf=0;
|
||||
// }
|
||||
terShift::terShift ()
|
||||
{
|
||||
start = 0;
|
||||
end = 0;
|
||||
moveto = 0;
|
||||
newloc = 0;
|
||||
cost=1.0;
|
||||
}
|
||||
terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
|
||||
{
|
||||
start = _start;
|
||||
end = _end;
|
||||
moveto = _moveto;
|
||||
newloc = _newloc;
|
||||
cost=1.0;
|
||||
}
|
||||
// Default shift: all positions are 0 and the cost is 1.0.
terShift::terShift ()
  : start ( 0 ),
    end ( 0 ),
    moveto ( 0 ),
    newloc ( 0 ),
    cost ( 1.0 )
{
}
|
||||
// Shift described by its four positions; the cost is 1.0 and no shifted words
// are recorded.
terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
  : start ( _start ),
    end ( _end ),
    moveto ( _moveto ),
    newloc ( _newloc ),
    cost ( 1.0 )
{
}
|
||||
|
||||
terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
|
||||
{
|
||||
start = _start;
|
||||
end = _end;
|
||||
moveto = _moveto;
|
||||
newloc = _newloc;
|
||||
shifted = _shifted;
|
||||
cost=1.0;
|
||||
}
|
||||
// Shift described by its four positions plus the words that were moved; the
// cost is 1.0.
terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
  : start ( _start ),
    end ( _end ),
    moveto ( _moveto ),
    newloc ( _newloc ),
    shifted ( _shifted ),
    cost ( 1.0 )
{
}
|
||||
// string terShift::vectorToString(vector<string> vec)
|
||||
// {
|
||||
// string retour("");
|
||||
@ -78,44 +78,38 @@ namespace TERCpp
|
||||
// return retour;
|
||||
// }
|
||||
|
||||
string terShift::toString()
|
||||
{
|
||||
stringstream s;
|
||||
s.str ( "" );
|
||||
s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
|
||||
if ( ( int ) shifted.size() > 0 )
|
||||
{
|
||||
s << " (" << vectorToString ( shifted ) << ")";
|
||||
}
|
||||
return s.str();
|
||||
}
|
||||
// Formats the shift as "[start, end, moveto/newloc]", followed by the shifted
// words between parentheses (as rendered by vectorToString) when there are any.
string terShift::toString()
{
  stringstream text;
  text << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
  if ( !shifted.empty() ) {
    text << " (" << vectorToString ( shifted ) << ")";
  }
  return text.str();
}
|
||||
|
||||
/* The distance of the shift. */
|
||||
int terShift::distance()
|
||||
{
|
||||
if ( moveto < start )
|
||||
{
|
||||
return start - moveto;
|
||||
}
|
||||
else if ( moveto > end )
|
||||
{
|
||||
return moveto - end;
|
||||
}
|
||||
else
|
||||
{
|
||||
return moveto - start;
|
||||
}
|
||||
}
|
||||
/* The distance of the shift. */
|
||||
int terShift::distance()
|
||||
{
|
||||
if ( moveto < start ) {
|
||||
return start - moveto;
|
||||
} else if ( moveto > end ) {
|
||||
return moveto - end;
|
||||
} else {
|
||||
return moveto - start;
|
||||
}
|
||||
}
|
||||
|
||||
bool terShift::leftShift()
|
||||
{
|
||||
return ( moveto < start );
|
||||
}
|
||||
bool terShift::leftShift()
|
||||
{
|
||||
return ( moveto < start );
|
||||
}
|
||||
|
||||
int terShift::size()
|
||||
{
|
||||
return ( end - start ) + 1;
|
||||
}
|
||||
int terShift::size()
|
||||
{
|
||||
return ( end - start ) + 1;
|
||||
}
|
||||
// terShift terShift::operator=(terShift t)
|
||||
// {
|
||||
//
|
||||
|
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -34,32 +34,32 @@ using namespace Tools;
|
||||
|
||||
namespace TERCpp
|
||||
{
|
||||
class terShift
|
||||
{
|
||||
private:
|
||||
public:
|
||||
class terShift
|
||||
{
|
||||
private:
|
||||
public:
|
||||
|
||||
terShift();
|
||||
terShift ( int _start, int _end, int _moveto, int _newloc );
|
||||
terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
|
||||
string toString();
|
||||
int distance() ;
|
||||
bool leftShift();
|
||||
int size();
|
||||
terShift();
|
||||
terShift ( int _start, int _end, int _moveto, int _newloc );
|
||||
terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
|
||||
string toString();
|
||||
int distance() ;
|
||||
bool leftShift();
|
||||
int size();
|
||||
// terShift operator=(terShift t);
|
||||
// string vectorToString(vector<string> vec);
|
||||
|
||||
int start;
|
||||
int end;
|
||||
int moveto;
|
||||
int newloc;
|
||||
vector<string> shifted; // The words we shifted
|
||||
vector<char> alignment ; // for pra_more output
|
||||
vector<string> aftershift; // for pra_more output
|
||||
// This is used to store the cost of a shift, so we don't have to
|
||||
// calculate it multiple times.
|
||||
double cost;
|
||||
};
|
||||
int start;
|
||||
int end;
|
||||
int moveto;
|
||||
int newloc;
|
||||
vector<string> shifted; // The words we shifted
|
||||
vector<char> alignment ; // for pra_more output
|
||||
vector<string> aftershift; // for pra_more output
|
||||
// This is used to store the cost of a shift, so we don't have to
|
||||
// calculate it multiple times.
|
||||
double cost;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
1612
mert/TER/tercalc.cpp
1612
mert/TER/tercalc.cpp
File diff suppressed because it is too large
Load Diff
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -41,62 +41,62 @@ namespace TERCpp
|
||||
{
|
||||
// typedef size_t WERelement[2];
|
||||
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
|
||||
typedef vector<terShift> vecTerShift;
|
||||
/**
|
||||
@author
|
||||
*/
|
||||
class terCalc
|
||||
{
|
||||
private :
|
||||
typedef vector<terShift> vecTerShift;
|
||||
/**
|
||||
@author
|
||||
*/
|
||||
class terCalc
|
||||
{
|
||||
private :
|
||||
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
|
||||
WERalignment l_WERalignment;
|
||||
WERalignment l_WERalignment;
|
||||
// HashMap contenant les valeurs de hash de chaque mot
|
||||
hashMap bagOfWords;
|
||||
int TAILLE_PERMUT_MAX;
|
||||
// Increments internes
|
||||
int NBR_SEGS_EVALUATED;
|
||||
int NBR_PERMUTS_CONSID;
|
||||
int NBR_BS_APPELS;
|
||||
int DIST_MAX_PERMUT;
|
||||
bool PRINT_DEBUG;
|
||||
hashMap bagOfWords;
|
||||
int TAILLE_PERMUT_MAX;
|
||||
// Increments internes
|
||||
int NBR_SEGS_EVALUATED;
|
||||
int NBR_PERMUTS_CONSID;
|
||||
int NBR_BS_APPELS;
|
||||
int DIST_MAX_PERMUT;
|
||||
bool PRINT_DEBUG;
|
||||
|
||||
// Utilisés dans minDistEdit et ils ne sont pas réajustés
|
||||
double S[1000][1000];
|
||||
char P[1000][1000];
|
||||
vector<vecInt> refSpans;
|
||||
vector<vecInt> hypSpans;
|
||||
int TAILLE_BEAM;
|
||||
// Utilisés dans minDistEdit et ils ne sont pas réajustés
|
||||
double S[1000][1000];
|
||||
char P[1000][1000];
|
||||
vector<vecInt> refSpans;
|
||||
vector<vecInt> hypSpans;
|
||||
int TAILLE_BEAM;
|
||||
|
||||
public:
|
||||
int shift_cost;
|
||||
int insert_cost;
|
||||
int delete_cost;
|
||||
int substitute_cost;
|
||||
int match_cost;
|
||||
double infinite;
|
||||
terCalc();
|
||||
public:
|
||||
int shift_cost;
|
||||
int insert_cost;
|
||||
int delete_cost;
|
||||
int substitute_cost;
|
||||
int match_cost;
|
||||
double infinite;
|
||||
terCalc();
|
||||
|
||||
// ~terCalc();
|
||||
// size_t* hashVec ( vector<string> s );
|
||||
void setDebugMode ( bool b );
|
||||
void setDebugMode ( bool b );
|
||||
// int WERCalculation ( size_t * ref, size_t * hyp );
|
||||
// int WERCalculation ( vector<string> ref, vector<string> hyp );
|
||||
// int WERCalculation ( vector<int> ref, vector<int> hyp );
|
||||
terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
|
||||
terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
|
||||
// string vectorToString(vector<string> vec);
|
||||
// vector<string> subVector(vector<string> vec, int start, int end);
|
||||
hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
|
||||
terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
|
||||
bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
|
||||
terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
|
||||
terAlignment TER ( vector<string> hyp, vector<string> ref );
|
||||
terAlignment TER ( vector<int> hyp, vector<int> ref );
|
||||
bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
|
||||
void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
|
||||
vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
|
||||
alignmentStruct permuter ( vector<string> words, terShift s );
|
||||
alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
|
||||
};
|
||||
hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
|
||||
terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
|
||||
bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
|
||||
terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
|
||||
terAlignment TER ( vector<string> hyp, vector<string> ref );
|
||||
terAlignment TER ( vector<int> hyp, vector<int> ref );
|
||||
bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
|
||||
void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
|
||||
vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
|
||||
alignmentStruct permuter ( vector<string> words, terShift s );
|
||||
alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
1189
mert/TER/tools.cpp
1189
mert/TER/tools.cpp
File diff suppressed because it is too large
Load Diff
107
mert/TER/tools.h
107
mert/TER/tools.h
@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
|
||||
Contact: christophe.servan@lium.univ-lemans.fr
|
||||
|
||||
The tercpp tool and library are free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the licence, or
|
||||
(at your option) any later version.
|
||||
|
||||
@ -35,32 +35,31 @@ using namespace std;
|
||||
|
||||
namespace Tools
|
||||
{
|
||||
typedef vector<double> vecDouble;
|
||||
typedef vector<char> vecChar;
|
||||
typedef vector<int> vecInt;
|
||||
typedef vector<float> vecFloat;
|
||||
typedef vector<size_t> vecSize_t;
|
||||
typedef vector<string> vecString;
|
||||
typedef vector<string> alignmentElement;
|
||||
typedef vector<alignmentElement> WERalignment;
|
||||
typedef vector<double> vecDouble;
|
||||
typedef vector<char> vecChar;
|
||||
typedef vector<int> vecInt;
|
||||
typedef vector<float> vecFloat;
|
||||
typedef vector<size_t> vecSize_t;
|
||||
typedef vector<string> vecString;
|
||||
typedef vector<string> alignmentElement;
|
||||
typedef vector<alignmentElement> WERalignment;
|
||||
|
||||
|
||||
struct param
|
||||
{
|
||||
bool debugMode;
|
||||
string referenceFile; // path to the resources
|
||||
string hypothesisFile; // path to the configuration files
|
||||
string outputFileExtension;
|
||||
string outputFileName;
|
||||
bool noPunct;
|
||||
bool caseOn;
|
||||
bool normalize;
|
||||
bool tercomLike;
|
||||
bool sgmlInputs;
|
||||
bool noTxtIds;
|
||||
bool printAlignments;
|
||||
bool WER;
|
||||
int debugLevel;
|
||||
struct param {
|
||||
bool debugMode;
|
||||
string referenceFile; // path to the resources
|
||||
string hypothesisFile; // path to the configuration files
|
||||
string outputFileExtension;
|
||||
string outputFileName;
|
||||
bool noPunct;
|
||||
bool caseOn;
|
||||
bool normalize;
|
||||
bool tercomLike;
|
||||
bool sgmlInputs;
|
||||
bool noTxtIds;
|
||||
bool printAlignments;
|
||||
bool WER;
|
||||
int debugLevel;
|
||||
};
|
||||
// param = { false, "","","","" };
|
||||
|
||||
@ -68,35 +67,35 @@ struct param
|
||||
// private:
|
||||
// public:
|
||||
|
||||
string vectorToString ( vector<string> vec );
|
||||
string vectorToString ( vector<char> vec );
|
||||
string vectorToString ( vector<int> vec );
|
||||
string vectorToString ( vector<string> vec, string s );
|
||||
string vectorToString ( vector<char> vec, string s );
|
||||
string vectorToString ( vector<int> vec, string s );
|
||||
string vectorToString ( vector<bool> vec, string s );
|
||||
string vectorToString ( char* vec, string s, int taille );
|
||||
string vectorToString ( int* vec, string s , int taille );
|
||||
string vectorToString ( bool* vec, string s , int taille );
|
||||
vector<string> subVector ( vector<string> vec, int start, int end );
|
||||
vector<int> subVector ( vector<int> vec, int start, int end );
|
||||
vector<float> subVector ( vector<float> vec, int start, int end );
|
||||
vector<string> copyVector ( vector<string> vec );
|
||||
vector<int> copyVector ( vector<int> vec );
|
||||
vector<float> copyVector ( vector<float> vec );
|
||||
vector<string> stringToVector ( string s, string tok );
|
||||
vector<string> stringToVector ( char s, string tok );
|
||||
vector<string> stringToVector ( int s, string tok );
|
||||
vector<int> stringToVectorInt ( string s, string tok );
|
||||
vector<float> stringToVectorFloat ( string s, string tok );
|
||||
string lowerCase(string str);
|
||||
string removePunct(string str);
|
||||
string tokenizePunct(string str);
|
||||
string removePunctTercom(string str);
|
||||
string normalizeStd(string str);
|
||||
string printParams(param p);
|
||||
string join ( string delim, vector<string> arr );
|
||||
string vectorToString ( vector<string> vec );
|
||||
string vectorToString ( vector<char> vec );
|
||||
string vectorToString ( vector<int> vec );
|
||||
string vectorToString ( vector<string> vec, string s );
|
||||
string vectorToString ( vector<char> vec, string s );
|
||||
string vectorToString ( vector<int> vec, string s );
|
||||
string vectorToString ( vector<bool> vec, string s );
|
||||
string vectorToString ( char* vec, string s, int taille );
|
||||
string vectorToString ( int* vec, string s , int taille );
|
||||
string vectorToString ( bool* vec, string s , int taille );
|
||||
vector<string> subVector ( vector<string> vec, int start, int end );
|
||||
vector<int> subVector ( vector<int> vec, int start, int end );
|
||||
vector<float> subVector ( vector<float> vec, int start, int end );
|
||||
vector<string> copyVector ( vector<string> vec );
|
||||
vector<int> copyVector ( vector<int> vec );
|
||||
vector<float> copyVector ( vector<float> vec );
|
||||
vector<string> stringToVector ( string s, string tok );
|
||||
vector<string> stringToVector ( char s, string tok );
|
||||
vector<string> stringToVector ( int s, string tok );
|
||||
vector<int> stringToVectorInt ( string s, string tok );
|
||||
vector<float> stringToVectorFloat ( string s, string tok );
|
||||
string lowerCase(string str);
|
||||
string removePunct(string str);
|
||||
string tokenizePunct(string str);
|
||||
string removePunctTercom(string str);
|
||||
string normalizeStd(string str);
|
||||
string printParams(param p);
|
||||
string join ( string delim, vector<string> arr );
|
||||
// };
|
||||
param copyParam(param p);
|
||||
param copyParam(param p);
|
||||
}
|
||||
#endif
|
||||
|
@ -43,7 +43,8 @@ private:
|
||||
};
|
||||
|
||||
// load hypothesis from candidate output
|
||||
vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile) {
|
||||
vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile)
|
||||
{
|
||||
|
||||
ifstream cand(candFile.c_str());
|
||||
if (!cand.good()) throw runtime_error("Error opening candidate file");
|
||||
@ -61,7 +62,8 @@ vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile) {
|
||||
}
|
||||
|
||||
// load 1-best hypothesis from n-best file (useful if relying on alignment/tree information)
|
||||
vector<ScoreStats> EvaluatorUtil::loadNBest(const string& nBestFile) {
|
||||
vector<ScoreStats> EvaluatorUtil::loadNBest(const string& nBestFile)
|
||||
{
|
||||
vector<ScoreStats> entries;
|
||||
|
||||
Data data(g_scorer);
|
||||
@ -81,8 +83,7 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap, bool nbest_i
|
||||
|
||||
if (nbest_input) {
|
||||
entries = loadNBest(candFile);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
entries = loadCand(candFile);
|
||||
}
|
||||
|
||||
|
@ -77,7 +77,7 @@ int main(int argc, char** argv)
|
||||
bool model_bg = false; // Use model for background corpus
|
||||
bool verbose = false; // Verbose updates
|
||||
bool safe_hope = false; // Model score cannot have more than BLEU_RATIO times more influence than BLEU
|
||||
size_t hgPruning = 50; //prune hypergraphs to have this many edges per reference word
|
||||
size_t hgPruning = 50; //prune hypergraphs to have this many edges per reference word
|
||||
|
||||
// Command-line processing follows pro.cpp
|
||||
po::options_description desc("Allowed options");
|
||||
@ -157,7 +157,7 @@ int main(int argc, char** argv)
|
||||
do {
|
||||
size_t equals = buffer.find_last_of("=");
|
||||
UTIL_THROW_IF(equals == buffer.npos, util::Exception, "Incorrect format in dense feature file: '"
|
||||
<< buffer << "'");
|
||||
<< buffer << "'");
|
||||
string name = buffer.substr(0,equals);
|
||||
names.push_back(name);
|
||||
initParams.push_back(boost::lexical_cast<ValType>(buffer.substr(equals+2)));
|
||||
@ -183,7 +183,7 @@ int main(int argc, char** argv)
|
||||
//Make sure that SparseVector encodes dense feature names as 0..n-1.
|
||||
for (size_t i = 0; i < names.size(); ++i) {
|
||||
size_t id = SparseVector::encode(names[i]);
|
||||
assert(id == i);
|
||||
assert(id == i);
|
||||
if (verbose) cerr << names[i] << " " << initParams[i] << endl;
|
||||
}
|
||||
|
||||
@ -246,12 +246,12 @@ int main(int argc, char** argv)
|
||||
int iNumUpdates = 0;
|
||||
ValType totalLoss = 0.0;
|
||||
size_t sentenceIndex = 0;
|
||||
for(decoder->reset();!decoder->finished(); decoder->next()) {
|
||||
for(decoder->reset(); !decoder->finished(); decoder->next()) {
|
||||
HopeFearData hfd;
|
||||
decoder->HopeFear(bg,wv,&hfd);
|
||||
|
||||
|
||||
// Update weights
|
||||
if (!hfd.hopeFearEqual && hfd.hopeBleu > hfd.fearBleu) {
|
||||
if (!hfd.hopeFearEqual && hfd.hopeBleu > hfd.fearBleu) {
|
||||
// Vector difference
|
||||
MiraFeatureVector diff = hfd.hopeFeatures - hfd.fearFeatures;
|
||||
// Bleu difference
|
||||
|
@ -3,26 +3,27 @@
|
||||
|
||||
|
||||
|
||||
int main(int argc, char* argv[]){
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
|
||||
const char * is_reordering = "false";
|
||||
const char * is_reordering = "false";
|
||||
|
||||
if (!(argc == 5 || argc == 4)) {
|
||||
// Tell the user how to run the program
|
||||
std::cerr << "Provided " << argc << " arguments, needed 4 or 5." << std::endl;
|
||||
std::cerr << "Usage: " << argv[0] << " path_to_phrasetable output_dir num_scores is_reordering" << std::endl;
|
||||
std::cerr << "is_reordering should be either true or false, but it is currently a stub feature." << std::endl;
|
||||
//std::cerr << "Usage: " << argv[0] << " path_to_phrasetable number_of_uniq_lines output_bin_file output_hash_table output_vocab_id" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
if (!(argc == 5 || argc == 4)) {
|
||||
// Tell the user how to run the program
|
||||
std::cerr << "Provided " << argc << " arguments, needed 4 or 5." << std::endl;
|
||||
std::cerr << "Usage: " << argv[0] << " path_to_phrasetable output_dir num_scores is_reordering" << std::endl;
|
||||
std::cerr << "is_reordering should be either true or false, but it is currently a stub feature." << std::endl;
|
||||
//std::cerr << "Usage: " << argv[0] << " path_to_phrasetable number_of_uniq_lines output_bin_file output_hash_table output_vocab_id" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (argc == 5) {
|
||||
is_reordering = argv[4];
|
||||
}
|
||||
if (argc == 5) {
|
||||
is_reordering = argv[4];
|
||||
}
|
||||
|
||||
createProbingPT(argv[1], argv[2], argv[3], is_reordering);
|
||||
createProbingPT(argv[1], argv[2], argv[3], is_reordering);
|
||||
|
||||
util::PrintUsage(std::cout);
|
||||
return 0;
|
||||
util::PrintUsage(std::cout);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -26,36 +26,37 @@
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
if (argc != 2) {
|
||||
// Tell the user how to run the program
|
||||
std::cerr << "Usage: " << argv[0] << " path_to_directory" << std::endl;
|
||||
return 1;
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
if (argc != 2) {
|
||||
// Tell the user how to run the program
|
||||
std::cerr << "Usage: " << argv[0] << " path_to_directory" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
QueryEngine queries(argv[1]);
|
||||
|
||||
//Interactive search
|
||||
std::cout << "Please enter a string to be searched, or exit to exit." << std::endl;
|
||||
while (true) {
|
||||
std::string cinstr = "";
|
||||
getline(std::cin, cinstr);
|
||||
if (cinstr == "exit") {
|
||||
break;
|
||||
} else {
|
||||
//Actual lookup
|
||||
std::pair<bool, std::vector<target_text> > query_result;
|
||||
query_result = queries.query(StringPiece(cinstr));
|
||||
|
||||
if (query_result.first) {
|
||||
queries.printTargetInfo(query_result.second);
|
||||
} else {
|
||||
std::cout << "Key not found!" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
QueryEngine queries(argv[1]);
|
||||
util::PrintUsage(std::cout);
|
||||
|
||||
//Interactive search
|
||||
std::cout << "Please enter a string to be searched, or exit to exit." << std::endl;
|
||||
while (true){
|
||||
std::string cinstr = "";
|
||||
getline(std::cin, cinstr);
|
||||
if (cinstr == "exit"){
|
||||
break;
|
||||
}else{
|
||||
//Actual lookup
|
||||
std::pair<bool, std::vector<target_text> > query_result;
|
||||
query_result = queries.query(StringPiece(cinstr));
|
||||
|
||||
if (query_result.first) {
|
||||
queries.printTargetInfo(query_result.second);
|
||||
} else {
|
||||
std::cout << "Key not found!" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
util::PrintUsage(std::cout);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
@ -53,13 +53,15 @@ using namespace std;
|
||||
namespace po = boost::program_options;
|
||||
typedef multimap<float,string> Lines;
|
||||
|
||||
static void usage(const po::options_description& desc, char** argv) {
|
||||
cerr << "Usage: " + string(argv[0]) + " [options] input-file output-file" << endl;
|
||||
cerr << desc << endl;
|
||||
static void usage(const po::options_description& desc, char** argv)
|
||||
{
|
||||
cerr << "Usage: " + string(argv[0]) + " [options] input-file output-file" << endl;
|
||||
cerr << desc << endl;
|
||||
}
|
||||
|
||||
//Find top n translations of source, and send them to output
|
||||
static void outputTopN(Lines lines, size_t maxPhrases, ostream& out) {
|
||||
static void outputTopN(Lines lines, size_t maxPhrases, ostream& out)
|
||||
{
|
||||
size_t count = 0;
|
||||
for (Lines::const_reverse_iterator i = lines.rbegin(); i != lines.rend(); ++i) {
|
||||
out << i->second << endl;
|
||||
@ -92,7 +94,7 @@ static void outputTopN(const Phrase& sourcePhrase, const multimap<float,const Ta
|
||||
out << endl;
|
||||
}
|
||||
}*/
|
||||
int main(int argc, char** argv)
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
bool help;
|
||||
string input_file;
|
||||
@ -112,7 +114,7 @@ int main(int argc, char** argv)
|
||||
cmdline_options.add(desc);
|
||||
po::variables_map vm;
|
||||
po::parsed_options parsed = po::command_line_parser(argc,argv).
|
||||
options(cmdline_options).run();
|
||||
options(cmdline_options).run();
|
||||
po::store(parsed, vm);
|
||||
po::notify(vm);
|
||||
if (help) {
|
||||
@ -135,7 +137,7 @@ int main(int argc, char** argv)
|
||||
mosesargs.push_back("-f");
|
||||
mosesargs.push_back(config_file);
|
||||
|
||||
boost::scoped_ptr<Parameter> params(new Parameter());
|
||||
boost::scoped_ptr<Parameter> params(new Parameter());
|
||||
char** mosesargv = new char*[mosesargs.size()];
|
||||
for (size_t i = 0; i < mosesargs.size(); ++i) {
|
||||
mosesargv[i] = new char[mosesargs[i].length() + 1];
|
||||
|
@ -201,7 +201,7 @@ int main(int argc, char* argv[])
|
||||
cout << lineCount << " ||| " << p << " " << r << " " << prune << " " << scale << " ||| ";
|
||||
vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
|
||||
manager.OutputBestHypo(mbrBestHypo, lineCount, staticData.GetReportSegmentation(),
|
||||
staticData.GetReportAllFactors(),cout);
|
||||
staticData.GetReportAllFactors(),cout);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -80,7 +80,7 @@ int main(int argc, char** argv)
|
||||
#ifdef HAVE_PROTOBUF
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
#endif
|
||||
|
||||
|
||||
// echo command line, if verbose
|
||||
IFVERBOSE(1) {
|
||||
TRACE_ERR("command: ");
|
||||
@ -121,7 +121,7 @@ int main(int argc, char** argv)
|
||||
|
||||
// set up read/writing class
|
||||
IFVERBOSE(1) {
|
||||
PrintUserTime("Created input-output object");
|
||||
PrintUserTime("Created input-output object");
|
||||
}
|
||||
|
||||
IOWrapper* ioWrapper = new IOWrapper();
|
||||
@ -161,28 +161,26 @@ int main(int argc, char** argv)
|
||||
#ifdef PT_UG
|
||||
bool spe = params.isParamSpecified("spe-src");
|
||||
if (spe) {
|
||||
// simulated post-editing: always run single-threaded!
|
||||
// simulated post-editing: always run single-threaded!
|
||||
task->Run();
|
||||
delete task;
|
||||
string src,trg,aln;
|
||||
UTIL_THROW_IF2(!getline(*ioWrapper->spe_src,src), "[" << HERE << "] "
|
||||
<< "missing update data for simulated post-editing.");
|
||||
UTIL_THROW_IF2(!getline(*ioWrapper->spe_trg,trg), "[" << HERE << "] "
|
||||
<< "missing update data for simulated post-editing.");
|
||||
<< "missing update data for simulated post-editing.");
|
||||
UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] "
|
||||
<< "missing update data for simulated post-editing.");
|
||||
BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl())
|
||||
{
|
||||
Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
|
||||
if (sapt) sapt->add(src,trg,aln);
|
||||
VERBOSE(1,"[" << HERE << " added src] " << src << endl);
|
||||
VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
|
||||
VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
|
||||
}
|
||||
}
|
||||
else
|
||||
<< "missing update data for simulated post-editing.");
|
||||
BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl()) {
|
||||
Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
|
||||
if (sapt) sapt->add(src,trg,aln);
|
||||
VERBOSE(1,"[" << HERE << " added src] " << src << endl);
|
||||
VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
|
||||
VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
pool.Submit(task);
|
||||
pool.Submit(task);
|
||||
#else
|
||||
task->Run();
|
||||
delete task;
|
||||
|
@ -80,7 +80,7 @@ int main(int argc, char** argv)
|
||||
#ifdef HAVE_PROTOBUF
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
#endif
|
||||
|
||||
|
||||
// echo command line, if verbose
|
||||
IFVERBOSE(1) {
|
||||
TRACE_ERR("command: ");
|
||||
@ -121,7 +121,7 @@ int main(int argc, char** argv)
|
||||
|
||||
// set up read/writing class
|
||||
IFVERBOSE(1) {
|
||||
PrintUserTime("Created input-output object");
|
||||
PrintUserTime("Created input-output object");
|
||||
}
|
||||
|
||||
IOWrapper* ioWrapper = new IOWrapper();
|
||||
|
@ -46,14 +46,13 @@ public:
|
||||
* contains such an object then returns a pointer to it; otherwise a new
|
||||
* one is inserted.
|
||||
*/
|
||||
private:
|
||||
private:
|
||||
const AlignmentInfo* Add(AlignmentInfo const& ainfo);
|
||||
|
||||
public:
|
||||
public:
|
||||
template<typename ALNREP>
|
||||
AlignmentInfo const *
|
||||
Add(ALNREP const & aln)
|
||||
{
|
||||
AlignmentInfo const *
|
||||
Add(ALNREP const & aln) {
|
||||
return this->Add(AlignmentInfo(aln));
|
||||
}
|
||||
|
||||
|
@ -13,11 +13,11 @@ namespace Moses
|
||||
* print surface factor only for the given phrase
|
||||
*/
|
||||
void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
|
||||
const std::vector<FactorType> &outputFactorOrder,
|
||||
bool reportAllFactors) const
|
||||
const std::vector<FactorType> &outputFactorOrder,
|
||||
bool reportAllFactors) const
|
||||
{
|
||||
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
|
||||
"Cannot be empty phrase");
|
||||
"Cannot be empty phrase");
|
||||
if (reportAllFactors == true) {
|
||||
out << phrase;
|
||||
} else {
|
||||
@ -26,12 +26,12 @@ void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
|
||||
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
|
||||
out << *factor;
|
||||
UTIL_THROW_IF2(factor == NULL,
|
||||
"Empty factor 0 at position " << pos);
|
||||
"Empty factor 0 at position " << pos);
|
||||
|
||||
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
|
||||
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
|
||||
UTIL_THROW_IF2(factor == NULL,
|
||||
"Empty factor " << i << " at position " << pos);
|
||||
"Empty factor " << i << " at position " << pos);
|
||||
|
||||
out << "|" << *factor;
|
||||
}
|
||||
@ -45,7 +45,7 @@ void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
|
||||
// but there are scripts and tools that expect the output of -T to look like
|
||||
// that.
|
||||
void BaseManager::WriteApplicationContext(std::ostream &out,
|
||||
const ApplicationContext &context) const
|
||||
const ApplicationContext &context) const
|
||||
{
|
||||
assert(!context.empty());
|
||||
ApplicationContext::const_reverse_iterator p = context.rbegin();
|
||||
|
@ -17,23 +17,22 @@ protected:
|
||||
const InputType &m_source; /**< source sentence to be translated */
|
||||
|
||||
BaseManager(const InputType &source)
|
||||
:m_source(source)
|
||||
{}
|
||||
:m_source(source) {
|
||||
}
|
||||
|
||||
// output
|
||||
typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext;
|
||||
typedef std::set< std::pair<size_t, size_t> > Alignments;
|
||||
|
||||
void OutputSurface(std::ostream &out,
|
||||
const Phrase &phrase,
|
||||
const std::vector<FactorType> &outputFactorOrder,
|
||||
bool reportAllFactors) const;
|
||||
const Phrase &phrase,
|
||||
const std::vector<FactorType> &outputFactorOrder,
|
||||
bool reportAllFactors) const;
|
||||
void WriteApplicationContext(std::ostream &out,
|
||||
const ApplicationContext &context) const;
|
||||
const ApplicationContext &context) const;
|
||||
|
||||
template <class T>
|
||||
void ShiftOffsets(std::vector<T> &offsets, T shift) const
|
||||
{
|
||||
void ShiftOffsets(std::vector<T> &offsets, T shift) const {
|
||||
T currPos = shift;
|
||||
for (size_t i = 0; i < offsets.size(); ++i) {
|
||||
if (offsets[i] == 0) {
|
||||
@ -46,8 +45,8 @@ protected:
|
||||
}
|
||||
|
||||
public:
|
||||
virtual ~BaseManager()
|
||||
{}
|
||||
virtual ~BaseManager() {
|
||||
}
|
||||
|
||||
//! the input sentence being decoded
|
||||
const InputType& GetSource() const {
|
||||
|
@ -162,16 +162,16 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
|
||||
|
||||
if (m_translations.size() > 1) {
|
||||
UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(),
|
||||
"Non-monotonic future score: "
|
||||
<< m_translations.Get(0)->GetFutureScore() << " vs. "
|
||||
<< m_translations.Get(1)->GetFutureScore());
|
||||
"Non-monotonic future score: "
|
||||
<< m_translations.Get(0)->GetFutureScore() << " vs. "
|
||||
<< m_translations.Get(1)->GetFutureScore());
|
||||
}
|
||||
|
||||
if (m_hypotheses.size() > 1) {
|
||||
UTIL_THROW_IF2(m_hypotheses[0]->GetTotalScore() < m_hypotheses[1]->GetTotalScore(),
|
||||
"Non-monotonic total score"
|
||||
<< m_hypotheses[0]->GetTotalScore() << " vs. "
|
||||
<< m_hypotheses[1]->GetTotalScore());
|
||||
"Non-monotonic total score"
|
||||
<< m_hypotheses[0]->GetTotalScore() << " vs. "
|
||||
<< m_hypotheses[1]->GetTotalScore());
|
||||
}
|
||||
|
||||
HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
|
||||
@ -446,9 +446,9 @@ BitmapContainer::ProcessBestHypothesis()
|
||||
if (!Empty()) {
|
||||
HypothesisQueueItem *check = Dequeue(true);
|
||||
UTIL_THROW_IF2(item->GetHypothesis()->GetTotalScore() < check->GetHypothesis()->GetTotalScore(),
|
||||
"Non-monotonic total score: "
|
||||
<< item->GetHypothesis()->GetTotalScore() << " vs. "
|
||||
<< check->GetHypothesis()->GetTotalScore());
|
||||
"Non-monotonic total score: "
|
||||
<< item->GetHypothesis()->GetTotalScore() << " vs. "
|
||||
<< check->GetHypothesis()->GetTotalScore());
|
||||
}
|
||||
|
||||
// Logging for the criminally insane
|
||||
|
@ -85,7 +85,7 @@ void ChartCell::PruneToSize()
|
||||
* \param allChartCells entire chart - needed to look up underlying hypotheses
|
||||
*/
|
||||
void ChartCell::Decode(const ChartTranslationOptionList &transOptList
|
||||
, const ChartCellCollection &allChartCells)
|
||||
, const ChartCellCollection &allChartCells)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
|
@ -97,7 +97,7 @@ public:
|
||||
~ChartCell();
|
||||
|
||||
void Decode(const ChartTranslationOptionList &transOptList
|
||||
,const ChartCellCollection &allChartCells);
|
||||
,const ChartCellCollection &allChartCells);
|
||||
|
||||
//! Get all hypotheses in the cell that have the specified constituent label
|
||||
const HypoList *GetSortedHypotheses(const Word &constituentLabel) const {
|
||||
|
@ -124,8 +124,7 @@ public:
|
||||
const ChartCellLabel *Find(size_t idx) const {
|
||||
try {
|
||||
return m_map.at(idx);
|
||||
}
|
||||
catch (const std::out_of_range& oor) {
|
||||
} catch (const std::out_of_range& oor) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
@ -61,8 +61,7 @@ ChartHypothesis::ChartHypothesis(const ChartTranslationOptions &transOpt,
|
||||
const std::vector<HypothesisDimension> &childEntries = item.GetHypothesisDimensions();
|
||||
m_prevHypos.reserve(childEntries.size());
|
||||
std::vector<HypothesisDimension>::const_iterator iter;
|
||||
for (iter = childEntries.begin(); iter != childEntries.end(); ++iter)
|
||||
{
|
||||
for (iter = childEntries.begin(); iter != childEntries.end(); ++iter) {
|
||||
m_prevHypos.push_back(iter->GetHypothesis());
|
||||
}
|
||||
}
|
||||
@ -85,17 +84,14 @@ ChartHypothesis::ChartHypothesis(const ChartHypothesis &pred,
|
||||
ChartHypothesis::~ChartHypothesis()
|
||||
{
|
||||
// delete feature function states
|
||||
for (unsigned i = 0; i < m_ffStates.size(); ++i)
|
||||
{
|
||||
for (unsigned i = 0; i < m_ffStates.size(); ++i) {
|
||||
delete m_ffStates[i];
|
||||
}
|
||||
|
||||
// delete hypotheses that are not in the chart (recombined away)
|
||||
if (m_arcList)
|
||||
{
|
||||
if (m_arcList) {
|
||||
ChartArcList::iterator iter;
|
||||
for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter)
|
||||
{
|
||||
for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter) {
|
||||
ChartHypothesis *hypo = *iter;
|
||||
Delete(hypo);
|
||||
}
|
||||
@ -112,25 +108,19 @@ void ChartHypothesis::GetOutputPhrase(Phrase &outPhrase) const
|
||||
{
|
||||
FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
|
||||
|
||||
for (size_t pos = 0; pos < GetCurrTargetPhrase().GetSize(); ++pos)
|
||||
{
|
||||
for (size_t pos = 0; pos < GetCurrTargetPhrase().GetSize(); ++pos) {
|
||||
const Word &word = GetCurrTargetPhrase().GetWord(pos);
|
||||
if (word.IsNonTerminal())
|
||||
{
|
||||
if (word.IsNonTerminal()) {
|
||||
// non-term. fill out with prev hypo
|
||||
size_t nonTermInd = GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap()[pos];
|
||||
const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
|
||||
prevHypo->GetOutputPhrase(outPhrase);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
outPhrase.AddWord(word);
|
||||
|
||||
if (placeholderFactor != NOT_FOUND)
|
||||
{
|
||||
if (placeholderFactor != NOT_FOUND) {
|
||||
std::set<size_t> sourcePosSet = GetCurrTargetPhrase().GetAlignTerm().GetAlignmentsForTarget(pos);
|
||||
if (sourcePosSet.size() == 1)
|
||||
{
|
||||
if (sourcePosSet.size() == 1) {
|
||||
const std::vector<const Word*> *ruleSourceFromInputPath = GetTranslationOption().GetSourceRuleFromInputPath();
|
||||
UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
|
||||
"No source rule");
|
||||
@ -140,8 +130,7 @@ void ChartHypothesis::GetOutputPhrase(Phrase &outPhrase) const
|
||||
UTIL_THROW_IF2(sourceWord == NULL,
|
||||
"No source word");
|
||||
const Factor *factor = sourceWord->GetFactor(placeholderFactor);
|
||||
if (factor)
|
||||
{
|
||||
if (factor) {
|
||||
outPhrase.Back()[0] = factor;
|
||||
}
|
||||
}
|
||||
@ -165,33 +154,24 @@ void ChartHypothesis::GetOutputPhrase(size_t leftRightMost, size_t numWords, Phr
|
||||
const TargetPhrase &tp = GetCurrTargetPhrase();
|
||||
|
||||
size_t targetSize = tp.GetSize();
|
||||
for (size_t i = 0; i < targetSize; ++i)
|
||||
{
|
||||
for (size_t i = 0; i < targetSize; ++i) {
|
||||
size_t pos;
|
||||
if (leftRightMost == 1)
|
||||
{
|
||||
if (leftRightMost == 1) {
|
||||
pos = i;
|
||||
}
|
||||
else if (leftRightMost == 2)
|
||||
{
|
||||
} else if (leftRightMost == 2) {
|
||||
pos = targetSize - i - 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
abort();
|
||||
}
|
||||
|
||||
const Word &word = tp.GetWord(pos);
|
||||
|
||||
if (word.IsNonTerminal())
|
||||
{
|
||||
if (word.IsNonTerminal()) {
|
||||
// non-term. fill out with prev hypo
|
||||
size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[pos];
|
||||
const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
|
||||
prevHypo->GetOutputPhrase(outPhrase);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
outPhrase.AddWord(word);
|
||||
}
|
||||
|
||||
@ -236,20 +216,16 @@ void ChartHypothesis::EvaluateWhenApplied()
|
||||
// cached in the translation option-- there is no principled distinction
|
||||
const std::vector<const StatelessFeatureFunction*>& sfs =
|
||||
StatelessFeatureFunction::GetStatelessFeatureFunctions();
|
||||
for (unsigned i = 0; i < sfs.size(); ++i)
|
||||
{
|
||||
if (! staticData.IsFeatureFunctionIgnored( *sfs[i] ))
|
||||
{
|
||||
for (unsigned i = 0; i < sfs.size(); ++i) {
|
||||
if (! staticData.IsFeatureFunctionIgnored( *sfs[i] )) {
|
||||
sfs[i]->EvaluateWhenApplied(*this,&m_currScoreBreakdown);
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<const StatefulFeatureFunction*>& ffs =
|
||||
StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
||||
for (unsigned i = 0; i < ffs.size(); ++i)
|
||||
{
|
||||
if (! staticData.IsFeatureFunctionIgnored( *ffs[i] ))
|
||||
{
|
||||
for (unsigned i = 0; i < ffs.size(); ++i) {
|
||||
if (! staticData.IsFeatureFunctionIgnored( *ffs[i] )) {
|
||||
m_ffStates[i] = ffs[i]->EvaluateWhenApplied(*this,i,&m_currScoreBreakdown);
|
||||
}
|
||||
}
|
||||
@ -257,7 +233,7 @@ void ChartHypothesis::EvaluateWhenApplied()
|
||||
// total score from current translation rule
|
||||
m_totalScore = GetTranslationOption().GetScores().GetWeightedScore();
|
||||
m_totalScore += m_currScoreBreakdown.GetWeightedScore();
|
||||
|
||||
|
||||
// total scores from prev hypos
|
||||
for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
|
||||
const ChartHypothesis &prevHypo = **iter;
|
||||
@ -267,31 +243,25 @@ void ChartHypothesis::EvaluateWhenApplied()
|
||||
|
||||
void ChartHypothesis::AddArc(ChartHypothesis *loserHypo)
|
||||
{
|
||||
if (!m_arcList)
|
||||
{
|
||||
if (loserHypo->m_arcList)
|
||||
{ // we don't have an arcList, but loser does
|
||||
if (!m_arcList) {
|
||||
if (loserHypo->m_arcList) {
|
||||
// we don't have an arcList, but loser does
|
||||
this->m_arcList = loserHypo->m_arcList; // take ownership, we'll delete
|
||||
loserHypo->m_arcList = 0; // prevent a double deletion
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
this->m_arcList = new ChartArcList();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (loserHypo->m_arcList)
|
||||
{ // both have an arc list: merge. delete loser
|
||||
} else {
|
||||
if (loserHypo->m_arcList) {
|
||||
// both have an arc list: merge. delete loser
|
||||
size_t my_size = m_arcList->size();
|
||||
size_t add_size = loserHypo->m_arcList->size();
|
||||
this->m_arcList->resize(my_size + add_size, 0);
|
||||
std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(ChartHypothesis *));
|
||||
delete loserHypo->m_arcList;
|
||||
loserHypo->m_arcList = 0;
|
||||
}
|
||||
else
|
||||
{ // loserHypo doesn't have any arcs
|
||||
} else {
|
||||
// loserHypo doesn't have any arcs
|
||||
// DO NOTHING
|
||||
}
|
||||
}
|
||||
@ -299,10 +269,8 @@ void ChartHypothesis::AddArc(ChartHypothesis *loserHypo)
|
||||
}
|
||||
|
||||
// sorting helper
|
||||
struct CompareChartHypothesisTotalScore
|
||||
{
|
||||
bool operator()(const ChartHypothesis* hypo1, const ChartHypothesis* hypo2) const
|
||||
{
|
||||
struct CompareChartHypothesisTotalScore {
|
||||
bool operator()(const ChartHypothesis* hypo1, const ChartHypothesis* hypo2) const {
|
||||
return hypo1->GetTotalScore() > hypo2->GetTotalScore();
|
||||
}
|
||||
};
|
||||
@ -322,8 +290,7 @@ void ChartHypothesis::CleanupArcList()
|
||||
size_t nBestSize = staticData.GetNBestSize();
|
||||
bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphHypergraph();
|
||||
|
||||
if (!distinctNBest && m_arcList->size() > nBestSize)
|
||||
{
|
||||
if (!distinctNBest && m_arcList->size() > nBestSize) {
|
||||
// prune arc list only if there are too many arcs
|
||||
NTH_ELEMENT4(m_arcList->begin()
|
||||
, m_arcList->begin() + nBestSize - 1
|
||||
@ -332,8 +299,7 @@ void ChartHypothesis::CleanupArcList()
|
||||
|
||||
// delete bad ones
|
||||
ChartArcList::iterator iter;
|
||||
for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter)
|
||||
{
|
||||
for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter) {
|
||||
ChartHypothesis *arc = *iter;
|
||||
ChartHypothesis::Delete(arc);
|
||||
}
|
||||
@ -343,8 +309,7 @@ void ChartHypothesis::CleanupArcList()
|
||||
|
||||
// set all arc's main hypo variable to this hypo
|
||||
ChartArcList::iterator iter = m_arcList->begin();
|
||||
for (; iter != m_arcList->end() ; ++iter)
|
||||
{
|
||||
for (; iter != m_arcList->end() ; ++iter) {
|
||||
ChartHypothesis *arc = *iter;
|
||||
arc->SetWinningHypo(this);
|
||||
}
|
||||
@ -367,13 +332,11 @@ std::ostream& operator<<(std::ostream& out, const ChartHypothesis& hypo)
|
||||
|
||||
// recombination
|
||||
if (hypo.GetWinningHypothesis() != NULL &&
|
||||
hypo.GetWinningHypothesis() != &hypo)
|
||||
{
|
||||
hypo.GetWinningHypothesis() != &hypo) {
|
||||
out << "->" << hypo.GetWinningHypothesis()->GetId();
|
||||
}
|
||||
|
||||
if (StaticData::Instance().GetIncludeLHSInSearchGraph())
|
||||
{
|
||||
if (StaticData::Instance().GetIncludeLHSInSearchGraph()) {
|
||||
out << " " << hypo.GetTargetLHS() << "=>";
|
||||
}
|
||||
out << " " << hypo.GetCurrTargetPhrase()
|
||||
@ -381,8 +344,7 @@ std::ostream& operator<<(std::ostream& out, const ChartHypothesis& hypo)
|
||||
<< " " << hypo.GetCurrSourceRange();
|
||||
|
||||
HypoList::const_iterator iter;
|
||||
for (iter = hypo.GetPrevHypos().begin(); iter != hypo.GetPrevHypos().end(); ++iter)
|
||||
{
|
||||
for (iter = hypo.GetPrevHypos().begin(); iter != hypo.GetPrevHypos().end(); ++iter) {
|
||||
const ChartHypothesis &prevHypo = **iter;
|
||||
out << " " << prevHypo.GetId();
|
||||
}
|
||||
|
@ -58,8 +58,8 @@ protected:
|
||||
WordsRange m_currSourceWordsRange;
|
||||
std::vector<const FFState*> m_ffStates; /*! stateful feature function states */
|
||||
/*! sum of scores of this hypothesis, and previous hypotheses. Lazily initialised. */
|
||||
mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
|
||||
mutable boost::scoped_ptr<ScoreComponentCollection> m_deltaScoreBreakdown;
|
||||
mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
|
||||
mutable boost::scoped_ptr<ScoreComponentCollection> m_deltaScoreBreakdown;
|
||||
ScoreComponentCollection m_currScoreBreakdown /*! scores for this hypothesis only */
|
||||
,m_lmNGram
|
||||
,m_lmPrefix;
|
||||
@ -82,21 +82,18 @@ protected:
|
||||
|
||||
public:
|
||||
#ifdef USE_HYPO_POOL
|
||||
void *operator new(size_t /* num_bytes */)
|
||||
{
|
||||
void *operator new(size_t /* num_bytes */) {
|
||||
void *ptr = s_objectPool.getPtr();
|
||||
return ptr;
|
||||
}
|
||||
|
||||
//! delete \param hypo. Works with object pool too
|
||||
static void Delete(ChartHypothesis *hypo)
|
||||
{
|
||||
static void Delete(ChartHypothesis *hypo) {
|
||||
s_objectPool.freeObject(hypo);
|
||||
}
|
||||
#else
|
||||
//! delete \param hypo. Works with object pool too
|
||||
static void Delete(ChartHypothesis *hypo)
|
||||
{
|
||||
static void Delete(ChartHypothesis *hypo) {
|
||||
delete hypo;
|
||||
}
|
||||
#endif
|
||||
@ -109,43 +106,36 @@ public:
|
||||
|
||||
~ChartHypothesis();
|
||||
|
||||
unsigned GetId() const
|
||||
{
|
||||
unsigned GetId() const {
|
||||
return m_id;
|
||||
}
|
||||
|
||||
const ChartTranslationOption &GetTranslationOption() const
|
||||
{
|
||||
const ChartTranslationOption &GetTranslationOption() const {
|
||||
return *m_transOpt;
|
||||
}
|
||||
|
||||
//! Get the rule that created this hypothesis
|
||||
const TargetPhrase &GetCurrTargetPhrase() const
|
||||
{
|
||||
const TargetPhrase &GetCurrTargetPhrase() const {
|
||||
return m_transOpt->GetPhrase();
|
||||
}
|
||||
|
||||
//! the source range that this hypothesis spans
|
||||
const WordsRange &GetCurrSourceRange() const
|
||||
{
|
||||
const WordsRange &GetCurrSourceRange() const {
|
||||
return m_currSourceWordsRange;
|
||||
}
|
||||
|
||||
//! the arc list when creating n-best lists
|
||||
inline const ChartArcList* GetArcList() const
|
||||
{
|
||||
inline const ChartArcList* GetArcList() const {
|
||||
return m_arcList;
|
||||
}
|
||||
|
||||
//! the feature function states for a particular feature \param featureID
|
||||
inline const FFState* GetFFState( size_t featureID ) const
|
||||
{
|
||||
inline const FFState* GetFFState( size_t featureID ) const {
|
||||
return m_ffStates[ featureID ];
|
||||
}
|
||||
|
||||
//! reference back to the manager
|
||||
inline const ChartManager& GetManager() const
|
||||
{
|
||||
inline const ChartManager& GetManager() const {
|
||||
return m_manager;
|
||||
}
|
||||
|
||||
@ -165,21 +155,17 @@ public:
|
||||
void SetWinningHypo(const ChartHypothesis *hypo);
|
||||
|
||||
//! get the unweighted score for each feature function
|
||||
const ScoreComponentCollection &GetScoreBreakdown() const
|
||||
{
|
||||
const ScoreComponentCollection &GetScoreBreakdown() const {
|
||||
// Note: never call this method before m_currScoreBreakdown is fully computed
|
||||
if (!m_scoreBreakdown.get())
|
||||
{
|
||||
if (!m_scoreBreakdown.get()) {
|
||||
m_scoreBreakdown.reset(new ScoreComponentCollection());
|
||||
// score breakdown from current translation rule
|
||||
if (m_transOpt)
|
||||
{
|
||||
if (m_transOpt) {
|
||||
m_scoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
|
||||
}
|
||||
m_scoreBreakdown->PlusEquals(m_currScoreBreakdown);
|
||||
// score breakdowns from prev hypos
|
||||
for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter)
|
||||
{
|
||||
for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
|
||||
const ChartHypothesis &prevHypo = **iter;
|
||||
m_scoreBreakdown->PlusEquals(prevHypo.GetScoreBreakdown());
|
||||
}
|
||||
@ -188,15 +174,12 @@ public:
|
||||
}
|
||||
|
||||
//! get the unweighted score delta for each feature function
|
||||
const ScoreComponentCollection &GetDeltaScoreBreakdown() const
|
||||
{
|
||||
const ScoreComponentCollection &GetDeltaScoreBreakdown() const {
|
||||
// Note: never call this method before m_currScoreBreakdown is fully computed
|
||||
if (!m_deltaScoreBreakdown.get())
|
||||
{
|
||||
if (!m_deltaScoreBreakdown.get()) {
|
||||
m_deltaScoreBreakdown.reset(new ScoreComponentCollection());
|
||||
// score breakdown from current translation rule
|
||||
if (m_transOpt)
|
||||
{
|
||||
if (m_transOpt) {
|
||||
m_deltaScoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
|
||||
}
|
||||
m_deltaScoreBreakdown->PlusEquals(m_currScoreBreakdown);
|
||||
@ -206,33 +189,28 @@ public:
|
||||
}
|
||||
|
||||
//! Get the weighted total score
|
||||
float GetTotalScore() const
|
||||
{
|
||||
float GetTotalScore() const {
|
||||
// scores from current translation rule. eg. translation models & word penalty
|
||||
return m_totalScore;
|
||||
}
|
||||
|
||||
//! vector of previous hypotheses this hypo is built on
|
||||
const std::vector<const ChartHypothesis*> &GetPrevHypos() const
|
||||
{
|
||||
const std::vector<const ChartHypothesis*> &GetPrevHypos() const {
|
||||
return m_prevHypos;
|
||||
}
|
||||
|
||||
//! get a particular previous hypos
|
||||
const ChartHypothesis* GetPrevHypo(size_t pos) const
|
||||
{
|
||||
const ChartHypothesis* GetPrevHypo(size_t pos) const {
|
||||
return m_prevHypos[pos];
|
||||
}
|
||||
|
||||
//! get the constituency label that covers this hypo
|
||||
const Word &GetTargetLHS() const
|
||||
{
|
||||
const Word &GetTargetLHS() const {
|
||||
return GetCurrTargetPhrase().GetTargetLHS();
|
||||
}
|
||||
|
||||
//! get the best hypo in the arc list when doing n-best list creation. It's either this hypothesis, or the best hypo if this hypo is in the arc list
|
||||
const ChartHypothesis* GetWinningHypothesis() const
|
||||
{
|
||||
const ChartHypothesis* GetWinningHypothesis() const {
|
||||
return m_winningHypo;
|
||||
}
|
||||
|
||||
|
@ -125,7 +125,7 @@ Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
|
||||
}
|
||||
|
||||
// Generate the score breakdown of the derivation d.
|
||||
boost::shared_ptr<ScoreComponentCollection>
|
||||
boost::shared_ptr<ScoreComponentCollection>
|
||||
ChartKBestExtractor::GetOutputScoreBreakdown(const Derivation &d)
|
||||
{
|
||||
const ChartHypothesis &hypo = d.edge.head->hypothesis;
|
||||
@ -169,8 +169,7 @@ TreePointer ChartKBestExtractor::GetOutputTree(const Derivation &d)
|
||||
|
||||
mytree->Combine(previous_trees);
|
||||
return mytree;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
|
||||
}
|
||||
}
|
||||
|
@ -290,12 +290,14 @@ void ChartManager::FindReachableHypotheses(
|
||||
}
|
||||
}
|
||||
|
||||
void ChartManager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const {
|
||||
void ChartManager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const
|
||||
{
|
||||
ChartSearchGraphWriterHypergraph writer(&outputSearchGraphStream);
|
||||
WriteSearchGraph(writer);
|
||||
}
|
||||
|
||||
void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const {
|
||||
void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const
|
||||
{
|
||||
ChartSearchGraphWriterMoses writer(&outputSearchGraphStream, m_source.GetTranslationId());
|
||||
WriteSearchGraph(writer);
|
||||
}
|
||||
@ -304,33 +306,33 @@ void ChartManager::OutputBest(OutputCollector *collector) const
|
||||
{
|
||||
const ChartHypothesis *bestHypo = GetBestHypothesis();
|
||||
if (collector && bestHypo) {
|
||||
const size_t translationId = m_source.GetTranslationId();
|
||||
const ChartHypothesis *bestHypo = GetBestHypothesis();
|
||||
OutputBestHypo(collector, bestHypo, translationId);
|
||||
const size_t translationId = m_source.GetTranslationId();
|
||||
const ChartHypothesis *bestHypo = GetBestHypothesis();
|
||||
OutputBestHypo(collector, bestHypo, translationId);
|
||||
}
|
||||
}
|
||||
|
||||
void ChartManager::OutputNBest(OutputCollector *collector) const
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
size_t nBestSize = staticData.GetNBestSize();
|
||||
if (nBestSize > 0) {
|
||||
const size_t translationId = m_source.GetTranslationId();
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
size_t nBestSize = staticData.GetNBestSize();
|
||||
if (nBestSize > 0) {
|
||||
const size_t translationId = m_source.GetTranslationId();
|
||||
|
||||
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
|
||||
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
|
||||
CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
|
||||
OutputNBestList(collector, nBestList, translationId);
|
||||
IFVERBOSE(2) {
|
||||
PrintUserTime("N-Best Hypotheses Generation Time:");
|
||||
}
|
||||
}
|
||||
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
|
||||
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
|
||||
CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
|
||||
OutputNBestList(collector, nBestList, translationId);
|
||||
IFVERBOSE(2) {
|
||||
PrintUserTime("N-Best Hypotheses Generation Time:");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void ChartManager::OutputNBestList(OutputCollector *collector,
|
||||
const ChartKBestExtractor::KBestVec &nBestList,
|
||||
long translationId) const
|
||||
const ChartKBestExtractor::KBestVec &nBestList,
|
||||
long translationId) const
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
|
||||
@ -344,7 +346,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
|
||||
}
|
||||
|
||||
bool includeWordAlignment =
|
||||
StaticData::Instance().PrintAlignmentInfoInNbest();
|
||||
StaticData::Instance().PrintAlignmentInfoInNbest();
|
||||
|
||||
bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
|
||||
|
||||
@ -357,7 +359,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
|
||||
|
||||
// delete <s> and </s>
|
||||
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
|
||||
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
|
||||
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
|
||||
outputPhrase.RemoveWord(0);
|
||||
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
|
||||
|
||||
@ -405,9 +407,9 @@ size_t ChartManager::CalcSourceSize(const Moses::ChartHypothesis *hypo) const
|
||||
}
|
||||
|
||||
size_t ChartManager::OutputAlignmentNBest(
|
||||
Alignments &retAlign,
|
||||
const Moses::ChartKBestExtractor::Derivation &derivation,
|
||||
size_t startTarget) const
|
||||
Alignments &retAlign,
|
||||
const Moses::ChartKBestExtractor::Derivation &derivation,
|
||||
size_t startTarget) const
|
||||
{
|
||||
const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
|
||||
|
||||
@ -448,7 +450,7 @@ size_t ChartManager::OutputAlignmentNBest(
|
||||
// Recursively look thru child hypos
|
||||
size_t currStartTarget = startTarget + totalTargetSize;
|
||||
size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
|
||||
currStartTarget);
|
||||
currStartTarget);
|
||||
targetOffsets[targetPos] = targetSize;
|
||||
|
||||
totalTargetSize += targetSize;
|
||||
@ -486,22 +488,22 @@ size_t ChartManager::OutputAlignmentNBest(
|
||||
void ChartManager::OutputAlignment(OutputCollector *collector) const
|
||||
{
|
||||
if (collector == NULL) {
|
||||
return;
|
||||
return;
|
||||
}
|
||||
|
||||
ostringstream out;
|
||||
|
||||
const ChartHypothesis *hypo = GetBestHypothesis();
|
||||
if (hypo) {
|
||||
Alignments retAlign;
|
||||
OutputAlignment(retAlign, hypo, 0);
|
||||
Alignments retAlign;
|
||||
OutputAlignment(retAlign, hypo, 0);
|
||||
|
||||
// output alignments
|
||||
Alignments::const_iterator iter;
|
||||
for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
|
||||
const pair<size_t, size_t> &alignPoint = *iter;
|
||||
out << alignPoint.first << "-" << alignPoint.second << " ";
|
||||
}
|
||||
// output alignments
|
||||
Alignments::const_iterator iter;
|
||||
for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
|
||||
const pair<size_t, size_t> &alignPoint = *iter;
|
||||
out << alignPoint.first << "-" << alignPoint.second << " ";
|
||||
}
|
||||
}
|
||||
out << endl;
|
||||
|
||||
@ -510,8 +512,8 @@ void ChartManager::OutputAlignment(OutputCollector *collector) const
|
||||
}
|
||||
|
||||
size_t ChartManager::OutputAlignment(Alignments &retAlign,
|
||||
const Moses::ChartHypothesis *hypo,
|
||||
size_t startTarget) const
|
||||
const Moses::ChartHypothesis *hypo,
|
||||
size_t startTarget) const
|
||||
{
|
||||
size_t totalTargetSize = 0;
|
||||
size_t startSource = hypo->GetCurrSourceRange().GetStartPos();
|
||||
@ -536,7 +538,7 @@ size_t ChartManager::OutputAlignment(Alignments &retAlign,
|
||||
size_t targetInd = 0;
|
||||
for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
|
||||
if (tp.GetWord(targetPos).IsNonTerminal()) {
|
||||
UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
|
||||
UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
|
||||
size_t sourceInd = targetPos2SourceInd[targetPos];
|
||||
size_t sourcePos = sourceInd2pos[sourceInd];
|
||||
|
||||
@ -587,19 +589,19 @@ size_t ChartManager::OutputAlignment(Alignments &retAlign,
|
||||
|
||||
void ChartManager::OutputDetailedTranslationReport(OutputCollector *collector) const
|
||||
{
|
||||
if (collector) {
|
||||
OutputDetailedTranslationReport(collector,
|
||||
GetBestHypothesis(),
|
||||
static_cast<const Sentence&>(m_source),
|
||||
m_source.GetTranslationId());
|
||||
}
|
||||
if (collector) {
|
||||
OutputDetailedTranslationReport(collector,
|
||||
GetBestHypothesis(),
|
||||
static_cast<const Sentence&>(m_source),
|
||||
m_source.GetTranslationId());
|
||||
}
|
||||
}
|
||||
|
||||
void ChartManager::OutputDetailedTranslationReport(
|
||||
OutputCollector *collector,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const
|
||||
OutputCollector *collector,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const
|
||||
{
|
||||
if (hypo == NULL) {
|
||||
return;
|
||||
@ -610,24 +612,24 @@ void ChartManager::OutputDetailedTranslationReport(
|
||||
OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
|
||||
collector->Write(translationId, out.str());
|
||||
|
||||
//DIMw
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
//DIMw
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
if (staticData.IsDetailedAllTranslationReportingEnabled()) {
|
||||
const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
|
||||
size_t nBestSize = staticData.GetNBestSize();
|
||||
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
|
||||
CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
|
||||
OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
|
||||
}
|
||||
if (staticData.IsDetailedAllTranslationReportingEnabled()) {
|
||||
const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
|
||||
size_t nBestSize = staticData.GetNBestSize();
|
||||
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
|
||||
CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
|
||||
OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void ChartManager::OutputTranslationOptions(std::ostream &out,
|
||||
ApplicationContext &applicationContext,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const
|
||||
ApplicationContext &applicationContext,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const
|
||||
{
|
||||
if (hypo != NULL) {
|
||||
OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
|
||||
@ -644,10 +646,10 @@ void ChartManager::OutputTranslationOptions(std::ostream &out,
|
||||
}
|
||||
|
||||
void ChartManager::OutputTranslationOption(std::ostream &out,
|
||||
ApplicationContext &applicationContext,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const
|
||||
ApplicationContext &applicationContext,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const
|
||||
{
|
||||
ReconstructApplicationContext(*hypo, sentence, applicationContext);
|
||||
out << "Trans Opt " << translationId
|
||||
@ -691,16 +693,16 @@ void ChartManager::ReconstructApplicationContext(const ChartHypothesis &hypo,
|
||||
void ChartManager::OutputUnknowns(OutputCollector *collector) const
|
||||
{
|
||||
if (collector) {
|
||||
long translationId = m_source.GetTranslationId();
|
||||
const std::vector<Phrase*> &oovs = GetParser().GetUnknownSources();
|
||||
long translationId = m_source.GetTranslationId();
|
||||
const std::vector<Phrase*> &oovs = GetParser().GetUnknownSources();
|
||||
|
||||
std::ostringstream out;
|
||||
for (std::vector<Phrase*>::const_iterator p = oovs.begin();
|
||||
p != oovs.end(); ++p) {
|
||||
out << *p;
|
||||
}
|
||||
out << std::endl;
|
||||
collector->Write(translationId, out.str());
|
||||
std::ostringstream out;
|
||||
for (std::vector<Phrase*>::const_iterator p = oovs.begin();
|
||||
p != oovs.end(); ++p) {
|
||||
out << *p;
|
||||
}
|
||||
out << std::endl;
|
||||
collector->Write(translationId, out.str());
|
||||
}
|
||||
|
||||
}
|
||||
@ -709,7 +711,7 @@ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector
|
||||
{
|
||||
const ChartHypothesis *hypo = GetBestHypothesis();
|
||||
if (collector == NULL || hypo == NULL) {
|
||||
return;
|
||||
return;
|
||||
}
|
||||
|
||||
std::ostringstream out;
|
||||
@ -723,14 +725,14 @@ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector
|
||||
//Tree of full sentence
|
||||
const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure();
|
||||
if (treeStructure != NULL) {
|
||||
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
||||
for( size_t i=0; i<sff.size(); i++ ) {
|
||||
if (sff[i] == treeStructure) {
|
||||
const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
|
||||
out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
||||
for( size_t i=0; i<sff.size(); i++ ) {
|
||||
if (sff[i] == treeStructure) {
|
||||
const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
|
||||
out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collector->Write(translationId, out.str());
|
||||
@ -738,10 +740,10 @@ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector
|
||||
}
|
||||
|
||||
void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
|
||||
ApplicationContext &applicationContext,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const
|
||||
ApplicationContext &applicationContext,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const
|
||||
{
|
||||
|
||||
if (hypo != NULL) {
|
||||
@ -769,20 +771,20 @@ void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
|
||||
|
||||
void ChartManager::OutputSearchGraph(OutputCollector *collector) const
|
||||
{
|
||||
if (collector) {
|
||||
long translationId = m_source.GetTranslationId();
|
||||
std::ostringstream out;
|
||||
OutputSearchGraphMoses( out);
|
||||
collector->Write(translationId, out.str());
|
||||
}
|
||||
if (collector) {
|
||||
long translationId = m_source.GetTranslationId();
|
||||
std::ostringstream out;
|
||||
OutputSearchGraphMoses( out);
|
||||
collector->Write(translationId, out.str());
|
||||
}
|
||||
}
|
||||
|
||||
//DIMw
|
||||
void ChartManager::OutputDetailedAllTranslationReport(
|
||||
OutputCollector *collector,
|
||||
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
|
||||
const Sentence &sentence,
|
||||
long translationId) const
|
||||
OutputCollector *collector,
|
||||
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
|
||||
const Sentence &sentence,
|
||||
long translationId) const
|
||||
{
|
||||
std::ostringstream out;
|
||||
ApplicationContext applicationContext;
|
||||
@ -813,8 +815,8 @@ void ChartManager::OutputSearchGraphHypergraph() const
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
if (staticData.GetOutputSearchGraphHypergraph()) {
|
||||
HypergraphOutput<ChartManager> hypergraphOutputChart(PRECISION);
|
||||
hypergraphOutputChart.Write(*this);
|
||||
HypergraphOutput<ChartManager> hypergraphOutputChart(PRECISION);
|
||||
hypergraphOutputChart.Write(*this);
|
||||
}
|
||||
}
|
||||
|
||||
@ -842,7 +844,7 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe
|
||||
|
||||
// delete 1st & last
|
||||
UTIL_THROW_IF2(outPhrase.GetSize() < 2,
|
||||
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
|
||||
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
|
||||
|
||||
outPhrase.RemoveWord(0);
|
||||
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
|
||||
|
@ -56,49 +56,49 @@ private:
|
||||
ChartTranslationOptionList m_translationOptionList; /**< pre-computed list of translation options for the phrases in this sentence */
|
||||
|
||||
/* auxiliary functions for SearchGraphs */
|
||||
void FindReachableHypotheses(
|
||||
const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable , size_t* winners, size_t* losers) const;
|
||||
void FindReachableHypotheses(
|
||||
const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable , size_t* winners, size_t* losers) const;
|
||||
void WriteSearchGraph(const ChartSearchGraphWriter& writer) const;
|
||||
|
||||
// output
|
||||
void OutputNBestList(OutputCollector *collector,
|
||||
const ChartKBestExtractor::KBestVec &nBestList,
|
||||
long translationId) const;
|
||||
const ChartKBestExtractor::KBestVec &nBestList,
|
||||
long translationId) const;
|
||||
size_t CalcSourceSize(const Moses::ChartHypothesis *hypo) const;
|
||||
size_t OutputAlignmentNBest(Alignments &retAlign,
|
||||
const Moses::ChartKBestExtractor::Derivation &derivation,
|
||||
size_t startTarget) const;
|
||||
const Moses::ChartKBestExtractor::Derivation &derivation,
|
||||
size_t startTarget) const;
|
||||
size_t OutputAlignment(Alignments &retAlign,
|
||||
const Moses::ChartHypothesis *hypo,
|
||||
size_t startTarget) const;
|
||||
const Moses::ChartHypothesis *hypo,
|
||||
size_t startTarget) const;
|
||||
void OutputDetailedTranslationReport(
|
||||
OutputCollector *collector,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const;
|
||||
OutputCollector *collector,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const;
|
||||
void OutputTranslationOptions(std::ostream &out,
|
||||
ApplicationContext &applicationContext,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const;
|
||||
ApplicationContext &applicationContext,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const;
|
||||
void OutputTranslationOption(std::ostream &out,
|
||||
ApplicationContext &applicationContext,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const;
|
||||
ApplicationContext &applicationContext,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const;
|
||||
void ReconstructApplicationContext(const ChartHypothesis &hypo,
|
||||
const Sentence &sentence,
|
||||
ApplicationContext &context) const;
|
||||
const Sentence &sentence,
|
||||
ApplicationContext &context) const;
|
||||
void OutputTreeFragmentsTranslationOptions(std::ostream &out,
|
||||
ApplicationContext &applicationContext,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const;
|
||||
ApplicationContext &applicationContext,
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId) const;
|
||||
void OutputDetailedAllTranslationReport(
|
||||
OutputCollector *collector,
|
||||
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
|
||||
const Sentence &sentence,
|
||||
long translationId) const;
|
||||
OutputCollector *collector,
|
||||
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
|
||||
const Sentence &sentence,
|
||||
long translationId) const;
|
||||
void OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const;
|
||||
void Backtrack(const ChartHypothesis *hypo) const;
|
||||
|
||||
@ -126,8 +126,8 @@ public:
|
||||
return m_hypoStackColl;
|
||||
}
|
||||
|
||||
void CalcDecoderStatistics() const
|
||||
{}
|
||||
void CalcDecoderStatistics() const {
|
||||
}
|
||||
|
||||
void ResetSentenceStats(const InputType& source) {
|
||||
m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
|
||||
@ -138,22 +138,24 @@ public:
|
||||
return m_hypothesisId++;
|
||||
}
|
||||
|
||||
const ChartParser &GetParser() const { return m_parser; }
|
||||
const ChartParser &GetParser() const {
|
||||
return m_parser;
|
||||
}
|
||||
|
||||
// outputs
|
||||
void OutputBest(OutputCollector *collector) const;
|
||||
void OutputNBest(OutputCollector *collector) const;
|
||||
void OutputLatticeSamples(OutputCollector *collector) const
|
||||
{}
|
||||
void OutputLatticeSamples(OutputCollector *collector) const {
|
||||
}
|
||||
void OutputAlignment(OutputCollector *collector) const;
|
||||
void OutputDetailedTranslationReport(OutputCollector *collector) const;
|
||||
void OutputUnknowns(OutputCollector *collector) const;
|
||||
void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const;
|
||||
void OutputWordGraph(OutputCollector *collector) const
|
||||
{}
|
||||
void OutputWordGraph(OutputCollector *collector) const {
|
||||
}
|
||||
void OutputSearchGraph(OutputCollector *collector) const;
|
||||
void OutputSearchGraphSLF() const
|
||||
{}
|
||||
void OutputSearchGraphSLF() const {
|
||||
}
|
||||
void OutputSearchGraphHypergraph() const;
|
||||
|
||||
};
|
||||
|
@ -65,7 +65,7 @@ public:
|
||||
* \param outColl return argument
|
||||
*/
|
||||
virtual void GetChartRuleCollection(
|
||||
const InputPath &inputPath,
|
||||
const InputPath &inputPath,
|
||||
size_t lastPos, // last position to consider if using lookahead
|
||||
ChartParserCallback &outColl) = 0;
|
||||
|
||||
|
@ -11,8 +11,8 @@ ChartTranslationOption::ChartTranslationOption(const TargetPhrase &targetPhrase)
|
||||
}
|
||||
|
||||
void ChartTranslationOption::EvaluateWithSourceContext(const InputType &input,
|
||||
const InputPath &inputPath,
|
||||
const StackVec &stackVec)
|
||||
const InputPath &inputPath,
|
||||
const StackVec &stackVec)
|
||||
{
|
||||
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
|
||||
|
||||
|
@ -46,8 +46,8 @@ public:
|
||||
}
|
||||
|
||||
void EvaluateWithSourceContext(const InputType &input,
|
||||
const InputPath &inputPath,
|
||||
const StackVec &stackVec);
|
||||
const InputPath &inputPath,
|
||||
const StackVec &stackVec);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -71,10 +71,9 @@ void ChartTranslationOptions::EvaluateWithSourceContext(const InputType &input,
|
||||
ChartTranslationOption *transOpt = m_collection[i].get();
|
||||
|
||||
if (transOpt->GetScores().GetWeightedScore() == - std::numeric_limits<float>::infinity()) {
|
||||
++numDiscard;
|
||||
}
|
||||
else if (numDiscard) {
|
||||
m_collection[i - numDiscard] = m_collection[i];
|
||||
++numDiscard;
|
||||
} else if (numDiscard) {
|
||||
m_collection[i - numDiscard] = m_collection[i];
|
||||
}
|
||||
}
|
||||
|
||||
@ -135,12 +134,12 @@ void ChartTranslationOptions::CreateSourceRuleFromInputPath()
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const ChartTranslationOptions &obj)
|
||||
{
|
||||
for (size_t i = 0; i < obj.m_collection.size(); ++i) {
|
||||
const ChartTranslationOption &transOpt = *obj.m_collection[i];
|
||||
out << transOpt << endl;
|
||||
}
|
||||
for (size_t i = 0; i < obj.m_collection.size(); ++i) {
|
||||
const ChartTranslationOption &transOpt = *obj.m_collection[i];
|
||||
out << transOpt << endl;
|
||||
}
|
||||
|
||||
return out;
|
||||
return out;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -13,297 +13,297 @@
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
struct CNStats {
|
||||
size_t created,destr,read,colls,words;
|
||||
|
||||
CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
|
||||
~CNStats() {
|
||||
print(std::cerr);
|
||||
}
|
||||
struct CNStats {
|
||||
size_t created,destr,read,colls,words;
|
||||
|
||||
void createOne() {
|
||||
++created;
|
||||
}
|
||||
void destroyOne() {
|
||||
++destr;
|
||||
}
|
||||
|
||||
void collect(const ConfusionNet& cn) {
|
||||
++read;
|
||||
colls+=cn.GetSize();
|
||||
for(size_t i=0; i<cn.GetSize(); ++i)
|
||||
words+=cn[i].size();
|
||||
}
|
||||
void print(std::ostream& out) const {
|
||||
if(created>0) {
|
||||
out<<"confusion net statistics:\n"
|
||||
" created:\t"<<created<<"\n"
|
||||
" destroyed:\t"<<destr<<"\n"
|
||||
" succ. read:\t"<<read<<"\n"
|
||||
" columns:\t"<<colls<<"\n"
|
||||
" words:\t"<<words<<"\n"
|
||||
" avg. word/column:\t"<<words/(1.0*colls)<<"\n"
|
||||
" avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
|
||||
"\n\n";
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
CNStats stats;
|
||||
|
||||
size_t
|
||||
ConfusionNet::
|
||||
GetColumnIncrement(size_t i, size_t j) const
|
||||
{
|
||||
(void) i;
|
||||
(void) j;
|
||||
return 1;
|
||||
CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
|
||||
~CNStats() {
|
||||
print(std::cerr);
|
||||
}
|
||||
|
||||
ConfusionNet::
|
||||
ConfusionNet()
|
||||
: InputType()
|
||||
{
|
||||
stats.createOne();
|
||||
|
||||
const StaticData& staticData = StaticData::Instance();
|
||||
if (staticData.IsChart()) {
|
||||
m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
|
||||
}
|
||||
UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
|
||||
void createOne() {
|
||||
++created;
|
||||
}
|
||||
void destroyOne() {
|
||||
++destr;
|
||||
}
|
||||
|
||||
ConfusionNet::
|
||||
~ConfusionNet()
|
||||
{
|
||||
stats.destroyOne();
|
||||
void collect(const ConfusionNet& cn) {
|
||||
++read;
|
||||
colls+=cn.GetSize();
|
||||
for(size_t i=0; i<cn.GetSize(); ++i)
|
||||
words+=cn[i].size();
|
||||
}
|
||||
|
||||
ConfusionNet::
|
||||
ConfusionNet(Sentence const& s)
|
||||
{
|
||||
data.resize(s.GetSize());
|
||||
for(size_t i=0; i<s.GetSize(); ++i) {
|
||||
ScorePair scorePair;
|
||||
std::pair<Word, ScorePair > temp = std::make_pair(s.GetWord(i), scorePair);
|
||||
data[i].push_back(temp);
|
||||
void print(std::ostream& out) const {
|
||||
if(created>0) {
|
||||
out<<"confusion net statistics:\n"
|
||||
" created:\t"<<created<<"\n"
|
||||
" destroyed:\t"<<destr<<"\n"
|
||||
" succ. read:\t"<<read<<"\n"
|
||||
" columns:\t"<<colls<<"\n"
|
||||
" words:\t"<<words<<"\n"
|
||||
" avg. word/column:\t"<<words/(1.0*colls)<<"\n"
|
||||
" avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
|
||||
"\n\n";
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
bool
|
||||
ConfusionNet::
|
||||
ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
|
||||
{
|
||||
VERBOSE(2, "read confusion net with format "<<format<<"\n");
|
||||
switch(format) {
|
||||
case 0:
|
||||
return ReadFormat0(in,factorOrder);
|
||||
case 1:
|
||||
return ReadFormat1(in,factorOrder);
|
||||
default:
|
||||
std::cerr << "ERROR: unknown format '"<<format
|
||||
<<"' in ConfusionNet::Read";
|
||||
}
|
||||
return false;
|
||||
}
|
||||
CNStats stats;
|
||||
|
||||
int
|
||||
ConfusionNet::
|
||||
Read(std::istream& in,
|
||||
const std::vector<FactorType>& factorOrder)
|
||||
{
|
||||
int rv=ReadF(in,factorOrder,0);
|
||||
if(rv) stats.collect(*this);
|
||||
return rv;
|
||||
size_t
|
||||
ConfusionNet::
|
||||
GetColumnIncrement(size_t i, size_t j) const
|
||||
{
|
||||
(void) i;
|
||||
(void) j;
|
||||
return 1;
|
||||
}
|
||||
|
||||
ConfusionNet::
|
||||
ConfusionNet()
|
||||
: InputType()
|
||||
{
|
||||
stats.createOne();
|
||||
|
||||
const StaticData& staticData = StaticData::Instance();
|
||||
if (staticData.IsChart()) {
|
||||
m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
|
||||
}
|
||||
UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
|
||||
}
|
||||
|
||||
ConfusionNet::
|
||||
~ConfusionNet()
|
||||
{
|
||||
stats.destroyOne();
|
||||
}
|
||||
|
||||
ConfusionNet::
|
||||
ConfusionNet(Sentence const& s)
|
||||
{
|
||||
data.resize(s.GetSize());
|
||||
for(size_t i=0; i<s.GetSize(); ++i) {
|
||||
ScorePair scorePair;
|
||||
std::pair<Word, ScorePair > temp = std::make_pair(s.GetWord(i), scorePair);
|
||||
data[i].push_back(temp);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
ConfusionNet::
|
||||
ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
|
||||
{
|
||||
VERBOSE(2, "read confusion net with format "<<format<<"\n");
|
||||
switch(format) {
|
||||
case 0:
|
||||
return ReadFormat0(in,factorOrder);
|
||||
case 1:
|
||||
return ReadFormat1(in,factorOrder);
|
||||
default:
|
||||
std::cerr << "ERROR: unknown format '"<<format
|
||||
<<"' in ConfusionNet::Read";
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
int
|
||||
ConfusionNet::
|
||||
Read(std::istream& in,
|
||||
const std::vector<FactorType>& factorOrder)
|
||||
{
|
||||
int rv=ReadF(in,factorOrder,0);
|
||||
if(rv) stats.collect(*this);
|
||||
return rv;
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Deprecated due to code duplication;
|
||||
// use Word::CreateFromString() instead
|
||||
void
|
||||
ConfusionNet::
|
||||
String2Word(const std::string& s,Word& w,
|
||||
const std::vector<FactorType>& factorOrder)
|
||||
{
|
||||
std::vector<std::string> factorStrVector = Tokenize(s, "|");
|
||||
for(size_t i=0; i<factorOrder.size(); ++i)
|
||||
w.SetFactor(factorOrder[i],
|
||||
FactorCollection::Instance().AddFactor
|
||||
(Input,factorOrder[i], factorStrVector[i]));
|
||||
}
|
||||
// Deprecated due to code duplication;
|
||||
// use Word::CreateFromString() instead
|
||||
void
|
||||
ConfusionNet::
|
||||
String2Word(const std::string& s,Word& w,
|
||||
const std::vector<FactorType>& factorOrder)
|
||||
{
|
||||
std::vector<std::string> factorStrVector = Tokenize(s, "|");
|
||||
for(size_t i=0; i<factorOrder.size(); ++i)
|
||||
w.SetFactor(factorOrder[i],
|
||||
FactorCollection::Instance().AddFactor
|
||||
(Input,factorOrder[i], factorStrVector[i]));
|
||||
}
|
||||
#endif
|
||||
|
||||
bool
|
||||
ConfusionNet::
|
||||
ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
|
||||
{
|
||||
Clear();
|
||||
bool
|
||||
ConfusionNet::
|
||||
ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
|
||||
{
|
||||
Clear();
|
||||
|
||||
// const StaticData &staticData = StaticData::Instance();
|
||||
const InputFeature &inputFeature = InputFeature::Instance();
|
||||
size_t numInputScores = inputFeature.GetNumInputScores();
|
||||
size_t numRealWordCount = inputFeature.GetNumRealWordsInInput();
|
||||
// const StaticData &staticData = StaticData::Instance();
|
||||
const InputFeature &inputFeature = InputFeature::Instance();
|
||||
size_t numInputScores = inputFeature.GetNumInputScores();
|
||||
size_t numRealWordCount = inputFeature.GetNumRealWordsInInput();
|
||||
|
||||
size_t totalCount = numInputScores + numRealWordCount;
|
||||
bool addRealWordCount = (numRealWordCount > 0);
|
||||
size_t totalCount = numInputScores + numRealWordCount;
|
||||
bool addRealWordCount = (numRealWordCount > 0);
|
||||
|
||||
std::string line;
|
||||
while(getline(in,line)) {
|
||||
std::istringstream is(line);
|
||||
std::string word;
|
||||
std::string line;
|
||||
while(getline(in,line)) {
|
||||
std::istringstream is(line);
|
||||
std::string word;
|
||||
|
||||
Column col;
|
||||
while(is>>word) {
|
||||
Word w;
|
||||
// String2Word(word,w,factorOrder);
|
||||
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
|
||||
std::vector<float> probs(totalCount, 0.0);
|
||||
for(size_t i=0; i < numInputScores; i++) {
|
||||
double prob;
|
||||
if (!(is>>prob)) {
|
||||
TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
|
||||
return false;
|
||||
}
|
||||
if(prob<0.0) {
|
||||
VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
|
||||
prob=0.0;
|
||||
} else if (prob>1.0) {
|
||||
VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
|
||||
prob=1.0;
|
||||
}
|
||||
probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
|
||||
Column col;
|
||||
while(is>>word) {
|
||||
Word w;
|
||||
// String2Word(word,w,factorOrder);
|
||||
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
|
||||
std::vector<float> probs(totalCount, 0.0);
|
||||
for(size_t i=0; i < numInputScores; i++) {
|
||||
double prob;
|
||||
if (!(is>>prob)) {
|
||||
TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
|
||||
return false;
|
||||
}
|
||||
if(prob<0.0) {
|
||||
VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
|
||||
prob=0.0;
|
||||
} else if (prob>1.0) {
|
||||
VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
|
||||
prob=1.0;
|
||||
}
|
||||
probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
|
||||
|
||||
}
|
||||
//store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
|
||||
if (addRealWordCount && word!=EPSILON && word!="")
|
||||
probs.back() = -1.0;
|
||||
|
||||
ScorePair scorePair(probs);
|
||||
|
||||
col.push_back(std::make_pair(w,scorePair));
|
||||
}
|
||||
if(col.size()) {
|
||||
data.push_back(col);
|
||||
ShrinkToFit(data.back());
|
||||
} else break;
|
||||
}
|
||||
return !data.empty();
|
||||
}
|
||||
//store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
|
||||
if (addRealWordCount && word!=EPSILON && word!="")
|
||||
probs.back() = -1.0;
|
||||
|
||||
bool
|
||||
ConfusionNet::
|
||||
ReadFormat1(std::istream& in, const std::vector<FactorType>& factorOrder)
|
||||
{
|
||||
Clear();
|
||||
std::string line;
|
||||
ScorePair scorePair(probs);
|
||||
|
||||
col.push_back(std::make_pair(w,scorePair));
|
||||
}
|
||||
if(col.size()) {
|
||||
data.push_back(col);
|
||||
ShrinkToFit(data.back());
|
||||
} else break;
|
||||
}
|
||||
return !data.empty();
|
||||
}
|
||||
|
||||
bool
|
||||
ConfusionNet::
|
||||
ReadFormat1(std::istream& in, const std::vector<FactorType>& factorOrder)
|
||||
{
|
||||
Clear();
|
||||
std::string line;
|
||||
if(!getline(in,line)) return 0;
|
||||
size_t s;
|
||||
if(getline(in,line)) s=atoi(line.c_str());
|
||||
else return 0;
|
||||
data.resize(s);
|
||||
for(size_t i=0; i<data.size(); ++i) {
|
||||
if(!getline(in,line)) return 0;
|
||||
size_t s;
|
||||
if(getline(in,line)) s=atoi(line.c_str());
|
||||
else return 0;
|
||||
data.resize(s);
|
||||
for(size_t i=0; i<data.size(); ++i) {
|
||||
if(!getline(in,line)) return 0;
|
||||
std::istringstream is(line);
|
||||
if(!(is>>s)) return 0;
|
||||
std::string word;
|
||||
double prob;
|
||||
data[i].resize(s);
|
||||
for(size_t j=0; j<s; ++j)
|
||||
if(is>>word>>prob) {
|
||||
//TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
|
||||
data[i][j].second.denseScores = std::vector<float> (1);
|
||||
data[i][j].second.denseScores.push_back((float) log(prob));
|
||||
if(data[i][j].second.denseScores[0]<0) {
|
||||
VERBOSE(1, "WARN: neg costs: "<<data[i][j].second.denseScores[0]<<" -> set to 0\n");
|
||||
data[i][j].second.denseScores[0]=0.0;
|
||||
}
|
||||
// String2Word(word,data[i][j].first,factorOrder);
|
||||
Word& w = data[i][j].first;
|
||||
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
|
||||
} else return 0;
|
||||
}
|
||||
return !data.empty();
|
||||
std::istringstream is(line);
|
||||
if(!(is>>s)) return 0;
|
||||
std::string word;
|
||||
double prob;
|
||||
data[i].resize(s);
|
||||
for(size_t j=0; j<s; ++j)
|
||||
if(is>>word>>prob) {
|
||||
//TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
|
||||
data[i][j].second.denseScores = std::vector<float> (1);
|
||||
data[i][j].second.denseScores.push_back((float) log(prob));
|
||||
if(data[i][j].second.denseScores[0]<0) {
|
||||
VERBOSE(1, "WARN: neg costs: "<<data[i][j].second.denseScores[0]<<" -> set to 0\n");
|
||||
data[i][j].second.denseScores[0]=0.0;
|
||||
}
|
||||
// String2Word(word,data[i][j].first,factorOrder);
|
||||
Word& w = data[i][j].first;
|
||||
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
|
||||
} else return 0;
|
||||
}
|
||||
return !data.empty();
|
||||
}
|
||||
|
||||
void ConfusionNet::Print(std::ostream& out) const
|
||||
{
|
||||
out<<"conf net: "<<data.size()<<"\n";
|
||||
for(size_t i=0; i<data.size(); ++i) {
|
||||
out<<i<<" -- ";
|
||||
for(size_t j=0; j<data[i].size(); ++j) {
|
||||
out<<"("<<data[i][j].first.ToString()<<", ";
|
||||
void ConfusionNet::Print(std::ostream& out) const
|
||||
{
|
||||
out<<"conf net: "<<data.size()<<"\n";
|
||||
for(size_t i=0; i<data.size(); ++i) {
|
||||
out<<i<<" -- ";
|
||||
for(size_t j=0; j<data[i].size(); ++j) {
|
||||
out<<"("<<data[i][j].first.ToString()<<", ";
|
||||
|
||||
// dense
|
||||
std::vector<float>::const_iterator iterDense;
|
||||
for(iterDense = data[i][j].second.denseScores.begin();
|
||||
iterDense < data[i][j].second.denseScores.end();
|
||||
++iterDense) {
|
||||
out<<", "<<*iterDense;
|
||||
}
|
||||
|
||||
// sparse
|
||||
std::map<StringPiece, float>::const_iterator iterSparse;
|
||||
for(iterSparse = data[i][j].second.sparseScores.begin();
|
||||
iterSparse != data[i][j].second.sparseScores.end();
|
||||
++iterSparse) {
|
||||
out << ", " << iterSparse->first << "=" << iterSparse->second;
|
||||
}
|
||||
|
||||
out<<") ";
|
||||
// dense
|
||||
std::vector<float>::const_iterator iterDense;
|
||||
for(iterDense = data[i][j].second.denseScores.begin();
|
||||
iterDense < data[i][j].second.denseScores.end();
|
||||
++iterDense) {
|
||||
out<<", "<<*iterDense;
|
||||
}
|
||||
out<<"\n";
|
||||
|
||||
// sparse
|
||||
std::map<StringPiece, float>::const_iterator iterSparse;
|
||||
for(iterSparse = data[i][j].second.sparseScores.begin();
|
||||
iterSparse != data[i][j].second.sparseScores.end();
|
||||
++iterSparse) {
|
||||
out << ", " << iterSparse->first << "=" << iterSparse->second;
|
||||
}
|
||||
|
||||
out<<") ";
|
||||
}
|
||||
out<<"\n\n";
|
||||
out<<"\n";
|
||||
}
|
||||
out<<"\n\n";
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
#pragma warning(disable:4716)
|
||||
#endif
|
||||
Phrase
|
||||
ConfusionNet::
|
||||
GetSubString(const WordsRange&) const
|
||||
{
|
||||
UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n");
|
||||
//return Phrase(Input);
|
||||
}
|
||||
Phrase
|
||||
ConfusionNet::
|
||||
GetSubString(const WordsRange&) const
|
||||
{
|
||||
UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n");
|
||||
//return Phrase(Input);
|
||||
}
|
||||
|
||||
std::string
|
||||
ConfusionNet::
|
||||
GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet
|
||||
{
|
||||
TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
|
||||
return "";
|
||||
}
|
||||
std::string
|
||||
ConfusionNet::
|
||||
GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet
|
||||
{
|
||||
TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
|
||||
return "";
|
||||
}
|
||||
#ifdef _WIN32
|
||||
#pragma warning(disable:4716)
|
||||
#endif
|
||||
const Word& ConfusionNet::GetWord(size_t) const
|
||||
{
|
||||
UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n");
|
||||
}
|
||||
const Word& ConfusionNet::GetWord(size_t) const
|
||||
{
|
||||
UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n");
|
||||
}
|
||||
#ifdef _WIN32
|
||||
#pragma warning(default:4716)
|
||||
#endif
|
||||
std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
|
||||
{
|
||||
cn.Print(out);
|
||||
return out;
|
||||
}
|
||||
std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
|
||||
{
|
||||
cn.Print(out);
|
||||
return out;
|
||||
}
|
||||
|
||||
TranslationOptionCollection*
|
||||
ConfusionNet::
|
||||
CreateTranslationOptionCollection() const
|
||||
{
|
||||
size_t maxNoTransOptPerCoverage
|
||||
= StaticData::Instance().GetMaxNoTransOptPerCoverage();
|
||||
float translationOptionThreshold
|
||||
= StaticData::Instance().GetTranslationOptionThreshold();
|
||||
TranslationOptionCollection *rv
|
||||
= new TranslationOptionCollectionConfusionNet
|
||||
(*this, maxNoTransOptPerCoverage, translationOptionThreshold);
|
||||
assert(rv);
|
||||
return rv;
|
||||
}
|
||||
TranslationOptionCollection*
|
||||
ConfusionNet::
|
||||
CreateTranslationOptionCollection() const
|
||||
{
|
||||
size_t maxNoTransOptPerCoverage
|
||||
= StaticData::Instance().GetMaxNoTransOptPerCoverage();
|
||||
float translationOptionThreshold
|
||||
= StaticData::Instance().GetTranslationOptionThreshold();
|
||||
TranslationOptionCollection *rv
|
||||
= new TranslationOptionCollectionConfusionNet
|
||||
(*this, maxNoTransOptPerCoverage, translationOptionThreshold);
|
||||
assert(rv);
|
||||
return rv;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -49,8 +49,8 @@ public:
|
||||
DecodeGraph(size_t id)
|
||||
: m_id(id)
|
||||
, m_maxChartSpan(NOT_FOUND)
|
||||
, m_backoff(0)
|
||||
{}
|
||||
, m_backoff(0) {
|
||||
}
|
||||
|
||||
// for chart decoding
|
||||
DecodeGraph(size_t id, size_t maxChartSpan)
|
||||
|
@ -198,11 +198,11 @@ const InputPath &DecodeStepTranslation::GetInputPathLEGACY(
|
||||
|
||||
const Word *wordIP = NULL;
|
||||
for (size_t i = 0; i < phraseFromIP.GetSize(); ++i) {
|
||||
const Word &tempWord = phraseFromIP.GetWord(i);
|
||||
if (!tempWord.IsEpsilon()) {
|
||||
wordIP = &tempWord;
|
||||
break;
|
||||
}
|
||||
const Word &tempWord = phraseFromIP.GetWord(i);
|
||||
if (!tempWord.IsEpsilon()) {
|
||||
wordIP = &tempWord;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// const WordsRange &range = inputPath.GetWordsRange();
|
||||
@ -237,7 +237,7 @@ void DecodeStepTranslation::ProcessLEGACY(const TranslationOption &inputPartialT
|
||||
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
||||
|
||||
const TargetPhraseCollectionWithSourcePhrase *phraseColl
|
||||
= phraseDictionary->GetTargetPhraseCollectionLEGACY(toc->GetSource(),sourceWordsRange);
|
||||
= phraseDictionary->GetTargetPhraseCollectionLEGACY(toc->GetSource(),sourceWordsRange);
|
||||
|
||||
|
||||
if (phraseColl != NULL) {
|
||||
|
@ -502,8 +502,8 @@ void BleuScoreFeature::GetClippedNgramMatchesAndCounts(Phrase& phrase,
|
||||
* phrase translated.
|
||||
*/
|
||||
FFState* BleuScoreFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
if (!m_enabled) return new BleuScoreState();
|
||||
|
||||
|
@ -116,27 +116,27 @@ public:
|
||||
size_t skip = 0) const;
|
||||
|
||||
FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo,
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
|
||||
int featureID,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
int featureID,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
|
||||
}
|
||||
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
}
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {
|
||||
}
|
||||
|
||||
bool Enabled() const {
|
||||
return m_enabled;
|
||||
|
@ -11,8 +11,8 @@ namespace Moses
|
||||
class ConstrainedDecodingState : public FFState
|
||||
{
|
||||
public:
|
||||
ConstrainedDecodingState()
|
||||
{}
|
||||
ConstrainedDecodingState() {
|
||||
}
|
||||
|
||||
ConstrainedDecodingState(const Hypothesis &hypo);
|
||||
ConstrainedDecodingState(const ChartHypothesis &hypo);
|
||||
@ -42,23 +42,23 @@ public:
|
||||
}
|
||||
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {
|
||||
}
|
||||
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
|
||||
}
|
||||
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
}
|
||||
|
||||
FFState* EvaluateWhenApplied(
|
||||
const Hypothesis& cur_hypo,
|
||||
const FFState* prev_state,
|
||||
|
@ -20,8 +20,8 @@ class ControlRecombinationState : public FFState
|
||||
{
|
||||
public:
|
||||
ControlRecombinationState(const ControlRecombination &ff)
|
||||
:m_ff(ff)
|
||||
{}
|
||||
:m_ff(ff) {
|
||||
}
|
||||
|
||||
ControlRecombinationState(const Hypothesis &hypo, const ControlRecombination &ff);
|
||||
ControlRecombinationState(const ChartHypothesis &hypo, const ControlRecombination &ff);
|
||||
@ -58,22 +58,22 @@ public:
|
||||
}
|
||||
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {
|
||||
}
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
|
||||
}
|
||||
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
}
|
||||
|
||||
FFState* EvaluateWhenApplied(
|
||||
const Hypothesis& cur_hypo,
|
||||
const FFState* prev_state,
|
||||
|
@ -8,18 +8,18 @@ using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
CountNonTerms::CountNonTerms(const std::string &line)
|
||||
:StatelessFeatureFunction(line)
|
||||
,m_all(true)
|
||||
,m_sourceSyntax(false)
|
||||
,m_targetSyntax(false)
|
||||
:StatelessFeatureFunction(line)
|
||||
,m_all(true)
|
||||
,m_sourceSyntax(false)
|
||||
,m_targetSyntax(false)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
@ -27,33 +27,33 @@ void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
|
||||
size_t indScore = 0;
|
||||
|
||||
if (m_all) {
|
||||
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
|
||||
const Word &word = targetPhrase.GetWord(i);
|
||||
if (word.IsNonTerminal()) {
|
||||
++scores[indScore];
|
||||
}
|
||||
}
|
||||
++indScore;
|
||||
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
|
||||
const Word &word = targetPhrase.GetWord(i);
|
||||
if (word.IsNonTerminal()) {
|
||||
++scores[indScore];
|
||||
}
|
||||
}
|
||||
++indScore;
|
||||
}
|
||||
|
||||
if (m_targetSyntax) {
|
||||
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
|
||||
const Word &word = targetPhrase.GetWord(i);
|
||||
if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
|
||||
++scores[indScore];
|
||||
}
|
||||
}
|
||||
++indScore;
|
||||
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
|
||||
const Word &word = targetPhrase.GetWord(i);
|
||||
if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
|
||||
++scores[indScore];
|
||||
}
|
||||
}
|
||||
++indScore;
|
||||
}
|
||||
|
||||
if (m_sourceSyntax) {
|
||||
for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
|
||||
const Word &word = sourcePhrase.GetWord(i);
|
||||
if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
|
||||
++scores[indScore];
|
||||
}
|
||||
}
|
||||
++indScore;
|
||||
for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
|
||||
const Word &word = sourcePhrase.GetWord(i);
|
||||
if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
|
||||
++scores[indScore];
|
||||
}
|
||||
}
|
||||
++indScore;
|
||||
}
|
||||
|
||||
scoreBreakdown.PlusEquals(this, scores);
|
||||
@ -64,9 +64,9 @@ void CountNonTerms::SetParameter(const std::string& key, const std::string& valu
|
||||
if (key == "all") {
|
||||
m_all = Scan<bool>(value);
|
||||
} else if (key == "source-syntax") {
|
||||
m_sourceSyntax = Scan<bool>(value);
|
||||
m_sourceSyntax = Scan<bool>(value);
|
||||
} else if (key == "target-syntax") {
|
||||
m_targetSyntax = Scan<bool>(value);
|
||||
m_targetSyntax = Scan<bool>(value);
|
||||
} else {
|
||||
StatelessFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
|
@ -14,30 +14,30 @@ public:
|
||||
}
|
||||
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const;
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const;
|
||||
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
|
||||
}
|
||||
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
}
|
||||
|
||||
void EvaluateWhenApplied(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
|
||||
void EvaluateWhenApplied(
|
||||
const ChartHypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
|
@ -22,44 +22,44 @@ int CoveredReferenceState::Compare(const FFState& other) const
|
||||
const CoveredReferenceState &otherState = static_cast<const CoveredReferenceState&>(other);
|
||||
|
||||
if (m_coveredRef.size() != otherState.m_coveredRef.size()) {
|
||||
return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
|
||||
return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
|
||||
} else {
|
||||
multiset<string>::const_iterator thisIt, otherIt;
|
||||
for (thisIt = m_coveredRef.begin(), otherIt = otherState.m_coveredRef.begin();
|
||||
thisIt != m_coveredRef.end();
|
||||
thisIt++, otherIt++) {
|
||||
thisIt != m_coveredRef.end();
|
||||
thisIt++, otherIt++) {
|
||||
if (*thisIt != *otherIt) return thisIt->compare(*otherIt);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
// return m_coveredRef == otherState.m_coveredRef;
|
||||
|
||||
|
||||
// if (m_coveredRef == otherState.m_coveredRef)
|
||||
// return 0;
|
||||
// return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
|
||||
}
|
||||
|
||||
void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
|
||||
void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore) const
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore) const
|
||||
{
|
||||
long id = input.GetTranslationId();
|
||||
boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
|
||||
multiset<string> wordsInPhrase = GetWordsInPhrase(targetPhrase);
|
||||
multiset<string> covered;
|
||||
set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
|
||||
refIt->second.begin(), refIt->second.end(),
|
||||
inserter(covered, covered.begin()));
|
||||
refIt->second.begin(), refIt->second.end(),
|
||||
inserter(covered, covered.begin()));
|
||||
vector<float> scores;
|
||||
scores.push_back(covered.size());
|
||||
|
||||
@ -67,7 +67,8 @@ void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
|
||||
estimatedFutureScore->Assign(this, scores);
|
||||
}
|
||||
|
||||
void CoveredReferenceFeature::Load() {
|
||||
void CoveredReferenceFeature::Load()
|
||||
{
|
||||
InputFileStream refFile(m_path);
|
||||
std::string line;
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
@ -76,7 +77,7 @@ void CoveredReferenceFeature::Load() {
|
||||
vector<string> words = Tokenize(line, " ");
|
||||
multiset<string> wordSet;
|
||||
// TODO make Tokenize work with other containers than vector
|
||||
copy(words.begin(), words.end(), inserter(wordSet, wordSet.begin()));
|
||||
copy(words.begin(), words.end(), inserter(wordSet, wordSet.begin()));
|
||||
m_refs.insert(make_pair(sentenceID++, wordSet));
|
||||
}
|
||||
}
|
||||
@ -107,15 +108,15 @@ FFState* CoveredReferenceFeature::EvaluateWhenApplied(
|
||||
boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
|
||||
if (refIt == m_refs.end()) UTIL_THROW(util::Exception, "Sentence id out of range: " + SPrint<long>(id));
|
||||
set_difference(refIt->second.begin(), refIt->second.end(),
|
||||
ret->m_coveredRef.begin(), ret->m_coveredRef.end(),
|
||||
inserter(remaining, remaining.begin()));
|
||||
ret->m_coveredRef.begin(), ret->m_coveredRef.end(),
|
||||
inserter(remaining, remaining.begin()));
|
||||
|
||||
// which of the remaining words are present in the current phrase
|
||||
multiset<string> wordsInPhrase = GetWordsInPhrase(cur_hypo.GetCurrTargetPhrase());
|
||||
multiset<string> newCovered;
|
||||
set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
|
||||
remaining.begin(), remaining.end(),
|
||||
inserter(newCovered, newCovered.begin()));
|
||||
remaining.begin(), remaining.end(),
|
||||
inserter(newCovered, newCovered.begin()));
|
||||
|
||||
vector<float> estimateScore =
|
||||
cur_hypo.GetCurrTargetPhrase().GetScoreBreakdown().GetScoresForProducer(this);
|
||||
|
@ -52,20 +52,20 @@ public:
|
||||
}
|
||||
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const;
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const;
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
|
||||
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
|
||||
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
}
|
||||
|
||||
FFState* EvaluateWhenApplied(
|
||||
const Hypothesis& cur_hypo,
|
||||
const FFState* prev_state,
|
||||
|
@ -63,30 +63,30 @@ public:
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
void EvaluateWhenApplied(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
void EvaluateWhenApplied(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
void EvaluateWhenApplied(const Syntax::SHyperedge &hyperedge,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
|
||||
}
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
}
|
||||
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {
|
||||
}
|
||||
|
||||
void SetContainer(const DecodeStep *container) {
|
||||
m_container = container;
|
||||
|
@ -48,22 +48,22 @@ public:
|
||||
}
|
||||
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
|
||||
}
|
||||
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
}
|
||||
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -93,12 +93,16 @@ public:
|
||||
}
|
||||
|
||||
static const DynamicCacheBasedLanguageModel* Instance(const std::string& name) {
|
||||
if (s_instance_map.find(name) == s_instance_map.end()){ return NULL; }
|
||||
if (s_instance_map.find(name) == s_instance_map.end()) {
|
||||
return NULL;
|
||||
}
|
||||
return s_instance_map[name];
|
||||
}
|
||||
|
||||
static DynamicCacheBasedLanguageModel* InstanceNonConst(const std::string& name) {
|
||||
if (s_instance_map.find(name) == s_instance_map.end()){ return NULL; }
|
||||
if (s_instance_map.find(name) == s_instance_map.end()) {
|
||||
return NULL;
|
||||
}
|
||||
return s_instance_map[name];
|
||||
}
|
||||
|
||||
@ -126,29 +130,29 @@ public:
|
||||
void Clear();
|
||||
|
||||
virtual void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const;
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const;
|
||||
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
|
||||
}
|
||||
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
}
|
||||
|
||||
void EvaluateWhenApplied(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
|
||||
void EvaluateWhenApplied(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
|
||||
void SetQueryType(size_t type);
|
||||
void SetScoreType(size_t type);
|
||||
|
@ -18,8 +18,8 @@ protected:
|
||||
public:
|
||||
ExternalFeatureState(int stateSize)
|
||||
:m_stateSize(stateSize)
|
||||
,m_data(NULL)
|
||||
{}
|
||||
,m_data(NULL) {
|
||||
}
|
||||
ExternalFeatureState(int stateSize, void *data);
|
||||
|
||||
~ExternalFeatureState() {
|
||||
@ -52,22 +52,22 @@ public:
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {
|
||||
}
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
|
||||
}
|
||||
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
}
|
||||
|
||||
FFState* EvaluateWhenApplied(
|
||||
const Hypothesis& cur_hypo,
|
||||
const FFState* prev_state,
|
||||
|
@ -242,7 +242,7 @@ FeatureRegistry::FeatureRegistry()
|
||||
MOSES_FNAME(SkeletonChangeInput);
|
||||
MOSES_FNAME(SkeletonTranslationOptionListFeature);
|
||||
MOSES_FNAME(SkeletonPT);
|
||||
|
||||
|
||||
#ifdef HAVE_VW
|
||||
MOSES_FNAME(VW);
|
||||
MOSES_FNAME(VWFeatureSourceBagOfWords);
|
||||
@ -322,22 +322,22 @@ void FeatureRegistry::Construct(const std::string &name, const std::string &line
|
||||
|
||||
void FeatureRegistry::PrintFF() const
|
||||
{
|
||||
vector<string> ffs;
|
||||
std::cerr << "Available feature functions:" << std::endl;
|
||||
Map::const_iterator iter;
|
||||
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
|
||||
const string &ffName = iter->first;
|
||||
ffs.push_back(ffName);
|
||||
}
|
||||
vector<string> ffs;
|
||||
std::cerr << "Available feature functions:" << std::endl;
|
||||
Map::const_iterator iter;
|
||||
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
|
||||
const string &ffName = iter->first;
|
||||
ffs.push_back(ffName);
|
||||
}
|
||||
|
||||
vector<string>::const_iterator iterVec;
|
||||
std::sort(ffs.begin(), ffs.end());
|
||||
for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
|
||||
const string &ffName = *iterVec;
|
||||
std::cerr << ffName << " ";
|
||||
}
|
||||
vector<string>::const_iterator iterVec;
|
||||
std::sort(ffs.begin(), ffs.end());
|
||||
for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
|
||||
const string &ffName = *iterVec;
|
||||
std::cerr << ffName << " ";
|
||||
}
|
||||
|
||||
std::cerr << std::endl;
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
|
||||
} // namespace Moses
|
||||
|
@ -38,8 +38,8 @@ void FeatureFunction::Destroy()
|
||||
void FeatureFunction::CallChangeSource(InputType *&input)
|
||||
{
|
||||
for (size_t i = 0; i < s_staticColl.size(); ++i) {
|
||||
const FeatureFunction &ff = *s_staticColl[i];
|
||||
ff.ChangeSource(input);
|
||||
const FeatureFunction &ff = *s_staticColl[i];
|
||||
ff.ChangeSource(input);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -111,13 +111,13 @@ public:
|
||||
// may have more factors than actually need, but not guaranteed.
|
||||
// For SCFG decoding, the source contains non-terminals, NOT the raw source from the input sentence
|
||||
virtual void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const = 0;
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const = 0;
|
||||
|
||||
// override this method if you want to change the input before decoding
|
||||
virtual void ChangeSource(InputType *&input) const
|
||||
{}
|
||||
virtual void ChangeSource(InputType *&input) const {
|
||||
}
|
||||
|
||||
// This method is called once all the translation options are retrieved from the phrase table, and
|
||||
// just before search.
|
||||
@ -127,12 +127,12 @@ public:
|
||||
// For pb models, stackvec is NULL.
|
||||
// No FF should set estimatedFutureScore in both overloads!
|
||||
virtual void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const = 0;
|
||||
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const = 0;
|
||||
|
||||
// This method is called once all the translation options are retrieved from the phrase table, and
|
||||
// just before search.
|
||||
// 'inputPath' is guaranteed to be the raw substring from the input. No factors were added or taken away
|
||||
@ -141,7 +141,7 @@ public:
|
||||
// For pb models, stackvec is NULL.
|
||||
// No FF should set estimatedFutureScore in both overloads!
|
||||
virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const = 0;
|
||||
, const TranslationOptionList &translationOptionList) const = 0;
|
||||
|
||||
virtual void SetParameter(const std::string& key, const std::string& value);
|
||||
virtual void ReadParameters();
|
||||
|
@ -165,11 +165,11 @@ float GlobalLexicalModel::GetFromCacheOrScorePhrase( const TargetPhrase& targetP
|
||||
}
|
||||
|
||||
void GlobalLexicalModel::EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{
|
||||
scoreBreakdown.PlusEquals( this, GetFromCacheOrScorePhrase(targetPhrase) );
|
||||
scoreBreakdown.PlusEquals( this, GetFromCacheOrScorePhrase(targetPhrase) );
|
||||
}
|
||||
|
||||
bool GlobalLexicalModel::IsUseable(const FactorMask &mask) const
|
||||
|
@ -71,29 +71,29 @@ public:
|
||||
bool IsUseable(const FactorMask &mask) const;
|
||||
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const;
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const;
|
||||
|
||||
void EvaluateWhenApplied(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
void EvaluateWhenApplied(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
|
||||
}
|
||||
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -27,8 +27,8 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
|
||||
// read optional punctuation and bias specifications
|
||||
if (spec.size() > 0) {
|
||||
if (spec.size() != 2 && spec.size() != 3 && spec.size() != 4 && spec.size() != 6) {
|
||||
std::cerr << "Format of glm feature is <factor-src>-<factor-tgt> [ignore-punct] [use-bias] "
|
||||
<< "[context-type] [filename-src filename-tgt]";
|
||||
std::cerr << "Format of glm feature is <factor-src>-<factor-tgt> [ignore-punct] [use-bias] "
|
||||
<< "[context-type] [filename-src filename-tgt]";
|
||||
//return false;
|
||||
}
|
||||
|
||||
@ -48,7 +48,7 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
|
||||
factors = Tokenize(modelSpec[i],"-");
|
||||
|
||||
if ( factors.size() != 2 ) {
|
||||
std::cerr << "Wrong factor definition for global lexical model unlimited: " << modelSpec[i];
|
||||
std::cerr << "Wrong factor definition for global lexical model unlimited: " << modelSpec[i];
|
||||
//return false;
|
||||
}
|
||||
|
||||
@ -60,10 +60,10 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
|
||||
if (restricted) {
|
||||
cerr << "loading word translation word lists from " << filenameSource << " and " << filenameTarget << endl;
|
||||
if (!glmu->Load(filenameSource, filenameTarget)) {
|
||||
std::cerr << "Unable to load word lists for word translation feature from files "
|
||||
<< filenameSource
|
||||
<< " and "
|
||||
<< filenameTarget;
|
||||
std::cerr << "Unable to load word lists for word translation feature from files "
|
||||
<< filenameSource
|
||||
<< " and "
|
||||
<< filenameTarget;
|
||||
//return false;
|
||||
}
|
||||
}
|
||||
|
@ -82,31 +82,31 @@ public:
|
||||
//TODO: This implements the old interface, but cannot be updated because
|
||||
//it appears to be stateful
|
||||
void EvaluateWhenApplied(const Hypothesis& cur_hypo,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
void EvaluateWhenApplied(const ChartHypothesis& /* cur_hypo */,
|
||||
int /* featureID */,
|
||||
ScoreComponentCollection* ) const {
|
||||
int /* featureID */,
|
||||
ScoreComponentCollection* ) const {
|
||||
throw std::logic_error("GlobalLexicalModelUnlimited not supported in chart decoder, yet");
|
||||
}
|
||||
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
|
||||
}
|
||||
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
}
|
||||
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {
|
||||
}
|
||||
|
||||
void AddFeature(ScoreComponentCollection* accumulator,
|
||||
StringPiece sourceTrigger, StringPiece sourceWord, StringPiece targetTrigger,
|
||||
|
@ -19,33 +19,33 @@ public:
|
||||
}
|
||||
|
||||
virtual void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {
|
||||
}
|
||||
|
||||
virtual void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
|
||||
}
|
||||
|
||||
virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
}
|
||||
|
||||
virtual void EvaluateWhenApplied(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
|
||||
/**
|
||||
* Same for chart-based features.
|
||||
**/
|
||||
virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
@ -45,11 +45,11 @@ void InputFeature::SetParameter(const std::string& key, const std::string& value
|
||||
}
|
||||
|
||||
void InputFeature::EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore) const
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore) const
|
||||
{
|
||||
if (m_legacy) {
|
||||
//binary phrase-table does input feature itself
|
||||
|
@ -42,28 +42,28 @@ public:
|
||||
}
|
||||
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {
|
||||
}
|
||||
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
|
||||
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
}
|
||||
|
||||
void EvaluateWhenApplied(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
void EvaluateWhenApplied(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
@ -4,236 +4,241 @@ namespace Moses
|
||||
{
|
||||
|
||||
InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool terminal):
|
||||
m_value_nt(0),
|
||||
m_isTerminal(terminal)
|
||||
{
|
||||
m_value_nt(0),
|
||||
m_isTerminal(terminal)
|
||||
{
|
||||
|
||||
if (len > 0) {
|
||||
m_value.assign(line, start, len);
|
||||
}
|
||||
if (len > 0) {
|
||||
m_value.assign(line, start, len);
|
||||
}
|
||||
}
|
||||
|
||||
InternalTree::InternalTree(const std::string & line, const bool terminal):
|
||||
m_value_nt(0),
|
||||
m_isTerminal(terminal)
|
||||
{
|
||||
m_value_nt(0),
|
||||
m_isTerminal(terminal)
|
||||
{
|
||||
|
||||
size_t found = line.find_first_of("[] ");
|
||||
size_t found = line.find_first_of("[] ");
|
||||
|
||||
if (found == line.npos) {
|
||||
m_value = line;
|
||||
}
|
||||
else {
|
||||
AddSubTree(line, 0);
|
||||
}
|
||||
if (found == line.npos) {
|
||||
m_value = line;
|
||||
} else {
|
||||
AddSubTree(line, 0);
|
||||
}
|
||||
}
|
||||
|
||||
size_t InternalTree::AddSubTree(const std::string & line, size_t pos) {
|
||||
size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
|
||||
{
|
||||
|
||||
char token = 0;
|
||||
size_t len = 0;
|
||||
char token = 0;
|
||||
size_t len = 0;
|
||||
|
||||
while (token != ']' && pos != std::string::npos)
|
||||
{
|
||||
size_t oldpos = pos;
|
||||
pos = line.find_first_of("[] ", pos);
|
||||
if (pos == std::string::npos) break;
|
||||
token = line[pos];
|
||||
len = pos-oldpos;
|
||||
while (token != ']' && pos != std::string::npos) {
|
||||
size_t oldpos = pos;
|
||||
pos = line.find_first_of("[] ", pos);
|
||||
if (pos == std::string::npos) break;
|
||||
token = line[pos];
|
||||
len = pos-oldpos;
|
||||
|
||||
if (token == '[') {
|
||||
if (!m_value.empty()) {
|
||||
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
|
||||
pos = m_children.back()->AddSubTree(line, pos+1);
|
||||
}
|
||||
else {
|
||||
if (len > 0) {
|
||||
m_value.assign(line, oldpos, len);
|
||||
}
|
||||
pos = AddSubTree(line, pos+1);
|
||||
}
|
||||
}
|
||||
else if (token == ' ' || token == ']') {
|
||||
if (len > 0 && m_value.empty()) {
|
||||
m_value.assign(line, oldpos, len);
|
||||
}
|
||||
else if (len > 0) {
|
||||
m_isTerminal = false;
|
||||
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
|
||||
}
|
||||
if (token == ' ') {
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_children.empty()) {
|
||||
m_isTerminal = false;
|
||||
if (token == '[') {
|
||||
if (!m_value.empty()) {
|
||||
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
|
||||
pos = m_children.back()->AddSubTree(line, pos+1);
|
||||
} else {
|
||||
if (len > 0) {
|
||||
m_value.assign(line, oldpos, len);
|
||||
}
|
||||
pos = AddSubTree(line, pos+1);
|
||||
}
|
||||
} else if (token == ' ' || token == ']') {
|
||||
if (len > 0 && m_value.empty()) {
|
||||
m_value.assign(line, oldpos, len);
|
||||
} else if (len > 0) {
|
||||
m_isTerminal = false;
|
||||
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
|
||||
}
|
||||
if (token == ' ') {
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
|
||||
if (pos == std::string::npos) {
|
||||
return line.size();
|
||||
if (!m_children.empty()) {
|
||||
m_isTerminal = false;
|
||||
}
|
||||
return std::min(line.size(),pos+1);
|
||||
}
|
||||
|
||||
if (pos == std::string::npos) {
|
||||
return line.size();
|
||||
}
|
||||
return std::min(line.size(),pos+1);
|
||||
|
||||
}
|
||||
|
||||
std::string InternalTree::GetString(bool start) const {
|
||||
std::string InternalTree::GetString(bool start) const
|
||||
{
|
||||
|
||||
std::string ret = "";
|
||||
if (!start) {
|
||||
ret += " ";
|
||||
}
|
||||
std::string ret = "";
|
||||
if (!start) {
|
||||
ret += " ";
|
||||
}
|
||||
|
||||
if (!m_isTerminal) {
|
||||
ret += "[";
|
||||
}
|
||||
if (!m_isTerminal) {
|
||||
ret += "[";
|
||||
}
|
||||
|
||||
ret += m_value;
|
||||
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it)
|
||||
{
|
||||
ret += (*it)->GetString(false);
|
||||
}
|
||||
ret += m_value;
|
||||
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
ret += (*it)->GetString(false);
|
||||
}
|
||||
|
||||
if (!m_isTerminal) {
|
||||
ret += "]";
|
||||
}
|
||||
return ret;
|
||||
if (!m_isTerminal) {
|
||||
ret += "]";
|
||||
}
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
|
||||
void InternalTree::Combine(const std::vector<TreePointer> &previous) {
|
||||
void InternalTree::Combine(const std::vector<TreePointer> &previous)
|
||||
{
|
||||
|
||||
std::vector<TreePointer>::iterator it;
|
||||
bool found = false;
|
||||
leafNT next_leafNT(this);
|
||||
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
|
||||
found = next_leafNT(it);
|
||||
if (found) {
|
||||
*it = *it_prev;
|
||||
}
|
||||
else {
|
||||
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
|
||||
}
|
||||
std::vector<TreePointer>::iterator it;
|
||||
bool found = false;
|
||||
leafNT next_leafNT(this);
|
||||
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
|
||||
found = next_leafNT(it);
|
||||
if (found) {
|
||||
*it = *it_prev;
|
||||
} else {
|
||||
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const {
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if ((*it)->GetLabel() == label) {
|
||||
return true;
|
||||
}
|
||||
bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
|
||||
{
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if ((*it)->GetLabel() == label) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const {
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if ((*it)->GetLabel() == label) {
|
||||
return true;
|
||||
}
|
||||
std::vector<TreePointer>::const_iterator it2;
|
||||
if ((*it)->RecursiveSearch(label, it2)) {
|
||||
it = it2;
|
||||
return true;
|
||||
}
|
||||
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
|
||||
{
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if ((*it)->GetLabel() == label) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
std::vector<TreePointer>::const_iterator it2;
|
||||
if ((*it)->RecursiveSearch(label, it2)) {
|
||||
it = it2;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if ((*it)->GetLabel() == label) {
|
||||
parent = this;
|
||||
return true;
|
||||
}
|
||||
std::vector<TreePointer>::const_iterator it2;
|
||||
if ((*it)->RecursiveSearch(label, it2, parent)) {
|
||||
it = it2;
|
||||
return true;
|
||||
}
|
||||
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
|
||||
{
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if ((*it)->GetLabel() == label) {
|
||||
parent = this;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
std::vector<TreePointer>::const_iterator it2;
|
||||
if ((*it)->RecursiveSearch(label, it2, parent)) {
|
||||
it = it2;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const {
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if ((*it)->GetNTLabel() == label) {
|
||||
return true;
|
||||
}
|
||||
bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
|
||||
{
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if ((*it)->GetNTLabel() == label) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const {
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if ((*it)->GetNTLabel() == label) {
|
||||
return true;
|
||||
}
|
||||
std::vector<TreePointer>::const_iterator it2;
|
||||
if ((*it)->RecursiveSearch(label, it2)) {
|
||||
it = it2;
|
||||
return true;
|
||||
}
|
||||
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
|
||||
{
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if ((*it)->GetNTLabel() == label) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
std::vector<TreePointer>::const_iterator it2;
|
||||
if ((*it)->RecursiveSearch(label, it2)) {
|
||||
it = it2;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if ((*it)->GetNTLabel() == label) {
|
||||
parent = this;
|
||||
return true;
|
||||
}
|
||||
std::vector<TreePointer>::const_iterator it2;
|
||||
if ((*it)->RecursiveSearch(label, it2, parent)) {
|
||||
it = it2;
|
||||
return true;
|
||||
}
|
||||
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
|
||||
{
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if ((*it)->GetNTLabel() == label) {
|
||||
parent = this;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
std::vector<TreePointer>::const_iterator it2;
|
||||
if ((*it)->RecursiveSearch(label, it2, parent)) {
|
||||
it = it2;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const {
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
|
||||
return true;
|
||||
}
|
||||
bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
|
||||
{
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const {
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
|
||||
return true;
|
||||
}
|
||||
std::vector<TreePointer>::const_iterator it2;
|
||||
if ((*it)->RecursiveSearch(labels, it2)) {
|
||||
it = it2;
|
||||
return true;
|
||||
}
|
||||
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
|
||||
{
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
std::vector<TreePointer>::const_iterator it2;
|
||||
if ((*it)->RecursiveSearch(labels, it2)) {
|
||||
it = it2;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
|
||||
parent = this;
|
||||
return true;
|
||||
}
|
||||
std::vector<TreePointer>::const_iterator it2;
|
||||
if ((*it)->RecursiveSearch(labels, it2, parent)) {
|
||||
it = it2;
|
||||
return true;
|
||||
}
|
||||
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
|
||||
{
|
||||
for (it = m_children.begin(); it != m_children.end(); ++it) {
|
||||
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
|
||||
parent = this;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
std::vector<TreePointer>::const_iterator it2;
|
||||
if ((*it)->RecursiveSearch(labels, it2, parent)) {
|
||||
it = it2;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
@ -19,79 +19,79 @@ typedef int NTLabel;
|
||||
|
||||
class InternalTree
|
||||
{
|
||||
std::string m_value;
|
||||
NTLabel m_value_nt;
|
||||
std::vector<TreePointer> m_children;
|
||||
bool m_isTerminal;
|
||||
std::string m_value;
|
||||
NTLabel m_value_nt;
|
||||
std::vector<TreePointer> m_children;
|
||||
bool m_isTerminal;
|
||||
public:
|
||||
InternalTree(const std::string & line, size_t start, size_t len, const bool terminal);
|
||||
InternalTree(const std::string & line, const bool terminal = false);
|
||||
InternalTree(const InternalTree & tree):
|
||||
m_value(tree.m_value),
|
||||
m_isTerminal(tree.m_isTerminal) {
|
||||
const std::vector<TreePointer> & children = tree.m_children;
|
||||
for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
|
||||
m_children.push_back(boost::make_shared<InternalTree>(**it));
|
||||
}
|
||||
}
|
||||
size_t AddSubTree(const std::string & line, size_t start);
|
||||
|
||||
std::string GetString(bool start = true) const;
|
||||
void Combine(const std::vector<TreePointer> &previous);
|
||||
const std::string & GetLabel() const {
|
||||
return m_value;
|
||||
InternalTree(const std::string & line, size_t start, size_t len, const bool terminal);
|
||||
InternalTree(const std::string & line, const bool terminal = false);
|
||||
// Deep-copy constructor: copies the string label, the integer NT label and
// the terminal flag, then clones every child (recursively, through this same
// constructor) so the new tree shares no nodes with 'tree'.
|
||||
InternalTree(const InternalTree & tree):
|
||||
m_value(tree.m_value),
|
||||
m_value_nt(tree.m_value_nt), // was left uninitialised in copies before
|
||||
m_isTerminal(tree.m_isTerminal) {
|
||||
const std::vector<TreePointer> & children = tree.m_children;
|
||||
// one allocation up front instead of repeated growth while cloning
|
||||
m_children.reserve(children.size());
|
||||
for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); ++it) {
|
||||
m_children.push_back(boost::make_shared<InternalTree>(**it));
|
||||
}
|
||||
}
|
||||
size_t AddSubTree(const std::string & line, size_t start);
|
||||
|
||||
// optionally identify label by int instead of string;
|
||||
// allows abstraction if multiple nonterminal strings should map to same label.
|
||||
const NTLabel & GetNTLabel() const {
|
||||
return m_value_nt;
|
||||
}
|
||||
std::string GetString(bool start = true) const;
|
||||
void Combine(const std::vector<TreePointer> &previous);
|
||||
const std::string & GetLabel() const {
|
||||
return m_value;
|
||||
}
|
||||
|
||||
void SetNTLabel(NTLabel value) {
|
||||
m_value_nt = value;
|
||||
}
|
||||
// optionally identify label by int instead of string;
|
||||
// allows abstraction if multiple nonterminal strings should map to same label.
|
||||
const NTLabel & GetNTLabel() const {
|
||||
return m_value_nt;
|
||||
}
|
||||
|
||||
size_t GetLength() const {
|
||||
return m_children.size();
|
||||
}
|
||||
std::vector<TreePointer> & GetChildren() {
|
||||
return m_children;
|
||||
}
|
||||
void SetNTLabel(NTLabel value) {
|
||||
m_value_nt = value;
|
||||
}
|
||||
|
||||
bool IsTerminal() const {
|
||||
return m_isTerminal;
|
||||
}
|
||||
size_t GetLength() const {
|
||||
return m_children.size();
|
||||
}
|
||||
std::vector<TreePointer> & GetChildren() {
|
||||
return m_children;
|
||||
}
|
||||
|
||||
bool IsLeafNT() const {
|
||||
return (!m_isTerminal && m_children.size() == 0);
|
||||
}
|
||||
bool IsTerminal() const {
|
||||
return m_isTerminal;
|
||||
}
|
||||
|
||||
// different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
|
||||
// can be used for formulating syntax constraints.
|
||||
bool IsLeafNT() const {
|
||||
return (!m_isTerminal && m_children.size() == 0);
|
||||
}
|
||||
|
||||
// if found, 'it' is iterator to first tree node that matches search string
|
||||
bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
|
||||
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
|
||||
// different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
|
||||
// can be used for formulating syntax constraints.
|
||||
|
||||
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
|
||||
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
|
||||
// if found, 'it' is iterator to first tree node that matches search string
|
||||
bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
|
||||
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
|
||||
|
||||
// use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
|
||||
// if found, 'it' is iterator to first tree node that matches search string
|
||||
bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
|
||||
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
|
||||
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
|
||||
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
|
||||
|
||||
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
|
||||
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
|
||||
// use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
|
||||
// if found, 'it' is iterator to first tree node that matches search string
|
||||
bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
|
||||
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
|
||||
|
||||
// pass vector of possible labels to search
|
||||
// if found, 'it' is iterator to first tree node that matches search string
|
||||
bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
|
||||
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
|
||||
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
|
||||
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
|
||||
|
||||
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
|
||||
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
|
||||
// pass vector of possible labels to search
|
||||
// if found, 'it' is iterator to first tree node that matches search string
|
||||
bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
|
||||
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
|
||||
|
||||
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
|
||||
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
|
||||
|
||||
|
||||
};
|
||||
@ -101,77 +101,79 @@ class TreeState : public FFState
|
||||
TreePointer m_tree;
|
||||
public:
|
||||
TreeState(TreePointer tree)
|
||||
:m_tree(tree)
|
||||
{}
|
||||
|
||||
TreePointer GetTree() const {
|
||||
return m_tree;
|
||||
:m_tree(tree) {
|
||||
}
|
||||
|
||||
int Compare(const FFState& other) const {return 0;};
|
||||
TreePointer GetTree() const {
|
||||
return m_tree;
|
||||
}
|
||||
|
||||
int Compare(const FFState& other) const {
|
||||
return 0;
|
||||
};
|
||||
};
|
||||
|
||||
// Python-like generator that yields next nonterminal leaf on every call
|
||||
$generator(leafNT) {
|
||||
std::vector<TreePointer>::iterator it;
|
||||
InternalTree* tree;
|
||||
leafNT(InternalTree* root = 0): tree(root) {}
|
||||
$emit(std::vector<TreePointer>::iterator)
|
||||
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
|
||||
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
|
||||
$yield(it);
|
||||
}
|
||||
else if ((*it)->GetLength() > 0) {
|
||||
if ((*it).get()) { // normal pointer to same object that TreePointer points to
|
||||
$restart(tree = (*it).get());
|
||||
}
|
||||
}
|
||||
$generator(leafNT)
|
||||
{
|
||||
std::vector<TreePointer>::iterator it;
|
||||
InternalTree* tree;
|
||||
leafNT(InternalTree* root = 0): tree(root) {}
|
||||
$emit(std::vector<TreePointer>::iterator)
|
||||
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
|
||||
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
|
||||
$yield(it);
|
||||
} else if ((*it)->GetLength() > 0) {
|
||||
if ((*it).get()) { // normal pointer to same object that TreePointer points to
|
||||
$restart(tree = (*it).get());
|
||||
}
|
||||
}
|
||||
$stop;
|
||||
}
|
||||
$stop;
|
||||
};
|
||||
|
||||
|
||||
// Python-like generator that yields the parent of the next nonterminal leaf on every call
|
||||
$generator(leafNTParent) {
|
||||
std::vector<TreePointer>::iterator it;
|
||||
InternalTree* tree;
|
||||
leafNTParent(InternalTree* root = 0): tree(root) {}
|
||||
$emit(InternalTree*)
|
||||
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
|
||||
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
|
||||
$yield(tree);
|
||||
}
|
||||
else if ((*it)->GetLength() > 0) {
|
||||
if ((*it).get()) {
|
||||
$restart(tree = (*it).get());
|
||||
}
|
||||
}
|
||||
$generator(leafNTParent)
|
||||
{
|
||||
std::vector<TreePointer>::iterator it;
|
||||
InternalTree* tree;
|
||||
leafNTParent(InternalTree* root = 0): tree(root) {}
|
||||
$emit(InternalTree*)
|
||||
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
|
||||
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
|
||||
$yield(tree);
|
||||
} else if ((*it)->GetLength() > 0) {
|
||||
if ((*it).get()) {
|
||||
$restart(tree = (*it).get());
|
||||
}
|
||||
}
|
||||
$stop;
|
||||
}
|
||||
$stop;
|
||||
};
|
||||
|
||||
// Python-like generator that yields the next nonterminal leaf on every call, and also stores the path from the root of the tree to the nonterminal
|
||||
$generator(leafNTPath) {
|
||||
std::vector<TreePointer>::iterator it;
|
||||
InternalTree* tree;
|
||||
std::vector<InternalTree*> * path;
|
||||
leafNTPath(InternalTree* root = NULL, std::vector<InternalTree*> * orig = NULL): tree(root), path(orig) {}
|
||||
$emit(std::vector<TreePointer>::iterator)
|
||||
path->push_back(tree);
|
||||
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
|
||||
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
|
||||
path->push_back((*it).get());
|
||||
$yield(it);
|
||||
path->pop_back();
|
||||
}
|
||||
else if ((*it)->GetLength() > 0) {
|
||||
if ((*it).get()) {
|
||||
$restart(tree = (*it).get());
|
||||
}
|
||||
}
|
||||
$generator(leafNTPath)
|
||||
{
|
||||
std::vector<TreePointer>::iterator it;
|
||||
InternalTree* tree;
|
||||
std::vector<InternalTree*> * path;
|
||||
leafNTPath(InternalTree* root = NULL, std::vector<InternalTree*> * orig = NULL): tree(root), path(orig) {}
|
||||
$emit(std::vector<TreePointer>::iterator)
|
||||
path->push_back(tree);
|
||||
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
|
||||
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
|
||||
path->push_back((*it).get());
|
||||
$yield(it);
|
||||
path->pop_back();
|
||||
} else if ((*it)->GetLength() > 0) {
|
||||
if ((*it).get()) {
|
||||
$restart(tree = (*it).get());
|
||||
}
|
||||
}
|
||||
path->pop_back();
|
||||
$stop;
|
||||
}
|
||||
path->pop_back();
|
||||
$stop;
|
||||
};
|
||||
|
||||
|
||||
|
@ -15,7 +15,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
|
||||
std::cerr << "Initializing LexicalReordering.." << std::endl;
|
||||
|
||||
map<string,string> sparseArgs;
|
||||
m_haveDefaultScores = false;
|
||||
m_haveDefaultScores = false;
|
||||
for (size_t i = 0; i < m_args.size(); ++i) {
|
||||
const vector<string> &args = m_args[i];
|
||||
|
||||
@ -36,7 +36,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
|
||||
for(size_t i=0; i<tokens.size(); i++) {
|
||||
m_defaultScores.push_back( TransformScore( Scan<float>(tokens[i]) ) );
|
||||
}
|
||||
m_haveDefaultScores = true;
|
||||
m_haveDefaultScores = true;
|
||||
} else {
|
||||
UTIL_THROW(util::Exception,"Unknown argument " + args[0]);
|
||||
}
|
||||
@ -84,8 +84,8 @@ Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
|
||||
}
|
||||
|
||||
FFState* LexicalReordering::EvaluateWhenApplied(const Hypothesis& hypo,
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* out) const
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* out) const
|
||||
{
|
||||
VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START" << std::endl);
|
||||
Scores score(GetNumScoreComponents(), 0);
|
||||
|
@ -46,33 +46,37 @@ public:
|
||||
Scores GetProb(const Phrase& f, const Phrase& e) const;
|
||||
|
||||
virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo,
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
virtual FFState* EvaluateWhenApplied(const ChartHypothesis&,
|
||||
int /* featureID */,
|
||||
ScoreComponentCollection*) const {
|
||||
int /* featureID */,
|
||||
ScoreComponentCollection*) const {
|
||||
UTIL_THROW(util::Exception, "LexicalReordering is not valid for chart decoder");
|
||||
}
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
|
||||
}
|
||||
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
}
|
||||
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
bool GetHaveDefaultScores() { return m_haveDefaultScores; }
|
||||
float GetDefaultScore( size_t i ) { return m_defaultScores[i]; }
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {
|
||||
}
|
||||
bool GetHaveDefaultScores() {
|
||||
return m_haveDefaultScores;
|
||||
}
|
||||
float GetDefaultScore( size_t i ) {
|
||||
return m_defaultScores[i];
|
||||
}
|
||||
|
||||
private:
|
||||
bool DecodeCondition(std::string s);
|
||||
|
@ -39,7 +39,7 @@ size_t LexicalReorderingConfiguration::GetNumScoreComponents() const
|
||||
}
|
||||
|
||||
void LexicalReorderingConfiguration::ConfigureSparse
|
||||
(const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer)
|
||||
(const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer)
|
||||
{
|
||||
if (sparseArgs.size()) {
|
||||
m_sparse.reset(new SparseReordering(sparseArgs, producer));
|
||||
@ -95,7 +95,7 @@ LexicalReorderingConfiguration::LexicalReorderingConfiguration(const std::string
|
||||
}
|
||||
|
||||
if (m_modelType == None) {
|
||||
std::cerr << "You need to specify the type of the reordering model (msd, monotonicity,...)" << std::endl;
|
||||
std::cerr << "You need to specify the type of the reordering model (msd, monotonicity,...)" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
@ -134,7 +134,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
|
||||
{
|
||||
// don't call this on a bidirectional object
|
||||
UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward,
|
||||
"Unknown direction: " << m_direction);
|
||||
"Unknown direction: " << m_direction);
|
||||
const TranslationOption* relevantOpt = &topt;
|
||||
if (m_direction != LexicalReorderingConfiguration::Backward) relevantOpt = m_prevOption;
|
||||
const Scores *cachedScores = relevantOpt->GetLexReorderingScores(m_configuration.GetScoreProducer());
|
||||
@ -146,8 +146,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
|
||||
const Scores &scoreSet = *cachedScores;
|
||||
if(m_configuration.CollapseScores()) {
|
||||
scores[m_offset] = scoreSet[m_offset + reoType];
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
|
||||
scores[m_offset + reoType] = scoreSet[m_offset + reoType];
|
||||
}
|
||||
@ -158,8 +157,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
|
||||
Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0);
|
||||
if(m_configuration.CollapseScores()) {
|
||||
scores[m_offset] = m_configuration.GetScoreProducer()->GetDefaultScore(m_offset + reoType);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
scores[m_offset + reoType] = m_configuration.GetScoreProducer()->GetDefaultScore(m_offset + reoType);
|
||||
}
|
||||
accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
|
||||
|
@ -124,7 +124,7 @@ protected:
|
||||
int ComparePrevScores(const TranslationOption *other) const;
|
||||
|
||||
//constants for the different type of reorderings (corresponding to indexes in the table file)
|
||||
public:
|
||||
public:
|
||||
static const ReorderingType M = 0; // monotonic
|
||||
static const ReorderingType NM = 1; // non-monotonic
|
||||
static const ReorderingType S = 1; // swap
|
||||
|
@ -16,10 +16,11 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
const std::string& SparseReorderingFeatureKey::Name (const string& wordListId) {
|
||||
const std::string& SparseReorderingFeatureKey::Name (const string& wordListId)
|
||||
{
|
||||
static string kSep = "-";
|
||||
static string name;
|
||||
ostringstream buf;
|
||||
@ -55,7 +56,7 @@ const std::string& SparseReorderingFeatureKey::Name (const string& wordListId) {
|
||||
}
|
||||
|
||||
SparseReordering::SparseReordering(const map<string,string>& config, const LexicalReordering* producer)
|
||||
: m_producer(producer)
|
||||
: m_producer(producer)
|
||||
{
|
||||
static const string kSource= "source";
|
||||
static const string kTarget = "target";
|
||||
@ -93,22 +94,24 @@ SparseReordering::SparseReordering(const map<string,string>& config, const Lexic
|
||||
|
||||
}
|
||||
|
||||
void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster) {
|
||||
void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster)
|
||||
{
|
||||
for (size_t type = SparseReorderingFeatureKey::Stack;
|
||||
type <= SparseReorderingFeatureKey::Between; ++type) {
|
||||
type <= SparseReorderingFeatureKey::Between; ++type) {
|
||||
for (size_t position = SparseReorderingFeatureKey::First;
|
||||
position <= SparseReorderingFeatureKey::Last; ++position) {
|
||||
position <= SparseReorderingFeatureKey::Last; ++position) {
|
||||
for (int reoType = 0; reoType <= LexicalReorderingState::MAX; ++reoType) {
|
||||
SparseReorderingFeatureKey key(
|
||||
index, static_cast<SparseReorderingFeatureKey::Type>(type), factor, isCluster,
|
||||
static_cast<SparseReorderingFeatureKey::Position>(position), side, reoType);
|
||||
static_cast<SparseReorderingFeatureKey::Position>(position), side, reoType);
|
||||
m_featureMap.insert(pair<SparseReorderingFeatureKey, FName>(key,m_producer->GetFeatureName(key.Name(id))));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<WordList>* pWordLists) {
|
||||
void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<WordList>* pWordLists)
|
||||
{
|
||||
ifstream fh(filename.c_str());
|
||||
UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename);
|
||||
string line;
|
||||
@ -118,12 +121,13 @@ void SparseReordering::ReadWordList(const string& filename, const string& id, Sp
|
||||
//TODO: StringPiece
|
||||
const Factor* factor = FactorCollection::Instance().AddFactor(line);
|
||||
pWordLists->back().second.insert(factor);
|
||||
PreCalculateFeatureNames(pWordLists->size()-1, id, side, factor, false);
|
||||
PreCalculateFeatureNames(pWordLists->size()-1, id, side, factor, false);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void SparseReordering::ReadClusterMap(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<ClusterMap>* pClusterMaps) {
|
||||
void SparseReordering::ReadClusterMap(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<ClusterMap>* pClusterMaps)
|
||||
{
|
||||
pClusterMaps->push_back(ClusterMap());
|
||||
pClusterMaps->back().first = id;
|
||||
util::FilePiece file(filename.c_str());
|
||||
@ -141,15 +145,16 @@ void SparseReordering::ReadClusterMap(const string& filename, const string& id,
|
||||
if (!lineIter) UTIL_THROW(util::Exception, "Malformed cluster line (missing cluster id): '" << line << "'");
|
||||
const Factor* idFactor = FactorCollection::Instance().AddFactor(*lineIter);
|
||||
pClusterMaps->back().second[wordFactor] = idFactor;
|
||||
PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true);
|
||||
PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true);
|
||||
}
|
||||
}
|
||||
|
||||
void SparseReordering::AddFeatures(
|
||||
SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
|
||||
const Word& word, SparseReorderingFeatureKey::Position position,
|
||||
LexicalReorderingState::ReorderingType reoType,
|
||||
ScoreComponentCollection* scores) const {
|
||||
SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
|
||||
const Word& word, SparseReorderingFeatureKey::Position position,
|
||||
LexicalReorderingState::ReorderingType reoType,
|
||||
ScoreComponentCollection* scores) const
|
||||
{
|
||||
|
||||
const Factor* wordFactor = word.GetFactor(0);
|
||||
|
||||
@ -186,18 +191,18 @@ void SparseReordering::AddFeatures(
|
||||
}
|
||||
|
||||
void SparseReordering::CopyScores(
|
||||
const TranslationOption& currentOpt,
|
||||
const TranslationOption* previousOpt,
|
||||
const InputType& input,
|
||||
LexicalReorderingState::ReorderingType reoType,
|
||||
LexicalReorderingConfiguration::Direction direction,
|
||||
ScoreComponentCollection* scores) const
|
||||
const TranslationOption& currentOpt,
|
||||
const TranslationOption* previousOpt,
|
||||
const InputType& input,
|
||||
LexicalReorderingState::ReorderingType reoType,
|
||||
LexicalReorderingConfiguration::Direction direction,
|
||||
ScoreComponentCollection* scores) const
|
||||
{
|
||||
if (m_useBetween && direction == LexicalReorderingConfiguration::Backward &&
|
||||
(reoType == LexicalReorderingState::D || reoType == LexicalReorderingState::DL ||
|
||||
reoType == LexicalReorderingState::DR)) {
|
||||
reoType == LexicalReorderingState::DR)) {
|
||||
size_t gapStart, gapEnd;
|
||||
//NB: Using a static cast for speed, but could be nasty if
|
||||
//NB: Using a static cast for speed, but could be nasty if
|
||||
//using non-sentence input
|
||||
const Sentence& sentence = static_cast<const Sentence&>(input);
|
||||
const WordsRange& currentRange = currentOpt.GetSourceWordsRange();
|
||||
@ -217,9 +222,9 @@ void SparseReordering::CopyScores(
|
||||
}
|
||||
assert(gapStart < gapEnd);
|
||||
for (size_t i = gapStart; i < gapEnd; ++i) {
|
||||
AddFeatures(SparseReorderingFeatureKey::Between,
|
||||
SparseReorderingFeatureKey::Source, sentence.GetWord(i),
|
||||
SparseReorderingFeatureKey::First, reoType, scores);
|
||||
AddFeatures(SparseReorderingFeatureKey::Between,
|
||||
SparseReorderingFeatureKey::Source, sentence.GetWord(i),
|
||||
SparseReorderingFeatureKey::First, reoType, scores);
|
||||
}
|
||||
}
|
||||
//std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
|
||||
@ -240,11 +245,11 @@ void SparseReordering::CopyScores(
|
||||
}
|
||||
const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase();
|
||||
AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0),
|
||||
SparseReorderingFeatureKey::First, reoType, scores);
|
||||
SparseReorderingFeatureKey::First, reoType, scores);
|
||||
AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);
|
||||
const Phrase& targetPhrase = currentOpt.GetTargetPhrase();
|
||||
const Phrase& targetPhrase = currentOpt.GetTargetPhrase();
|
||||
AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0),
|
||||
SparseReorderingFeatureKey::First, reoType, scores);
|
||||
SparseReorderingFeatureKey::First, reoType, scores);
|
||||
AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);
|
||||
|
||||
|
||||
|
@ -23,7 +23,7 @@
|
||||
|
||||
/**
|
||||
Configuration of sparse reordering:
|
||||
|
||||
|
||||
The sparse reordering feature is configured using sparse-* configs in the lexical reordering line.
|
||||
sparse-words-(source|target)-<id>=<filename> -- Features which fire for the words in the list
|
||||
sparse-clusters-(source|target)-<id>=<filename> -- Features which fire for clusters in the list. Format
|
||||
@ -38,7 +38,7 @@
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
/**
|
||||
/**
|
||||
* Used to store pre-calculated feature names.
|
||||
**/
|
||||
struct SparseReorderingFeatureKey {
|
||||
@ -51,17 +51,17 @@ struct SparseReorderingFeatureKey {
|
||||
LexicalReorderingState::ReorderingType reoType;
|
||||
|
||||
SparseReorderingFeatureKey(size_t id_, Type type_, const Factor* word_, bool isCluster_,
|
||||
Position position_, Side side_, LexicalReorderingState::ReorderingType reoType_)
|
||||
Position position_, Side side_, LexicalReorderingState::ReorderingType reoType_)
|
||||
: id(id_), type(type_), word(word_), isCluster(isCluster_),
|
||||
position(position_), side(side_), reoType(reoType_)
|
||||
{}
|
||||
position(position_), side(side_), reoType(reoType_) {
|
||||
}
|
||||
|
||||
const std::string& Name(const std::string& wordListId) ;
|
||||
const std::string& Name(const std::string& wordListId) ;
|
||||
};
|
||||
|
||||
struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorderingFeatureKey, std::size_t> {
|
||||
std::size_t operator()(const SparseReorderingFeatureKey& key) const {
|
||||
//TODO: can we just hash the memory?
|
||||
//TODO: can we just hash the memory?
|
||||
//not sure, there could be random padding
|
||||
std::size_t seed = 0;
|
||||
seed = util::MurmurHashNative(&key.id, sizeof(key.id), seed);
|
||||
@ -76,7 +76,7 @@ struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorder
|
||||
};
|
||||
|
||||
struct EqualsSparseReorderingFeatureKey :
|
||||
public std::binary_function<SparseReorderingFeatureKey, SparseReorderingFeatureKey, bool> {
|
||||
public std::binary_function<SparseReorderingFeatureKey, SparseReorderingFeatureKey, bool> {
|
||||
bool operator()(const SparseReorderingFeatureKey& left, const SparseReorderingFeatureKey& right) const {
|
||||
//TODO: Can we just compare the memory?
|
||||
return left.id == right.id && left.type == right.type && left.word == right.word &&
|
||||
@ -89,14 +89,14 @@ class SparseReordering
|
||||
{
|
||||
public:
|
||||
SparseReordering(const std::map<std::string,std::string>& config, const LexicalReordering* producer);
|
||||
|
||||
|
||||
//If direction is backward the options will be different, for forward they will be the same
|
||||
void CopyScores(const TranslationOption& currentOpt,
|
||||
const TranslationOption* previousOpt,
|
||||
const InputType& input,
|
||||
LexicalReorderingState::ReorderingType reoType,
|
||||
LexicalReorderingConfiguration::Direction direction,
|
||||
ScoreComponentCollection* scores) const ;
|
||||
LexicalReorderingState::ReorderingType reoType,
|
||||
LexicalReorderingConfiguration::Direction direction,
|
||||
ScoreComponentCollection* scores) const ;
|
||||
|
||||
private:
|
||||
const LexicalReordering* m_producer;
|
||||
@ -113,14 +113,14 @@ private:
|
||||
FeatureMap m_featureMap;
|
||||
|
||||
void ReadWordList(const std::string& filename, const std::string& id,
|
||||
SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
|
||||
SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
|
||||
void ReadClusterMap(const std::string& filename, const std::string& id, SparseReorderingFeatureKey::Side side, std::vector<ClusterMap>* pClusterMaps);
|
||||
void PreCalculateFeatureNames(size_t index, const std::string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster);
|
||||
|
||||
void AddFeatures(
|
||||
SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
|
||||
const Word& word, SparseReorderingFeatureKey::Position position,
|
||||
LexicalReorderingState::ReorderingType reoType,
|
||||
const Word& word, SparseReorderingFeatureKey::Position position,
|
||||
LexicalReorderingState::ReorderingType reoType,
|
||||
ScoreComponentCollection* scores) const;
|
||||
|
||||
};
|
||||
|
@ -14,10 +14,10 @@ using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
MaxSpanFreeNonTermSource::MaxSpanFreeNonTermSource(const std::string &line)
|
||||
:StatelessFeatureFunction(1, line)
|
||||
,m_maxSpan(2)
|
||||
,m_glueTargetLHSStr("S")
|
||||
,m_glueTargetLHS(true)
|
||||
:StatelessFeatureFunction(1, line)
|
||||
,m_maxSpan(2)
|
||||
,m_glueTargetLHSStr("S")
|
||||
,m_glueTargetLHS(true)
|
||||
{
|
||||
m_tuneable = false;
|
||||
ReadParameters();
|
||||
@ -28,25 +28,25 @@ MaxSpanFreeNonTermSource::MaxSpanFreeNonTermSource(const std::string &line)
|
||||
}
|
||||
|
||||
void MaxSpanFreeNonTermSource::EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{
|
||||
targetPhrase.SetRuleSource(source);
|
||||
}
|
||||
|
||||
void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore) const
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore) const
|
||||
{
|
||||
const Word &targetLHS = targetPhrase.GetTargetLHS();
|
||||
|
||||
if (targetLHS == m_glueTargetLHS) {
|
||||
// don't delete glue rules
|
||||
return;
|
||||
// don't delete glue rules
|
||||
return;
|
||||
}
|
||||
|
||||
const Phrase *source = targetPhrase.GetRuleSource();
|
||||
@ -54,17 +54,17 @@ void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
|
||||
float score = 0;
|
||||
|
||||
if (source->Front().IsNonTerminal()) {
|
||||
const ChartCellLabel &cell = *stackVec->front();
|
||||
if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
|
||||
score = - std::numeric_limits<float>::infinity();
|
||||
}
|
||||
const ChartCellLabel &cell = *stackVec->front();
|
||||
if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
|
||||
score = - std::numeric_limits<float>::infinity();
|
||||
}
|
||||
}
|
||||
|
||||
if (source->Back().IsNonTerminal()) {
|
||||
const ChartCellLabel &cell = *stackVec->back();
|
||||
if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
|
||||
score = - std::numeric_limits<float>::infinity();
|
||||
}
|
||||
const ChartCellLabel &cell = *stackVec->back();
|
||||
if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
|
||||
score = - std::numeric_limits<float>::infinity();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -76,7 +76,7 @@ void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
|
||||
void MaxSpanFreeNonTermSource::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "max-span") {
|
||||
m_maxSpan = Scan<int>(value);
|
||||
m_maxSpan = Scan<int>(value);
|
||||
} else {
|
||||
StatelessFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
@ -84,8 +84,8 @@ void MaxSpanFreeNonTermSource::SetParameter(const std::string& key, const std::s
|
||||
|
||||
std::vector<float> MaxSpanFreeNonTermSource::DefaultWeights() const
|
||||
{
|
||||
std::vector<float> ret(1, 1);
|
||||
return ret;
|
||||
std::vector<float> ret(1, 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user