Hieu Hoang 2015-01-14 11:07:42 +00:00
parent 91cb549ccf
commit 05ead45e71
406 changed files with 19495 additions and 20485 deletions

View File

@@ -153,19 +153,19 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
      break;
    }
    case 4: {
      // store only the 3rd one (rule count)
      float val = Moses::Scan<float>(tok);
      misc[0] = val;
      break;
    }
    case 5: {
      // sparse features
      sparseFeatures << tok << " ";
      break;
    }
    case 6: {
      property << tok << " ";
      break;
    }
    default:
      cerr << "ERROR in line " << line << endl;

View File

@@ -166,10 +166,10 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
  size_t propSize = m_property.size();

  size_t memNeeded = sizeof(UINT64)                                  // file pos (phrase id)
                     + sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
                     + sizeof(float) * numScores                      // scores
                     + sizeof(UINT64) + sparseFeatureSize             // sparse features string
                     + sizeof(UINT64) + propSize;                     // property string
  char *mem = (char*) malloc(memNeeded);
  //memset(mem, 0, memNeeded);
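For orientation, the byte layout implied by the size computation above is sketched here (a sketch only, not part of the commit; it assumes the fields are serialized in the same order they are summed, which this hunk does not show):

    UINT64                       file position (phrase id)
    UINT64                       numAlign
    2 * numAlign * UINT64        alignment point pairs
    numScores * float            scores
    UINT64 + sparseFeatureSize   length-prefixed sparse-feature string
    UINT64 + propSize            length-prefixed property string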
@@ -350,13 +350,13 @@ UINT64 TargetPhrase::ReadStringFromFile(std::fstream &fileTPColl, std::string &o
  bytesRead += sizeof(UINT64);

  if (strSize) {
    char *mem = (char*) malloc(strSize + 1);
    mem[strSize] = '\0';
    fileTPColl.read(mem, strSize);
    outStr = string(mem);
    free(mem);

    bytesRead += strSize;
  }

  return bytesRead;

View File

@@ -113,14 +113,12 @@ public:
  virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const;

  void SetProperty(const std::string &value) {
    m_property = value;
  }

  void SetSparseFeatures(const std::string &value) {
    m_sparseFeatures = value;
  }
};

View File

@@ -105,18 +105,17 @@ void Word::ConvertToMoses(
  overwrite = Moses::Word(m_isNonTerminal);

  if (m_isNonTerminal) {
    const std::string &tok = vocab.GetString(m_vocabId);
    overwrite.SetFactor(0, factorColl.AddFactor(tok, m_isNonTerminal));
  } else {
    // TODO: this conversion should have been done at load time.
    util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');

    for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
      UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
      overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
    }
    UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
  }
}

View File

@@ -7,7 +7,8 @@ size_t lookup( string );
vector<string> tokenize( const char input[] );

SuffixArray suffixArray;

int main(int argc, char* argv[])
{
  // handle parameters
  string query;
  string fileNameSuffix;
@@ -95,14 +96,14 @@ int main(int argc, char* argv[]) {
      }
      cout << lookup( query ) << endl;
    }
  } else if (queryFlag) {
    cout << lookup( query ) << endl;
  }
  return 0;
}

size_t lookup( string query )
{
  cerr << "query is " << query << endl;
  vector< string > queryString = tokenize( query.c_str() );
  return suffixArray.Count( queryString );

View File

@@ -61,7 +61,8 @@ void SparseVector::set(const string& name, FeatureStatsType value)
  m_fvector[id] = value;
}

void SparseVector::set(size_t id, FeatureStatsType value)
{
  assert(m_id_to_name.size() > id);
  m_fvector[id] = value;
}
@@ -204,7 +205,7 @@ FeatureStats::FeatureStats(const size_t size)

FeatureStats::~FeatureStats()
{
  delete [] m_array;
}

void FeatureStats::Copy(const FeatureStats &stats)

View File

@@ -31,9 +31,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

using namespace std;

namespace MosesTuning
{

std::ostream& operator<<(std::ostream& out, const WordVec& wordVec)
{
  out << "[";
  for (size_t i = 0; i < wordVec.size(); ++i) {
    out << wordVec[i]->first;
@@ -44,7 +46,8 @@ std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
}

void ReferenceSet::Load(const vector<string>& files, Vocab& vocab)
{
  for (size_t i = 0; i < files.size(); ++i) {
    util::FilePiece fh(files[i].c_str());
    size_t sentenceId = 0;
@@ -55,14 +58,15 @@ void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
      } catch (util::EndOfFileException &e) {
        break;
      }
      AddLine(sentenceId, line, vocab);
      ++sentenceId;
    }
  }
}

void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab)
{
  //cerr << line << endl;
  NgramCounter ngramCounts;
  list<WordVec> openNgrams;
@@ -74,14 +78,14 @@ void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vo
    openNgrams.push_front(WordVec());
    for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end(); ++k) {
      k->push_back(nextTok);
      ++ngramCounts[*k];
    }
    if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
  }

  //merge into overall ngram map
  for (NgramCounter::const_iterator ni = ngramCounts.begin();
       ni != ngramCounts.end(); ++ni) {
    size_t count = ni->second;
    //cerr << *ni << " " << count << endl;
    if (ngramCounts_.size() <= sentenceId) ngramCounts_.resize(sentenceId+1);
@@ -104,8 +108,9 @@ void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vo
  //cerr << endl;
}

size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const
{
  const NgramMap& ngramCounts = ngramCounts_.at(sentenceId);
  NgramMap::const_iterator ngi = ngramCounts.find(ngram);
  if (ngi == ngramCounts.end()) return 0;
@@ -114,7 +119,8 @@ size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool

VertexState::VertexState(): bleuStats(kBleuNgramOrder), targetLength(0) {}

void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats ) const
{
  for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
    //cerr << "Checking: " << *ngi << " matches " << references_.NgramMatches(sentenceId_,*ngi,false) << endl;
    size_t order = ngi->first.size();
@@ -124,7 +130,8 @@ void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStats
  }
}

size_t HgBleuScorer::GetTargetLength(const Edge& edge) const
{
  size_t targetLength = 0;
  for (size_t i = 0; i < edge.Words().size(); ++i) {
    const Vocab::Entry* word = edge.Words()[i];
@@ -137,7 +144,8 @@ size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
  return targetLength;
}

FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats)
{
  NgramCounter ngramCounts;
  size_t childId = 0;
  size_t wordId = 0;
@@ -147,7 +155,7 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
  bool inRightContext = false;
  list<WordVec> openNgrams;
  const Vocab::Entry* currentWord = NULL;
  while (wordId < edge.Words().size()) {
    currentWord = edge.Words()[wordId];
    if (currentWord != NULL) {
      ++wordId;
@@ -214,7 +222,7 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
    }
    if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
  }

  //Collect matches
  //This edge
  //cerr << "edge ngrams" << endl;
@@ -227,26 +235,27 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
      bleuStats[j] += vertexStates_[edge.Children()[i]].bleuStats[j];
    }
  }

  FeatureStatsType sourceLength = head.SourceCovered();
  size_t referenceLength = references_.Length(sentenceId_);
  FeatureStatsType effectiveReferenceLength =
    sourceLength / totalSourceLength_ * referenceLength;

  bleuStats[bleuStats.size()-1] = effectiveReferenceLength;
  //backgroundBleu_[backgroundBleu_.size()-1] =
  //  backgroundRefLength_ * sourceLength / totalSourceLength_;
  FeatureStatsType bleu = sentenceLevelBackgroundBleu(bleuStats, backgroundBleu_);

  return bleu;
}

void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats)
{
  //TODO: Maybe more efficient to absorb into the Score() method
  VertexState& vertexState = vertexStates_[vertexId];
  //cerr << "Updating state for " << vertexId << endl;

  //leftContext
  int wi = 0;
  const VertexState* childState = NULL;
@@ -263,9 +272,9 @@ void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const ve
      //start of child state
      childState = &(vertexStates_[winnerEdge.Children()[childi++]]);
      contexti = 0;
    }
    if ((size_t)contexti < childState->leftContext.size()) {
      vertexState.leftContext.push_back(childState->leftContext[contexti++]);
    } else {
      //end of child context
      childState = NULL;
@@ -314,7 +323,8 @@ typedef pair<const Edge*,FeatureStatsType> BackPointer;
 * Recurse through back pointers
 **/
static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<BackPointer>& bps,
                              HgHypothesis* bestHypo)
{
  //cerr << "Expanding " << vertexId << " Score: " << bps[vertexId].second << endl;
  //UTIL_THROW_IF(bps[vertexId].second == kMinScore+1, HypergraphException, "Landed at vertex " << vertexId << " which is a dead end");
  if (!bps[vertexId].first) return;
@@ -334,7 +344,7 @@ static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<
  }
}

void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
{
  BackPointer init(NULL,kMinScore);
  vector<BackPointer> backPointers(graph.VertexSize(),init);
@@ -349,7 +359,7 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
      //UTIL_THROW(HypergraphException, "Vertex " << vi << " has no incoming edges");
      //If no incoming edges, vertex is a dead end
      backPointers[vi].first = NULL;
      backPointers[vi].second = kMinScore;
    } else {
      //cerr << "\nVertex: " << vi << endl;
      for (size_t ei = 0; ei < incoming.size(); ++ei) {
@@ -362,10 +372,10 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
          incomingScore = max(incomingScore + backPointers[childId].second, kMinScore);
        }
        vector<FeatureStatsType> bleuStats(kBleuNgramOrder*2+1);
        // cerr << "Score: " << incomingScore << " Bleu: ";
        // if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
        FeatureStatsType totalScore = incomingScore;
        if (bleuWeight) {
          FeatureStatsType bleuScore = bleuScorer.Score(*(incoming[ei]), vertex, bleuStats);
          if (isnan(bleuScore)) {
            cerr << "WARN: bleu score undefined" << endl;
@@ -379,7 +389,7 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
          }
          //UTIL_THROW_IF(isnan(bleuScore), util::Exception, "Bleu score undefined, smoothing problem?");
          totalScore += bleuWeight * bleuScore;
          // cerr << bleuScore << " Total: " << incomingScore << endl << endl;
          //cerr << "is " << incomingScore << " bs " << bleuScore << endl;
        }
        if (totalScore >= winnerScore) {

View File

@@ -27,7 +27,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "BleuScorer.h"
#include "Hypergraph.h"

namespace MosesTuning
{

std::ostream& operator<<(std::ostream& out, const WordVec& wordVec);

@@ -47,18 +48,21 @@ struct NgramEquals : public std::binary_function<const WordVec&, const WordVec&,
typedef boost::unordered_map<WordVec, size_t, NgramHash, NgramEquals> NgramCounter;

class ReferenceSet
{
public:

  void AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab);

  void Load(const std::vector<std::string>& files, Vocab& vocab);

  size_t NgramMatches(size_t sentenceId, const WordVec&, bool clip) const;

  size_t Length(size_t sentenceId) const {
    return lengths_[sentenceId];
  }

private:
  //ngrams to (clipped,unclipped) counts
@@ -80,31 +84,32 @@ struct VertexState {
/**
 * Used to score an rule (ie edge) when we are applying it.
 **/
class HgBleuScorer
{
public:
  HgBleuScorer(const ReferenceSet& references, const Graph& graph, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu):
    references_(references), sentenceId_(sentenceId), graph_(graph), backgroundBleu_(backgroundBleu),
    backgroundRefLength_(backgroundBleu[kBleuNgramOrder*2]) {
    vertexStates_.resize(graph.VertexSize());
    totalSourceLength_ = graph.GetVertex(graph.VertexSize()-1).SourceCovered();
  }

  FeatureStatsType Score(const Edge& edge, const Vertex& head, std::vector<FeatureStatsType>& bleuStats) ;

  void UpdateState(const Edge& winnerEdge, size_t vertexId, const std::vector<FeatureStatsType>& bleuStats);

private:
  const ReferenceSet& references_;
  std::vector<VertexState> vertexStates_;
  size_t sentenceId_;
  size_t totalSourceLength_;
  const Graph& graph_;
  std::vector<FeatureStatsType> backgroundBleu_;
  FeatureStatsType backgroundRefLength_;

  void UpdateMatches(const NgramCounter& counter, std::vector<FeatureStatsType>& bleuStats) const;
  size_t GetTargetLength(const Edge& edge) const;
};

struct HgHypothesis {

View File

@@ -15,7 +15,7 @@ BOOST_AUTO_TEST_CASE(viterbi_simple_lattice)
  Vocab vocab;
  WordVec words;
  string wordStrings[] =
  {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g"};
  for (size_t i = 0; i < 9; ++i) {
    words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
  }
@@ -102,7 +102,7 @@ BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
  Vocab vocab;
  WordVec words;
  string wordStrings[] =
  {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
  for (size_t i = 0; i < 13; ++i) {
    words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
  }

View File

@@ -34,11 +34,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
namespace fs = boost::filesystem;

namespace MosesTuning
{

static const ValType BLEU_RATIO = 5;

ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv)
{
  vector<ValType> stats(scorer_->NumberOfScores(),0);
  for(reset(); !finished(); next()) {
    vector<ValType> sent;
@@ -51,13 +53,14 @@ ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
}

NbestHopeFearDecoder::NbestHopeFearDecoder(
  const vector<string>& featureFiles,
  const vector<string>& scoreFiles,
  bool streaming,
  bool no_shuffle,
  bool safe_hope,
  Scorer* scorer
) : safe_hope_(safe_hope)
{
  scorer_ = scorer;
  if (streaming) {
    train_.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
@@ -67,25 +70,29 @@ NbestHopeFearDecoder::NbestHopeFearDecoder(
}

void NbestHopeFearDecoder::next()
{
  train_->next();
}

bool NbestHopeFearDecoder::finished()
{
  return train_->finished();
}

void NbestHopeFearDecoder::reset()
{
  train_->reset();
}

void NbestHopeFearDecoder::HopeFear(
  const std::vector<ValType>& backgroundBleu,
  const MiraWeightVector& wv,
  HopeFearData* hopeFear
)
{
  // Hope / fear decode
  ValType hope_scale = 1.0;
  size_t hope_index=0, fear_index=0, model_index=0;
@@ -134,7 +141,8 @@ void NbestHopeFearDecoder::HopeFear(
  hopeFear->hopeFearEqual = (hope_index == fear_index);
}

void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
{
  // Find max model
  size_t max_index=0;
  ValType max_score=0;
@@ -152,18 +160,19 @@ void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValTy

HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
(
  const string& hypergraphDir,
  const vector<string>& referenceFiles,
  size_t num_dense,
  bool streaming,
  bool no_shuffle,
  bool safe_hope,
  size_t hg_pruning,
  const MiraWeightVector& wv,
  Scorer* scorer
) :
  num_dense_(num_dense)
{
  UTIL_THROW_IF(streaming, util::Exception, "Streaming not currently supported for hypergraphs");
  UTIL_THROW_IF(!fs::exists(hypergraphDir), HypergraphException, "Directory '" << hypergraphDir << "' does not exist");
@@ -177,17 +186,17 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
  static const string kWeights = "weights";
  fs::directory_iterator dend;
  size_t fileCount = 0;

  cerr << "Reading hypergraphs" << endl;
  for (fs::directory_iterator di(hypergraphDir); di != dend; ++di) {
    const fs::path& hgpath = di->path();
    if (hgpath.filename() == kWeights) continue;
    //  cerr << "Reading " << hgpath.filename() << endl;
    Graph graph(vocab_);
    size_t id = boost::lexical_cast<size_t>(hgpath.stem().string());
    util::scoped_fd fd(util::OpenReadOrThrow(hgpath.string().c_str()));
    //util::FilePiece file(di->path().string().c_str());
    util::FilePiece file(fd.release());
    ReadGraph(file,graph);

    //cerr << "ref length " << references_.Length(id) << endl;
@@ -196,7 +205,7 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
      prunedGraph.reset(new Graph(vocab_));
      graph.Prune(prunedGraph.get(), weights, edgeCount);
      graphs_[id] = prunedGraph;
      //  cerr << "Pruning to v=" << graphs_[id]->VertexSize() << " e=" << graphs_[id]->EdgeSize() << endl;
    ++fileCount;
    if (fileCount % 10 == 0) cerr << ".";
    if (fileCount % 400 == 0) cerr << " [count=" << fileCount << "]\n";
@@ -211,23 +220,27 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
}

void HypergraphHopeFearDecoder::reset()
{
  sentenceIdIter_ = sentenceIds_.begin();
}

void HypergraphHopeFearDecoder::next()
{
  sentenceIdIter_++;
}

bool HypergraphHopeFearDecoder::finished()
{
  return sentenceIdIter_ == sentenceIds_.end();
}

void HypergraphHopeFearDecoder::HopeFear(
  const vector<ValType>& backgroundBleu,
  const MiraWeightVector& wv,
  HopeFearData* hopeFear
)
{
  size_t sentenceId = *sentenceIdIter_;
  SparseVector weights;
  wv.ToSparse(&weights);
@@ -247,12 +260,12 @@ void HypergraphHopeFearDecoder::HopeFear(
    Viterbi(graph, weights, 0, references_, sentenceId, backgroundBleu, &modelHypo);

    // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
    // where model score is having far more influence than BLEU
    //    hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
    //    if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
    //      hope_scale = abs(hope_bleu) / abs(hope_model);
    //    else break;
    //TODO: Don't currently get model and bleu so commented this out for now.
    break;
  }
@@ -311,15 +324,16 @@ void HypergraphHopeFearDecoder::HopeFear(
  if (hopeFear->hopeFearEqual) {
    for (size_t i = 0; i < fearStats.size(); ++i) {
      if (fearStats[i] != hopeFear->hopeStats[i]) {
        hopeFear->hopeFearEqual = false;
        break;
      }
    }
  }
  hopeFear->hopeFearEqual = hopeFear->hopeFearEqual && (hopeFear->fearFeatures == hopeFear->hopeFeatures);
}

void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats)
{
  assert(!finished());
  HgHypothesis bestHypo;
  size_t sentenceId = *sentenceIdIter_;

View File

@@ -35,7 +35,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
// the n-best list and lattice/hypergraph implementations
//

namespace MosesTuning
{

class Scorer;

@@ -44,7 +45,7 @@ struct HopeFearData {
  MiraFeatureVector modelFeatures;
  MiraFeatureVector hopeFeatures;
  MiraFeatureVector fearFeatures;

  std::vector<float> modelStats;
  std::vector<float> hopeStats;
@@ -55,7 +56,8 @@ struct HopeFearData {
};

//Abstract base class
class HopeFearDecoder
{
public:
  //iterator methods
  virtual void reset() = 0;
@@ -68,10 +70,10 @@ public:
   * Calculate hope, fear and model hypotheses
   **/
  virtual void HopeFear(
    const std::vector<ValType>& backgroundBleu,
    const MiraWeightVector& wv,
    HopeFearData* hopeFear
  ) = 0;

  /** Max score decoding */
  virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
@@ -86,25 +88,26 @@ protected:

/** Gets hope-fear from nbest lists */
class NbestHopeFearDecoder : public virtual HopeFearDecoder
{
public:
  NbestHopeFearDecoder(const std::vector<std::string>& featureFiles,
                       const std::vector<std::string>& scoreFiles,
                       bool streaming,
                       bool no_shuffle,
                       bool safe_hope,
                       Scorer* scorer
                      );

  virtual void reset();
  virtual void next();
  virtual bool finished();

  virtual void HopeFear(
    const std::vector<ValType>& backgroundBleu,
    const MiraWeightVector& wv,
    HopeFearData* hopeFear
  );

  virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
@@ -117,29 +120,30 @@ private:

/** Gets hope-fear from hypergraphs */
class HypergraphHopeFearDecoder : public virtual HopeFearDecoder
{
public:
  HypergraphHopeFearDecoder(
    const std::string& hypergraphDir,
    const std::vector<std::string>& referenceFiles,
    size_t num_dense,
    bool streaming,
    bool no_shuffle,
    bool safe_hope,
    size_t hg_pruning,
    const MiraWeightVector& wv,
    Scorer* scorer_
  );

  virtual void reset();
  virtual void next();
  virtual bool finished();

  virtual void HopeFear(
    const std::vector<ValType>& backgroundBleu,
    const MiraWeightVector& wv,
    HopeFearData* hopeFear
  );

  virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);

View File

@@ -55,7 +55,8 @@ void HwcmScorer::setReferenceFiles(const vector<string>& referenceFiles)
}

void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history, vector<map<string, int> > & hwc)
{
  if (tree->GetLength() > 0) {
    string head = getHead(tree);
@@ -64,8 +65,7 @@ void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history
      for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it) {
        extractHeadWordChain(*it, history, hwc);
      }
    } else {
      vector<string> new_history(kHwcmOrder);
      new_history[0] = head;
      hwc[0][head]++;
@@ -85,11 +85,11 @@ void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history
  }
}

string HwcmScorer::getHead(TreePointer tree)
{
  // assumption (only true for dependency parse: each constituent has a preterminal label, and corresponding terminal is head)
  // if constituent has multiple preterminals, first one is picked; if it has no preterminals, empty string is returned
  for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it) {
    TreePointer child = *it;

    if (child->GetLength() == 1 && child->GetChildren()[0]->IsTerminal()) {

View File

@@ -31,18 +31,22 @@ using namespace std;
static const string kBOS = "<s>";
static const string kEOS = "</s>";

namespace MosesTuning
{

StringPiece NextLine(util::FilePiece& from)
{
  StringPiece line;
  while ((line = from.ReadLine()).starts_with("#"));

  return line;
}

Vocab::Vocab() : eos_( FindOrAdd(kEOS)), bos_(FindOrAdd(kBOS))
{
}

const Vocab::Entry &Vocab::FindOrAdd(const StringPiece &str)
{
#if BOOST_VERSION >= 104200
  Map::const_iterator i= map_.find(str, Hash(), Equals());
#else
@@ -62,7 +66,8 @@ double_conversion::StringToDoubleConverter converter(double_conversion::StringTo
/**
 * Reads an incoming edge. Returns edge and source words covered.
 **/
static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph)
{
  Edge* edge = graph.NewEdge();
  StringPiece line = from.ReadLine(); //Don't allow comments within edge lists
  util::TokenIter<util::MultiCharacter> pipes(line, util::MultiCharacter(" ||| "));
@@ -82,7 +87,7 @@ static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
      edge->AddWord(&found);
    }
  }

  //Features
  ++pipes;
  for (util::TokenIter<util::SingleCharacter, true> i(*pipes, util::SingleCharacter(' ')); i; ++i) {
@@ -100,17 +105,18 @@ static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
  //Covered words
  ++pipes;
  size_t sourceCovered = boost::lexical_cast<size_t>(*pipes);

  return pair<Edge*,size_t>(edge,sourceCovered);
}

void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeCount) const
{
  Graph& newGraph = *pNewGraph;
  //TODO: Optimise case where no pruning required

  //For debug
  /*
  map<const Edge*, string> edgeIds;
  for (size_t i = 0; i < edges_.Size(); ++i) {
@@ -136,7 +142,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
  //Compute backward scores
  for (size_t vi = 0; vi < vertices_.Size(); ++vi) {
    //  cerr << "Vertex " << vi << endl;
    const Vertex& vertex = vertices_[vi];
    const vector<const Edge*>& incoming = vertex.GetIncoming();
    if (!incoming.size()) {
@@ -150,7 +156,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
        //cerr << "\tChild " << incoming[ei]->Children()[i] << endl;
        size_t childId = incoming[ei]->Children()[i];
        UTIL_THROW_IF(vertexBackwardScores[childId] == kMinScore,
                      HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
        outgoing[childId].push_back(incoming[ei]);
        incomingScore += vertexBackwardScores[childId];
      }
@@ -172,7 +178,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
    } else {
      for (size_t ei = 0; ei < outgoing[vi].size(); ++ei) {
        //cerr << "Edge " << edgeIds[outgoing[vi][ei]] << endl;
        FeatureStatsType outgoingScore = 0;
        //add score of head
        outgoingScore += vertexForwardScores[edgeHeads[outgoing[vi][ei]]];
        //cerr << "Forward score " << outgoingScore << endl;
@@ -204,11 +210,11 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
    }
    FeatureStatsType score = edgeForwardScores[edge] + edgeBackwardScores[edge];
    edgeScores.insert(pair<FeatureStatsType, const Edge*>(score,edge));
    //    cerr << edgeIds[edge] << " " << score << endl;
  }

  multimap<FeatureStatsType, const Edge*>::const_reverse_iterator ei = edgeScores.rbegin();
  size_t edgeCount = 1;
  while(edgeCount < minEdgeCount && ei != edgeScores.rend()) {
@@ -235,10 +241,10 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
  map<size_t,size_t> oldIdToNew;
  size_t vi = 0;
  for (set<size_t>::const_iterator i = retainedVertices.begin(); i != retainedVertices.end(); ++i, ++vi) {
    //   cerr << *i << " New: " << vi << endl;
    oldIdToNew[*i] = vi;
    Vertex* vertex = newGraph.NewVertex();
    vertex->SetSourceCovered(vertices_[*i].SourceCovered());
  }

  for (set<const Edge*>::const_iterator i = retainedEdges.begin(); i != retainedEdges.end(); ++i) {
@@ -255,7 +261,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
    newHead.AddEdge(newEdge);
  }

  /*
  cerr << "New graph" << endl;
  for (size_t vi = 0; vi < newGraph.VertexSize(); ++vi) {
@@ -275,21 +281,22 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
    }
    cerr << endl;
  }
  */
}

/**
 * Read from "Kenneth's hypergraph" aka cdec target_graph format (with comments)
 **/
void ReadGraph(util::FilePiece &from, Graph &graph)
{
  //First line should contain field names
  StringPiece line = from.ReadLine();
  UTIL_THROW_IF(line.compare("# target ||| features ||| source-covered") != 0, HypergraphException, "Incorrect format spec on first line: '" << line << "'");
  line = NextLine(from);

  //Then expect numbers of vertices
  util::TokenIter<util::SingleCharacter, false> i(line, util::SingleCharacter(' '));
  unsigned long int vertices = boost::lexical_cast<unsigned long int>(*i);
@@ -304,9 +311,11 @@ void ReadGraph(util::FilePiece &from, Graph &graph) {
    for (unsigned long int e = 0; e < edge_count; ++e) {
      pair<Edge*,size_t> edge = ReadEdge(from, graph);
      vertex->AddEdge(edge.first);

      //Note: the file format attaches this to the edge, but it's really a property
      //of the vertex.
      if (!e) {
        vertex->SetSourceCovered(edge.second);
      }
    }
  }
}
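For reference, the parsing code above implies hypergraph files of roughly this shape (a hypothetical illustration only: the header line and the three "|||"-separated fields per edge come from the checks in ReadGraph and ReadEdge above, while the count lines, the feature spelling, and the non-terminal notation are assumptions made for the sake of the example):

    # target ||| features ||| source-covered
    3 4
    1
    <s> ||| ||| 0
    2
    [0] a b ||| LM=-1.2 WordPenalty=-2 ||| 2
    [0] c ||| LM=-0.9 WordPenalty=-1 ||| 2
    1
    [1] </s> ||| ||| 3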

View File

@@ -37,81 +37,88 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "FeatureStats.h"

namespace MosesTuning
{

typedef unsigned int WordIndex;
const WordIndex kMaxWordIndex = UINT_MAX;
const FeatureStatsType kMinScore = -1e10;

template <class T> class FixedAllocator : boost::noncopyable
{
public:
  FixedAllocator() : current_(NULL), end_(NULL) {}

  void Init(std::size_t count) {
    assert(!current_);
    array_.reset(new T[count]);
    current_ = array_.get();
    end_ = current_ + count;
  }

  T &operator[](std::size_t idx) {
    return array_.get()[idx];
  }
  const T &operator[](std::size_t idx) const {
    return array_.get()[idx];
  }

  T *New() {
    T *ret = current_++;
    UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end");
    return ret;
  }

  std::size_t Capacity() const {
    return end_ - array_.get();
  }

  std::size_t Size() const {
    return current_ - array_.get();
  }

private:
  boost::scoped_array<T> array_;
  T *current_, *end_;
};

class Vocab
{
public:
  Vocab();

  typedef std::pair<const char *const, WordIndex> Entry;

  const Entry &FindOrAdd(const StringPiece &str);

  const Entry& Bos() const {
    return bos_;
  }

  const Entry& Eos() const {
    return eos_;
  }

private:
  util::Pool piece_backing_;

  struct Hash : public std::unary_function<const char *, std::size_t> {
    std::size_t operator()(StringPiece str) const {
      return util::MurmurHashNative(str.data(), str.size());
    }
  };

  struct Equals : public std::binary_function<const char *, const char *, bool> {
    bool operator()(StringPiece first, StringPiece second) const {
      return first == second;
    }
  };

  typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map;
  Map map_;
  Entry eos_;
  Entry bos_;
};
@@ -125,121 +132,141 @@ typedef boost::shared_ptr<SparseVector> FeaturePtr;
/**
 * An edge has 1 head vertex, 0..n child (tail) vertices, a list of words and a feature vector.
 **/
class Edge
{
public:
  Edge() {
    features_.reset(new SparseVector());
  }

  void AddWord(const Vocab::Entry *word) {
    words_.push_back(word);
  }

  void AddChild(size_t child) {
    children_.push_back(child);
  }

  void AddFeature(const StringPiece& name, FeatureStatsType value) {
    //TODO StringPiece interface
    features_->set(name.as_string(),value);
  }

  const WordVec &Words() const {
    return words_;
  }

  const FeaturePtr& Features() const {
    return features_;
  }

  void SetFeatures(const FeaturePtr& features) {
    features_ = features;
  }

  const std::vector<size_t>& Children() const {
    return children_;
  }

  FeatureStatsType GetScore(const SparseVector& weights) const {
    return inner_product(*(features_.get()), weights);
  }

private:
  // NULL for non-terminals.
  std::vector<const Vocab::Entry*> words_;
  std::vector<size_t> children_;
  boost::shared_ptr<SparseVector> features_;
};

/*
 * A vertex has 0..n incoming edges
 **/
class Vertex
{
public:
  Vertex() : sourceCovered_(0) {}

  void AddEdge(const Edge* edge) {
    incoming_.push_back(edge);
  }

  void SetSourceCovered(size_t sourceCovered) {
    sourceCovered_ = sourceCovered;
  }

  const std::vector<const Edge*>& GetIncoming() const {
    return incoming_;
  }

  size_t SourceCovered() const {
    return sourceCovered_;
  }

private:
  std::vector<const Edge*> incoming_;
  size_t sourceCovered_;
};

class Graph : boost::noncopyable
{
public:
  Graph(Vocab& vocab) : vocab_(vocab) {}

  void SetCounts(std::size_t vertices, std::size_t edges) {
    vertices_.Init(vertices);
    edges_.Init(edges);
  }

  Vocab &MutableVocab() {
    return vocab_;
  }

  Edge *NewEdge() {
    return edges_.New();
  }

  Vertex *NewVertex() {
    return vertices_.New();
  }

  const Vertex &GetVertex(std::size_t index) const {
    return vertices_[index];
  }

  Edge &GetEdge(std::size_t index) {
    return edges_[index];
  }

  /* Created a pruned copy of this graph with minEdgeCount edges. Uses
     the scores in the max-product semiring to rank edges, as suggested by
     Colin Cherry */
  void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const;

  std::size_t VertexSize() const {
    return vertices_.Size();
  }
  std::size_t EdgeSize() const {
    return edges_.Size();
  }

  bool IsBoundary(const Vocab::Entry* word) const {
    return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second;
  }

private:
  FixedAllocator<Edge> edges_;
  FixedAllocator<Vertex> vertices_;
  Vocab& vocab_;
};

class HypergraphException : public util::Exception
{
public:
  HypergraphException() {}
  ~HypergraphException() throw() {}
};

View File

@@ -8,12 +8,12 @@
using namespace std;
using namespace MosesTuning;

BOOST_AUTO_TEST_CASE(prune)
{
  Vocab vocab;
  WordVec words;
  string wordStrings[] =
  {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
  for (size_t i = 0; i < 13; ++i) {
    words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
  }
@@ -105,7 +105,7 @@ BOOST_AUTO_TEST_CASE(prune)
  BOOST_CHECK_EQUAL(5, pruned.EdgeSize());
  BOOST_CHECK_EQUAL(4, pruned.VertexSize());

  //edges retained should be best path (<s> ab jk </s>) and hi
  BOOST_CHECK_EQUAL(1, pruned.GetVertex(0).GetIncoming().size());
  BOOST_CHECK_EQUAL(2, pruned.GetVertex(1).GetIncoming().size());
@@ -115,37 +115,37 @@ BOOST_AUTO_TEST_CASE(prune)
  const Edge* edge;

  edge = pruned.GetVertex(0).GetIncoming()[0];
  BOOST_CHECK_EQUAL(1, edge->Words().size());
  BOOST_CHECK_EQUAL(words[0], edge->Words()[0]);

  edge = pruned.GetVertex(1).GetIncoming()[0];
  BOOST_CHECK_EQUAL(3, edge->Words().size());
  BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
  BOOST_CHECK_EQUAL(words[2]->first, edge->Words()[1]->first);
  BOOST_CHECK_EQUAL(words[3]->first, edge->Words()[2]->first);

  edge = pruned.GetVertex(1).GetIncoming()[1];
  BOOST_CHECK_EQUAL(3, edge->Words().size());
  BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
  BOOST_CHECK_EQUAL(words[9]->first, edge->Words()[1]->first);
  BOOST_CHECK_EQUAL(words[10]->first, edge->Words()[2]->first);

  edge = pruned.GetVertex(2).GetIncoming()[0];
  BOOST_CHECK_EQUAL(3, edge->Words().size());
  BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
  BOOST_CHECK_EQUAL(words[11]->first, edge->Words()[1]->first);
  BOOST_CHECK_EQUAL(words[12]->first, edge->Words()[2]->first);

  edge = pruned.GetVertex(3).GetIncoming()[0];
  BOOST_CHECK_EQUAL(2, edge->Words().size());
  BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
  BOOST_CHECK_EQUAL(words[1]->first, edge->Words()[1]->first);

  //  BOOST_CHECK_EQUAL(words[0], pruned.GetVertex(0).GetIncoming()[0].Words()[0]);
}

View File

@ -174,19 +174,19 @@ float InterpolatedScorer::calculateScore(const std::vector<ScoreStatsType>& tota
float InterpolatedScorer::getReferenceLength(const std::vector<ScoreStatsType>& totals) const
{
  size_t scorerNum = 0;
  size_t last = 0;
  float refLen = 0;
  for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
       itsc != m_scorers.end(); ++itsc) {
    int numScoresScorer = (*itsc)->NumberOfScores();
    std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer);
    refLen += (*itsc)->getReferenceLength(totals_scorer) * m_scorer_weights[scorerNum];
    last += numScoresScorer;
    scorerNum++;
  }
  return refLen;
}
void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
{

View File

@ -9,7 +9,8 @@ namespace MosesTuning
{
void MiraFeatureVector::InitSparse(const SparseVector& sparse, size_t ignoreLimit)
{
  vector<size_t> sparseFeats = sparse.feats();
  bool bFirst = true;
  size_t lastFeat = 0;
@ -40,7 +41,8 @@ MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
  InitSparse(vec.sparse);
}
MiraFeatureVector::MiraFeatureVector(const SparseVector& sparse, size_t num_dense)
{
  m_dense.resize(num_dense);
  //Assume that features with id [0,num_dense) are the dense features
  for (size_t id = 0; id < num_dense; ++id) {
@ -162,7 +164,8 @@ MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector&
  return MiraFeatureVector(dense,sparseFeats,sparseVals);
}
bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b)
{
  ValType eps = 1e-8;
  //dense features
  if (a.m_dense.size() != b.m_dense.size()) return false;

View File

@ -93,7 +93,8 @@ void MiraWeightVector::update(size_t index, ValType delta)
  m_lastUpdated[index] = m_numUpdates;
}
void MiraWeightVector::ToSparse(SparseVector* sparse) const
{
  for (size_t i = 0; i < m_weights.size(); ++i) {
    if(abs(m_weights[i])>1e-8) {
      sparse->set(i,m_weights[i]);
@ -171,7 +172,8 @@ size_t AvgWeightVector::size() const
  return m_wv.m_weights.size();
}
void AvgWeightVector::ToSparse(SparseVector* sparse) const
{
  for (size_t i = 0; i < size(); ++i) {
    ValType w = weight(i);
    if(abs(w)>1e-8) {

View File

@ -23,7 +23,7 @@ namespace MosesTuning
 */
class StatisticsBasedScorer : public Scorer
{
  friend class HopeFearDecoder;
public:
  StatisticsBasedScorer(const std::string& name, const std::string& config);

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -23,15 +23,15 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace TERCpp
{
string alignmentStruct::toString()
{
  stringstream s;
//   s << "nword : " << vectorToString(nwords)<<endl;
//   s << "alignment" << vectorToString(alignment)<<endl;
//   s << "afterShift" << vectorToString(alignment)<<endl;
  s << "Nothing to be printed" <<endl;
  return s.str();
}
// alignmentStruct::alignmentStruct()
// {
@ -99,7 +99,7 @@ namespace TERCpp
//     return s.str();
//   }
/* The distance of the shift. */
// int alignmentStruct::distance()
// {
//   if (moveto < start)

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -34,10 +34,10 @@ using namespace Tools;
namespace TERCpp
{
class alignmentStruct
{
private:
public:
//   alignmentStruct();
//   alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@ -53,14 +53,14 @@ namespace TERCpp
//   int end;
//   int moveto;
//   int newloc;
  vector<string> nwords; // The words we shifted
  vector<char> alignment ; // for pra_more output
  vector<vecInt> aftershift; // for pra_more output
  // This is used to store the cost of a shift, so we don't have to
  // calculate it multiple times.
  double cost;
  string toString();
};
}
#endif

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -36,10 +36,10 @@ using namespace Tools;
namespace TERCpp
{
class bestShiftStruct
{
private:
public:
//   alignmentStruct();
//   alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@ -55,16 +55,16 @@ namespace TERCpp
//   int end;
//   int moveto;
//   int newloc;
  terShift m_best_shift;
  terAlignment m_best_align;
  bool m_empty;
//   vector<string> nwords; // The words we shifted
//   char* alignment ; // for pra_more output
//   vector<vecInt> aftershift; // for pra_more output
  // This is used to store the cost of a shift, so we don't have to
  // calculate it multiple times.
//   double cost;
};
}
#endif

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -28,156 +28,142 @@ using namespace std;
namespace HashMapSpace
{
// hashMap::hashMap();
/*    hashMap::~hashMap()
{
//   vector<stringHasher>::const_iterator del = m_hasher.begin();
  for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
  {
    delete(*del);
  }
}*/
/**
 * int hashMap::trouve ( long searchKey )
 * @param searchKey
 * @return
 */
int hashMap::trouve ( long searchKey )
{
  long foundKey;
//   vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
    foundKey= ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
      return 1;
    }
  }
  return 0;
}
int hashMap::trouve ( string key )
{
  long searchKey=hashValue ( key );
  long foundKey;;
//   vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
    foundKey= ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
      return 1;
    }
  }
  return 0;
}
/**
 * long hashMap::hashValue ( string key )
 * @param key
 * @return
 */
long hashMap::hashValue ( string key )
{
  locale loc;                 // the "C" locale
  const collate<char>& coll = use_facet<collate<char> >(loc);
  return coll.hash(key.data(),key.data()+key.length());
//   boost::hash<string> hasher;
//   return hasher ( key );
}
/**
 * void hashMap::addHasher ( string key, string value )
 * @param key
 * @param value
 */
void hashMap::addHasher ( string key, string value )
{
  if ( trouve ( hashValue ( key ) ) ==0 ) {
//     cerr << "ICI1" <<endl;
    stringHasher H ( hashValue ( key ),key,value );
//     cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
//     cerr << "ICI2" <<endl;
    m_hasher.push_back ( H );
  }
}
stringHasher hashMap::getHasher ( string key )
{
  long searchKey=hashValue ( key );
  long foundKey;
  stringHasher defaut(0,"","");
//   vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
    foundKey= ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
      return ( *l_hasher );
    }
  }
  return defaut;
}
string hashMap::getValue ( string key )
{
  long searchKey=hashValue ( key );
  long foundKey;
//   vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
    foundKey= ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
//       cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
      return ( *l_hasher ).getValue();
    }
  }
  return "";
}
string hashMap::searchValue ( string value )
{
//   long searchKey=hashValue ( key );
//   long foundKey;
  string foundValue;
//   vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
    foundValue= ( *l_hasher ).getValue();
    if ( foundValue.compare ( value ) == 0 ) {
      return ( *l_hasher ).getKey();
    }
  }
  return "";
}
void hashMap::setValue ( string key , string value )
{
  long searchKey=hashValue ( key );
  long foundKey;
//   vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
    foundKey= ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
      ( *l_hasher ).setValue ( value );
//       return ( *l_hasher ).getValue();
    }
  }
}
/**
 *
 */
void hashMap::printHash()
{
  for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
    cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
  }
}
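The hashValue() implementations above key each entry by hashing the string through the standard locale's collate facet rather than boost::hash. A self-contained sketch of that technique, for illustration only (not part of the patch):

  #include <iostream>
  #include <locale>
  #include <string>

  int main()
  {
    std::locale loc;                                 // the default ("C") locale
    const std::collate<char>& coll = std::use_facet<std::collate<char> >(loc);
    std::string key = "example";
    long h = coll.hash(key.data(), key.data() + key.length());
    std::cout << key << " -> " << h << std::endl;    // hash value is implementation-defined
    return 0;
  }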

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -35,27 +35,27 @@ using namespace std;
namespace HashMapSpace
{
class hashMap
{
private:
  vector<stringHasher> m_hasher;
public:
//   ~hashMap();
  long hashValue ( string key );
  int trouve ( long searchKey );
  int trouve ( string key );
  void addHasher ( string key, string value );
  stringHasher getHasher ( string key );
  string getValue ( string key );
  string searchValue ( string key );
  void setValue ( string key , string value );
  void printHash();
  vector<stringHasher> getHashMap();
  string printStringHash();
  string printStringHash2();
  string printStringHashForLexicon();
};
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -28,117 +28,108 @@ using namespace std;
namespace HashMapSpace
{
// hashMapInfos::hashMap();
/*    hashMapInfos::~hashMap()
{
//   vector<infosHasher>::const_iterator del = m_hasher.begin();
  for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
  {
    delete(*del);
  }
}*/
/**
 * int hashMapInfos::trouve ( long searchKey )
 * @param searchKey
 * @return
 */
int hashMapInfos::trouve ( long searchKey )
{
  long foundKey;
//   vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
    foundKey= ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
      return 1;
    }
  }
  return 0;
}
int hashMapInfos::trouve ( string key )
{
  long searchKey=hashValue ( key );
  long foundKey;;
//   vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
    foundKey= ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
      return 1;
    }
  }
  return 0;
}
/**
 * long hashMapInfos::hashValue ( string key )
 * @param key
 * @return
 */
long hashMapInfos::hashValue ( string key )
{
  locale loc;                 // the "C" locale
  const collate<char>& coll = use_facet<collate<char> >(loc);
  return coll.hash(key.data(),key.data()+key.length());
//   boost::hash<string> hasher;
//   return hasher ( key );
}
/**
 * void hashMapInfos::addHasher ( string key, string value )
 * @param key
 * @param value
 */
void hashMapInfos::addHasher ( string key, vector<int> value )
{
  if ( trouve ( hashValue ( key ) ) ==0 ) {
//     cerr << "ICI1" <<endl;
    infosHasher H ( hashValue ( key ),key,value );
//     cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
//     cerr << "ICI2" <<endl;
    m_hasher.push_back ( H );
  }
}
void hashMapInfos::addValue ( string key, vector<int> value )
{
  addHasher ( key, value );
}
infosHasher hashMapInfos::getHasher ( string key )
{
  long searchKey=hashValue ( key );
  long foundKey;
//   vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
    foundKey= ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
      return ( *l_hasher );
    }
  }
  vector<int> temp;
  infosHasher defaut(0,"",temp);
  return defaut;
}
vector<int> hashMapInfos::getValue ( string key )
{
  long searchKey=hashValue ( key );
  long foundKey;
  vector<int> retour;
//   vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
    foundKey= ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
//       cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
      return ( *l_hasher ).getValue();
    }
  }
  return retour;
}
// string hashMapInfos::searchValue ( string value )
// {
// //   long searchKey=hashValue ( key );
@ -158,42 +149,38 @@ namespace HashMapSpace
// }
//
void hashMapInfos::setValue ( string key , vector<int> value )
{
  long searchKey=hashValue ( key );
  long foundKey;
//   vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
    foundKey= ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
      ( *l_hasher ).setValue ( value );
//       return ( *l_hasher ).getValue();
    }
  }
}
string hashMapInfos::toString ()
{
  stringstream to_return;
  for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
    to_return << (*l_hasher).toString();
//     cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
  }
  return to_return.str();
}
/**
 *
 */
void hashMapInfos::printHash()
{
  for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
//     cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
  }
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -34,29 +34,29 @@ using namespace std;
namespace HashMapSpace
{
class hashMapInfos
{
private:
  vector<infosHasher> m_hasher;
public:
//   ~hashMap();
  long hashValue ( string key );
  int trouve ( long searchKey );
  int trouve ( string key );
  void addHasher ( string key, vector<int> value );
  void addValue ( string key, vector<int> value );
  infosHasher getHasher ( string key );
  vector<int> getValue ( string key );
//   string searchValue ( string key );
  void setValue ( string key , vector<int> value );
  void printHash();
  string toString();
  vector<infosHasher> getHashMap();
  string printStringHash();
  string printStringHash2();
  string printStringHashForLexicon();
};
}
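A minimal usage sketch for the interface above, assuming the header is included alongside infosHasher.h; the key and positions are made up for illustration and are not taken from the patch:

  HashMapSpace::hashMapInfos map;
  std::vector<int> positions;
  positions.push_back(3);
  positions.push_back(7);
  map.addValue("the cat", positions);          // stored under hashValue("the cat")
  if (map.trouve("the cat")) {                 // trouve() returns 1 when the key exists
    std::vector<int> found = map.getValue("the cat");
  }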

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -27,179 +27,166 @@ using namespace std;
namespace HashMapSpace
{
// hashMapStringInfos::hashMap();
/*    hashMapStringInfos::~hashMap()
{
//   vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
  for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
  {
    delete(*del);
  }
}*/
/**
 * int hashMapStringInfos::trouve ( long searchKey )
 * @param searchKey
 * @return
 */
int hashMapStringInfos::trouve ( long searchKey )
{
  long foundKey;
//   vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
    foundKey = ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
      return 1;
    }
  }
  return 0;
}
int hashMapStringInfos::trouve ( string key )
{
  long searchKey = hashValue ( key );
  long foundKey;;
//   vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
    foundKey = ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
      return 1;
    }
  }
  return 0;
}
/**
 * long hashMapStringInfos::hashValue ( string key )
 * @param key
 * @return
 */
long hashMapStringInfos::hashValue ( string key )
{
  locale loc;                 // the "C" locale
  const collate<char>& coll = use_facet<collate<char> > ( loc );
  return coll.hash ( key.data(), key.data() + key.length() );
//   boost::hash<string> hasher;
//   return hasher ( key );
}
/**
 * void hashMapStringInfos::addHasher ( string key, string value )
 * @param key
 * @param value
 */
void hashMapStringInfos::addHasher ( string key, vector<string> value )
{
  if ( trouve ( hashValue ( key ) ) == 0 ) {
    //     cerr << "ICI1" <<endl;
    stringInfosHasher H ( hashValue ( key ), key, value );
    //     cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
    //     cerr << "ICI2" <<endl;
    m_hasher.push_back ( H );
  }
}
void hashMapStringInfos::addValue ( string key, vector<string> value )
{
  addHasher ( key, value );
}
stringInfosHasher hashMapStringInfos::getHasher ( string key )
{
  long searchKey = hashValue ( key );
  long foundKey;
  //   vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
    foundKey = ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
      return ( *l_hasher );
    }
  }
  vector<string> tmp;
  stringInfosHasher defaut ( 0, "", tmp );
  return defaut;
}
vector<string> hashMapStringInfos::getValue ( string key )
{
  long searchKey = hashValue ( key );
  long foundKey;
  vector<string> retour;
  //   vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
    foundKey = ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
      //       cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
      return ( *l_hasher ).getValue();
    }
  }
  return retour;
}
// string hashMapStringInfos::searchValue ( string value )
// {
// //   long searchKey=hashValue ( key );
// //   long foundKey;
//   vector<int> foundValue;
//
// //   vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
//   for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
//   {
//     foundValue= ( *l_hasher ).getValue();
// /*    if ( foundValue.compare ( value ) == 0 )
//     {
//       return ( *l_hasher ).getKey();
//     }*/
//   }
//   return "";
// }
//
void hashMapStringInfos::setValue ( string key , vector<string> value )
{
  long searchKey = hashValue ( key );
  long foundKey;
  //   vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
  for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
    foundKey = ( *l_hasher ).getHashKey();
    if ( searchKey == foundKey ) {
      ( *l_hasher ).setValue ( value );
      //       return ( *l_hasher ).getValue();
    }
  }
}
string hashMapStringInfos::toString ()
{
  stringstream to_return;
  for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
    to_return << (*l_hasher).toString();
    //     cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
  }
  return to_return.str();
}
/**
 *
 */
void hashMapStringInfos::printHash()
{
  for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
    //   cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
  }
}
vector< stringInfosHasher > hashMapStringInfos::getHashMap()
{
  return m_hasher;
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -34,29 +34,29 @@ using namespace std;
namespace HashMapSpace
{
class hashMapStringInfos
{
private:
  vector<stringInfosHasher> m_hasher;
public:
//   ~hashMap();
  long hashValue ( string key );
  int trouve ( long searchKey );
  int trouve ( string key );
  void addHasher ( string key, vector<string> value );
  void addValue ( string key, vector<string> value );
  stringInfosHasher getHasher ( string key );
  vector<string> getValue ( string key );
//   string searchValue ( string key );
  void setValue ( string key , vector<string> value );
  void printHash();
  string toString();
  vector<stringInfosHasher> getHashMap();
  string printStringHash();
  string printStringHash2();
  string printStringHashForLexicon();
};
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -27,35 +27,35 @@ using namespace Tools;
namespace HashMapSpace
{
infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
{
  m_hashKey=cle;
  m_key=cleTxt;
  m_value=valueVecInt;
}
//   infosHasher::~infosHasher(){};*/
long infosHasher::getHashKey()
{
  return m_hashKey;
}
string infosHasher::getKey()
{
  return m_key;
}
vector<int> infosHasher::getValue()
{
  return m_value;
}
void infosHasher::setValue ( vector<int> value )
{
  m_value=value;
}
string infosHasher::toString()
{
  stringstream to_return;
  to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
  return to_return.str();
}
// typedef stdext::hash_map<std::string,string, stringhasher> HASH_S_S;

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -31,23 +31,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace HashMapSpace
{
class infosHasher
{
private:
  long m_hashKey;
  string m_key;
  vector<int> m_value;
public:
  infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
  long getHashKey();
  string getKey();
  vector<int> getValue();
  void setValue ( vector<int> value );
  string toString();
};
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -26,29 +26,29 @@ using namespace std;
namespace HashMapSpace
{
stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
{
  m_hashKey=cle;
  m_key=cleTxt;
  m_value=valueTxt;
}
//   stringHasher::~stringHasher(){};*/
long stringHasher::getHashKey()
{
  return m_hashKey;
}
string stringHasher::getKey()
{
  return m_key;
}
string stringHasher::getValue()
{
  return m_value;
}
void stringHasher::setValue ( string value )
{
  m_value=value;
}
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -28,22 +28,22 @@ using namespace std;
namespace HashMapSpace
{
class stringHasher
{
private:
  long m_hashKey;
  string m_key;
  string m_value;
public:
  stringHasher ( long cle, string cleTxt, string valueTxt );
  long getHashKey();
  string getKey();
  string getValue();
  void setValue ( string value );
};
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -27,35 +27,35 @@ using namespace Tools;
namespace HashMapSpace
{
stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
{
  m_hashKey=cle;
  m_key=cleTxt;
  m_value=valueVecInt;
}
//   stringInfosHasher::~stringInfosHasher(){};*/
long stringInfosHasher::getHashKey()
{
  return m_hashKey;
}
string stringInfosHasher::getKey()
{
  return m_key;
}
vector<string> stringInfosHasher::getValue()
{
  return m_value;
}
void stringInfosHasher::setValue ( vector<string> value )
{
  m_value=value;
}
string stringInfosHasher::toString()
{
  stringstream to_return;
  to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
  return to_return.str();
}
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -29,23 +29,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace HashMapSpace
{
class stringInfosHasher
{
private:
  long m_hashKey;
  string m_key;
  vector<string> m_value;
public:
  stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
  long getHashKey();
  string getKey();
  vector<string> getValue();
  void setValue ( vector<string> value );
  string toString();
};
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -24,191 +24,163 @@ using namespace std;
namespace TERCpp namespace TERCpp
{ {
terAlignment::terAlignment() terAlignment::terAlignment()
{ {
// vector<string> ref; // vector<string> ref;
// vector<string> hyp; // vector<string> hyp;
// vector<string> aftershift; // vector<string> aftershift;
// TERshift[] allshifts = null; // TERshift[] allshifts = null;
numEdits=0; numEdits=0;
numWords=0; numWords=0;
bestRef=""; bestRef="";
numIns=0; numIns=0;
numDel=0; numDel=0;
numSub=0; numSub=0;
numSft=0; numSft=0;
numWsf=0; numWsf=0;
} }
string terAlignment::toString() string terAlignment::toString()
{ {
stringstream s; stringstream s;
s.str ( "" ); s.str ( "" );
s << "Original Ref: \t" << join ( " ", ref ) << endl; s << "Original Ref: \t" << join ( " ", ref ) << endl;
s << "Original Hyp: \t" << join ( " ", hyp ) <<endl; s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
s << "Hyp After Shift:\t" << join ( " ", aftershift ); s << "Hyp After Shift:\t" << join ( " ", aftershift );
// s << "Hyp After Shift: " << join ( " ", aftershift ); // s << "Hyp After Shift: " << join ( " ", aftershift );
s << endl; s << endl;
// string s = "Original Ref: " + join(" ", ref) + "\nOriginal Hyp: " + join(" ", hyp) + "\nHyp After Shift: " + join(" ", aftershift); // string s = "Original Ref: " + join(" ", ref) + "\nOriginal Hyp: " + join(" ", hyp) + "\nHyp After Shift: " + join(" ", aftershift);
  if ( ( int ) sizeof ( alignment ) >0 ) {
    s << "Alignment: (";
    // s += "\nAlignment: (";
    for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) {
      s << alignment[i];
      // s+=alignment[i];
    }
    // s += ")";
    s << ")";
  }
  s << endl;
  if ( ( int ) allshifts.size() == 0 ) {
    // s += "\nNumShifts: 0";
    s << "NumShifts: 0";
  } else {
    // s += "\nNumShifts: " + (int)allshifts.size();
    s << "NumShifts: "<< ( int ) allshifts.size();
    for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
      s << endl << " " ;
      s << ( ( terShift ) allshifts[i] ).toString();
      // s += "\n " + allshifts[i];
    }
  }
  s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
  // s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")";
  return s.str();
}
string terAlignment::join ( string delim, vector<string> arr )
{
  if ( ( int ) arr.size() == 0 ) return "";
  // if ((int)delim.compare("") == 0) delim = new String("");
  // String s = new String("");
  stringstream s;
  s.str ( "" );
  for ( int i = 0; i < ( int ) arr.size(); i++ ) {
    if ( i == 0 ) {
      s << arr.at ( i );
    } else {
      s << delim << arr.at ( i );
    }
  }
  return s.str();
  // return "";
}
double terAlignment::score()
{
if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
return 1.0;
}
if ( numWords <= 0.0 ) {
return 0.0;
}
return ( double ) numEdits / numWords;
}
double terAlignment::scoreAv()
{
if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
return 1.0;
}
if ( averageWords <= 0.0 ) {
return 0.0;
}
return ( double ) numEdits / averageWords;
}
void terAlignment::scoreDetails()
{
numIns = numDel = numSub = numWsf = numSft = 0;
if((int)allshifts.size()>0) {
for(int i = 0; i < (int)allshifts.size(); ++i) {
numWsf += allshifts[i].size();
    }
    numSft = allshifts.size();
  }

  if((int)alignment.size()>0 ) {
    for(int i = 0; i < (int)alignment.size(); ++i) {
      switch (alignment[i]) {
      case 'S':
      case 'T':
        numSub++;
        break;
      case 'D':
        numDel++;
        break;
      case 'I':
        numIns++;
        break;
      }
    }
  }
  // if(numEdits != numSft + numDel + numIns + numSub)
  //   System.out.println("** Error, unmatch edit erros " + numEdits +
  //     " vs " + (numSft + numDel + numIns + numSub));
}
string terAlignment::printAlignments()
{
  stringstream to_return;
  for(int i = 0; i < (int)alignment.size(); ++i) {
    char alignInfo=alignment.at(i);
    if (alignInfo == 'A' ) {
      alignInfo='A';
    }
    if (i==0) {
      to_return << alignInfo;
    } else {
      to_return << " " << alignInfo;
    }
  }
  return to_return.str();
}
string terAlignment::printAllShifts() string terAlignment::printAllShifts()
{ {
stringstream to_return; stringstream to_return;
  if ( ( int ) allshifts.size() == 0 ) {
    // s += "\nNumShifts: 0";
    to_return << "NbrShifts: 0";
  } else {
    // s += "\nNumShifts: " + (int)allshifts.size();
    to_return << "NbrShifts: "<< ( int ) allshifts.size();
    for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
      to_return << "\t" ;
      to_return << ( ( terShift ) allshifts[i] ).toString();
      // s += "\n " + allshifts[i];
    }
  }
return to_return.str(); return to_return.str();
} }
} }
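
As a quick illustration of the two scoring variants above (a sketch only, assuming the header is available as terAlignment.h; the public fields used here are the ones declared in the class further below): with 3 edits against a 10-word reference the sentence-level score is 3/10, while scoreAv() divides by the average reference length instead.

#include <iostream>
#include "terAlignment.h"

using namespace TERCpp;

int main()
{
  terAlignment result;
  result.numEdits = 3;
  result.numWords = 10;      // words in the best-matching reference
  result.averageWords = 12;  // average reference length
  std::cout << result.score()   << std::endl;  // 3 / 10 = 0.3
  std::cout << result.scoreAv() << std::endl;  // 3 / 12 = 0.25
  return 0;
}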

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -34,41 +34,41 @@ using namespace std;
namespace TERCpp namespace TERCpp
{ {
class terAlignment class terAlignment
{ {
private: private:
public: public:
terAlignment(); terAlignment();
string toString(); string toString();
void scoreDetails(); void scoreDetails();
vector<string> ref; vector<string> ref;
vector<string> hyp; vector<string> hyp;
vector<string> aftershift; vector<string> aftershift;
vector<terShift> allshifts; vector<terShift> allshifts;
vector<int> hyp_int; vector<int> hyp_int;
vector<int> aftershift_int; vector<int> aftershift_int;
double numEdits; double numEdits;
double numWords; double numWords;
double averageWords; double averageWords;
vector<char> alignment; vector<char> alignment;
string bestRef; string bestRef;
int numIns; int numIns;
int numDel; int numDel;
int numSub; int numSub;
int numSft; int numSft;
int numWsf; int numWsf;
string join ( string delim, vector<string> arr ); string join ( string delim, vector<string> arr );
double score(); double score();
double scoreAv(); double scoreAv();
string printAlignments(); string printAlignments();
string printAllShifts(); string printAllShifts();
}; };
} }
#endif #endif

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -42,32 +42,32 @@ namespace TERCpp
// numSft=0; // numSft=0;
// numWsf=0; // numWsf=0;
// } // }
terShift::terShift () terShift::terShift ()
{ {
start = 0; start = 0;
end = 0; end = 0;
moveto = 0; moveto = 0;
newloc = 0; newloc = 0;
cost=1.0; cost=1.0;
} }
terShift::terShift ( int _start, int _end, int _moveto, int _newloc ) terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
{ {
start = _start; start = _start;
end = _end; end = _end;
moveto = _moveto; moveto = _moveto;
newloc = _newloc; newloc = _newloc;
cost=1.0; cost=1.0;
} }
terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted ) terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
{ {
start = _start; start = _start;
end = _end; end = _end;
moveto = _moveto; moveto = _moveto;
newloc = _newloc; newloc = _newloc;
shifted = _shifted; shifted = _shifted;
cost=1.0; cost=1.0;
} }
// string terShift::vectorToString(vector<string> vec) // string terShift::vectorToString(vector<string> vec)
// { // {
// string retour(""); // string retour("");
@ -78,44 +78,38 @@ namespace TERCpp
// return retour; // return retour;
// } // }
string terShift::toString() string terShift::toString()
{ {
stringstream s; stringstream s;
s.str ( "" ); s.str ( "" );
s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]"; s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
  if ( ( int ) shifted.size() > 0 ) {
    s << " (" << vectorToString ( shifted ) << ")";
  }
  return s.str();
}
/* The distance of the shift. */
int terShift::distance()
{
  if ( moveto < start ) {
    return start - moveto;
  } else if ( moveto > end ) {
    return moveto - end;
  } else {
    return moveto - start;
  }
}
bool terShift::leftShift() bool terShift::leftShift()
{ {
return ( moveto < start ); return ( moveto < start );
} }
int terShift::size() int terShift::size()
{ {
return ( end - start ) + 1; return ( end - start ) + 1;
} }
// terShift terShift::operator=(terShift t) // terShift terShift::operator=(terShift t)
// { // {
// //
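
A small worked example of the shift geometry implemented above (a sketch, assuming the header is available as terShift.h): shifting the words at positions 5..6 to position 2 gives size (6-5)+1 = 2 and, since moveto < start, distance 5-2 = 3.

#include <iostream>
#include "terShift.h"

using namespace TERCpp;

int main()
{
  // start=5, end=6, moveto=2, newloc=2
  terShift s ( 5, 6, 2, 2 );
  std::cout << s.size()      << std::endl;  // 2
  std::cout << s.distance()  << std::endl;  // 3
  std::cout << s.leftShift() << std::endl;  // 1 (the shift moves left)
  return 0;
}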

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -34,32 +34,32 @@ using namespace Tools;
namespace TERCpp namespace TERCpp
{ {
class terShift class terShift
{ {
private: private:
public: public:
terShift(); terShift();
terShift ( int _start, int _end, int _moveto, int _newloc ); terShift ( int _start, int _end, int _moveto, int _newloc );
terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted ); terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
string toString(); string toString();
int distance() ; int distance() ;
bool leftShift(); bool leftShift();
int size(); int size();
// terShift operator=(terShift t); // terShift operator=(terShift t);
// string vectorToString(vector<string> vec); // string vectorToString(vector<string> vec);
int start; int start;
int end; int end;
int moveto; int moveto;
int newloc; int newloc;
vector<string> shifted; // The words we shifted vector<string> shifted; // The words we shifted
vector<char> alignment ; // for pra_more output vector<char> alignment ; // for pra_more output
vector<string> aftershift; // for pra_more output vector<string> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to // This is used to store the cost of a shift, so we don't have to
// calculate it multiple times. // calculate it multiple times.
double cost; double cost;
}; };
} }
#endif #endif

File diff suppressed because it is too large

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -41,62 +41,62 @@ namespace TERCpp
{ {
// typedef size_t WERelement[2]; // typedef size_t WERelement[2];
// Alignment vector containing the word hash and its evaluation (0=ok, 1=sub, 2=ins, 3=del) // Alignment vector containing the word hash and its evaluation (0=ok, 1=sub, 2=ins, 3=del)
typedef vector<terShift> vecTerShift; typedef vector<terShift> vecTerShift;
/** /**
@author @author
*/ */
class terCalc class terCalc
{ {
private : private :
// Alignment vector containing the word hash and its evaluation (0=ok, 1=sub, 2=ins, 3=del) // Alignment vector containing the word hash and its evaluation (0=ok, 1=sub, 2=ins, 3=del)
WERalignment l_WERalignment; WERalignment l_WERalignment;
// HashMap containing the hash values of each word // HashMap containing the hash values of each word
hashMap bagOfWords; hashMap bagOfWords;
int TAILLE_PERMUT_MAX; int TAILLE_PERMUT_MAX;
// Internal counters // Internal counters
int NBR_SEGS_EVALUATED; int NBR_SEGS_EVALUATED;
int NBR_PERMUTS_CONSID; int NBR_PERMUTS_CONSID;
int NBR_BS_APPELS; int NBR_BS_APPELS;
int DIST_MAX_PERMUT; int DIST_MAX_PERMUT;
bool PRINT_DEBUG; bool PRINT_DEBUG;
// Used in minDistEdit; they are not readjusted // Used in minDistEdit; they are not readjusted
double S[1000][1000]; double S[1000][1000];
char P[1000][1000]; char P[1000][1000];
vector<vecInt> refSpans; vector<vecInt> refSpans;
vector<vecInt> hypSpans; vector<vecInt> hypSpans;
int TAILLE_BEAM; int TAILLE_BEAM;
public: public:
int shift_cost; int shift_cost;
int insert_cost; int insert_cost;
int delete_cost; int delete_cost;
int substitute_cost; int substitute_cost;
int match_cost; int match_cost;
double infinite; double infinite;
terCalc(); terCalc();
// ~terCalc(); // ~terCalc();
// size_t* hashVec ( vector<string> s ); // size_t* hashVec ( vector<string> s );
void setDebugMode ( bool b ); void setDebugMode ( bool b );
// int WERCalculation ( size_t * ref, size_t * hyp ); // int WERCalculation ( size_t * ref, size_t * hyp );
// int WERCalculation ( vector<string> ref, vector<string> hyp ); // int WERCalculation ( vector<string> ref, vector<string> hyp );
// int WERCalculation ( vector<int> ref, vector<int> hyp ); // int WERCalculation ( vector<int> ref, vector<int> hyp );
terAlignment WERCalculation ( vector<string> hyp, vector<string> ref ); terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
// string vectorToString(vector<string> vec); // string vectorToString(vector<string> vec);
// vector<string> subVector(vector<string> vec, int start, int end); // vector<string> subVector(vector<string> vec, int start, int end);
hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref ); hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans ); terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
bool trouverIntersection ( vecInt refSpan, vecInt hypSpan ); bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength ); terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
terAlignment TER ( vector<string> hyp, vector<string> ref ); terAlignment TER ( vector<string> hyp, vector<string> ref );
terAlignment TER ( vector<int> hyp, vector<int> ref ); terAlignment TER ( vector<int> hyp, vector<int> ref );
bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align ); bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign ); void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign ); vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
alignmentStruct permuter ( vector<string> words, terShift s ); alignmentStruct permuter ( vector<string> words, terShift s );
alignmentStruct permuter ( vector<string> words, int start, int end, int newloc ); alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
}; };
} }
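
To show how the pieces of this interface fit together, a hedged usage sketch (the include name tercalc.h is an assumption; terCalc, TER() and terAlignment are the types declared above): score one tokenized hypothesis against one reference and inspect the resulting alignment.

#include <iostream>
#include <string>
#include <vector>
#include "tercalc.h"

using namespace std;
using namespace TERCpp;

int main()
{
  vector<string> hyp, ref;
  hyp.push_back("the"); hyp.push_back("cat"); hyp.push_back("sat");
  ref.push_back("the"); ref.push_back("cat"); ref.push_back("sat"); ref.push_back("down");

  terCalc scorer;
  terAlignment result = scorer.TER(hyp, ref);  // edit distance + shift search
  cout << result.toString() << endl;           // per-word alignment and shifts
  cout << result.score()    << endl;           // numEdits / numWords
  return 0;
}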

File diff suppressed because it is too large

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -35,32 +35,31 @@ using namespace std;
namespace Tools namespace Tools
{ {
typedef vector<double> vecDouble; typedef vector<double> vecDouble;
typedef vector<char> vecChar; typedef vector<char> vecChar;
typedef vector<int> vecInt; typedef vector<int> vecInt;
typedef vector<float> vecFloat; typedef vector<float> vecFloat;
typedef vector<size_t> vecSize_t; typedef vector<size_t> vecSize_t;
typedef vector<string> vecString; typedef vector<string> vecString;
typedef vector<string> alignmentElement; typedef vector<string> alignmentElement;
typedef vector<alignmentElement> WERalignment; typedef vector<alignmentElement> WERalignment;
struct param struct param {
{ bool debugMode;
bool debugMode; string referenceFile; // path to the resources
string referenceFile; // path to the resources string hypothesisFile; // path to the configuration files
string hypothesisFile; // path to the configuration files string outputFileExtension;
string outputFileExtension; string outputFileName;
string outputFileName; bool noPunct;
bool noPunct; bool caseOn;
bool caseOn; bool normalize;
bool normalize; bool tercomLike;
bool tercomLike; bool sgmlInputs;
bool sgmlInputs; bool noTxtIds;
bool noTxtIds; bool printAlignments;
bool printAlignments; bool WER;
bool WER; int debugLevel;
int debugLevel;
}; };
// param = { false, "","","","" }; // param = { false, "","","","" };
@ -68,35 +67,35 @@ struct param
// private: // private:
// public: // public:
string vectorToString ( vector<string> vec ); string vectorToString ( vector<string> vec );
string vectorToString ( vector<char> vec ); string vectorToString ( vector<char> vec );
string vectorToString ( vector<int> vec ); string vectorToString ( vector<int> vec );
string vectorToString ( vector<string> vec, string s ); string vectorToString ( vector<string> vec, string s );
string vectorToString ( vector<char> vec, string s ); string vectorToString ( vector<char> vec, string s );
string vectorToString ( vector<int> vec, string s ); string vectorToString ( vector<int> vec, string s );
string vectorToString ( vector<bool> vec, string s ); string vectorToString ( vector<bool> vec, string s );
string vectorToString ( char* vec, string s, int taille ); string vectorToString ( char* vec, string s, int taille );
string vectorToString ( int* vec, string s , int taille ); string vectorToString ( int* vec, string s , int taille );
string vectorToString ( bool* vec, string s , int taille ); string vectorToString ( bool* vec, string s , int taille );
vector<string> subVector ( vector<string> vec, int start, int end ); vector<string> subVector ( vector<string> vec, int start, int end );
vector<int> subVector ( vector<int> vec, int start, int end ); vector<int> subVector ( vector<int> vec, int start, int end );
vector<float> subVector ( vector<float> vec, int start, int end ); vector<float> subVector ( vector<float> vec, int start, int end );
vector<string> copyVector ( vector<string> vec ); vector<string> copyVector ( vector<string> vec );
vector<int> copyVector ( vector<int> vec ); vector<int> copyVector ( vector<int> vec );
vector<float> copyVector ( vector<float> vec ); vector<float> copyVector ( vector<float> vec );
vector<string> stringToVector ( string s, string tok ); vector<string> stringToVector ( string s, string tok );
vector<string> stringToVector ( char s, string tok ); vector<string> stringToVector ( char s, string tok );
vector<string> stringToVector ( int s, string tok ); vector<string> stringToVector ( int s, string tok );
vector<int> stringToVectorInt ( string s, string tok ); vector<int> stringToVectorInt ( string s, string tok );
vector<float> stringToVectorFloat ( string s, string tok ); vector<float> stringToVectorFloat ( string s, string tok );
string lowerCase(string str); string lowerCase(string str);
string removePunct(string str); string removePunct(string str);
string tokenizePunct(string str); string tokenizePunct(string str);
string removePunctTercom(string str); string removePunctTercom(string str);
string normalizeStd(string str); string normalizeStd(string str);
string printParams(param p); string printParams(param p);
string join ( string delim, vector<string> arr ); string join ( string delim, vector<string> arr );
// }; // };
param copyParam(param p); param copyParam(param p);
} }
#endif #endif

View File

@ -43,7 +43,8 @@ private:
}; };
// load hypothesis from candidate output // load hypothesis from candidate output
vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile)
{
ifstream cand(candFile.c_str()); ifstream cand(candFile.c_str());
if (!cand.good()) throw runtime_error("Error opening candidate file"); if (!cand.good()) throw runtime_error("Error opening candidate file");
@ -61,7 +62,8 @@ vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile) {
} }
// load 1-best hypothesis from n-best file (useful if relying on alignment/tree information) // load 1-best hypothesis from n-best file (useful if relying on alignment/tree information)
vector<ScoreStats> EvaluatorUtil::loadNBest(const string& nBestFile)
{
vector<ScoreStats> entries; vector<ScoreStats> entries;
Data data(g_scorer); Data data(g_scorer);
@ -81,8 +83,7 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap, bool nbest_i
if (nbest_input) { if (nbest_input) {
entries = loadNBest(candFile); entries = loadNBest(candFile);
  } else {
entries = loadCand(candFile); entries = loadCand(candFile);
} }

View File

@ -77,7 +77,7 @@ int main(int argc, char** argv)
bool model_bg = false; // Use model for background corpus bool model_bg = false; // Use model for background corpus
bool verbose = false; // Verbose updates bool verbose = false; // Verbose updates
bool safe_hope = false; // Model score cannot have more than BLEU_RATIO times more influence than BLEU bool safe_hope = false; // Model score cannot have more than BLEU_RATIO times more influence than BLEU
size_t hgPruning = 50; //prune hypergraphs to have this many edges per reference word size_t hgPruning = 50; //prune hypergraphs to have this many edges per reference word
// Command-line processing follows pro.cpp // Command-line processing follows pro.cpp
po::options_description desc("Allowed options"); po::options_description desc("Allowed options");
@ -157,7 +157,7 @@ int main(int argc, char** argv)
do { do {
size_t equals = buffer.find_last_of("="); size_t equals = buffer.find_last_of("=");
UTIL_THROW_IF(equals == buffer.npos, util::Exception, "Incorrect format in dense feature file: '" UTIL_THROW_IF(equals == buffer.npos, util::Exception, "Incorrect format in dense feature file: '"
<< buffer << "'"); << buffer << "'");
string name = buffer.substr(0,equals); string name = buffer.substr(0,equals);
names.push_back(name); names.push_back(name);
initParams.push_back(boost::lexical_cast<ValType>(buffer.substr(equals+2))); initParams.push_back(boost::lexical_cast<ValType>(buffer.substr(equals+2)));
@ -183,7 +183,7 @@ int main(int argc, char** argv)
//Make sure that SparseVector encodes dense feature names as 0..n-1. //Make sure that SparseVector encodes dense feature names as 0..n-1.
for (size_t i = 0; i < names.size(); ++i) { for (size_t i = 0; i < names.size(); ++i) {
size_t id = SparseVector::encode(names[i]); size_t id = SparseVector::encode(names[i]);
assert(id == i); assert(id == i);
if (verbose) cerr << names[i] << " " << initParams[i] << endl; if (verbose) cerr << names[i] << " " << initParams[i] << endl;
} }
@ -246,12 +246,12 @@ int main(int argc, char** argv)
int iNumUpdates = 0; int iNumUpdates = 0;
ValType totalLoss = 0.0; ValType totalLoss = 0.0;
size_t sentenceIndex = 0; size_t sentenceIndex = 0;
for(decoder->reset();!decoder->finished(); decoder->next()) { for(decoder->reset(); !decoder->finished(); decoder->next()) {
HopeFearData hfd; HopeFearData hfd;
decoder->HopeFear(bg,wv,&hfd); decoder->HopeFear(bg,wv,&hfd);
// Update weights // Update weights
if (!hfd.hopeFearEqual && hfd.hopeBleu > hfd.fearBleu) { if (!hfd.hopeFearEqual && hfd.hopeBleu > hfd.fearBleu) {
// Vector difference // Vector difference
MiraFeatureVector diff = hfd.hopeFeatures - hfd.fearFeatures; MiraFeatureVector diff = hfd.hopeFeatures - hfd.fearFeatures;
// Bleu difference // Bleu difference
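The loop above extracts a hope and a fear derivation for each sentence and updates the weights from their difference. The snippet below is an illustrative, generic sketch of that clipped MIRA step, not the Moses implementation; all names (hope, fear, bleuDiff, C) are hypothetical stand-ins.

#include <vector>
#include <algorithm>
#include <numeric>

typedef std::vector<float> Feats;

void miraUpdate(Feats &w, const Feats &hope, const Feats &fear,
                float bleuDiff, float C)
{
  // feature difference between the hope and fear derivations
  Feats diff(w.size());
  for (size_t i = 0; i < w.size(); ++i) diff[i] = hope[i] - fear[i];

  float modelDiff = std::inner_product(w.begin(), w.end(), diff.begin(), 0.0f);
  float loss = bleuDiff - modelDiff;        // hinge loss of the hope/fear pair
  if (loss <= 0) return;                    // margin already satisfied

  float norm2 = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0f);
  if (norm2 <= 0) return;
  float delta = std::min(C, loss / norm2);  // clipped step size

  for (size_t i = 0; i < w.size(); ++i) w[i] += delta * diff[i];
}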

View File

@ -3,26 +3,27 @@
int main(int argc, char* argv[])
{
const char * is_reordering = "false"; const char * is_reordering = "false";
if (!(argc == 5 || argc == 4)) { if (!(argc == 5 || argc == 4)) {
// Tell the user how to run the program // Tell the user how to run the program
std::cerr << "Provided " << argc << " arguments, needed 4 or 5." << std::endl; std::cerr << "Provided " << argc << " arguments, needed 4 or 5." << std::endl;
std::cerr << "Usage: " << argv[0] << " path_to_phrasetable output_dir num_scores is_reordering" << std::endl; std::cerr << "Usage: " << argv[0] << " path_to_phrasetable output_dir num_scores is_reordering" << std::endl;
std::cerr << "is_reordering should be either true or false, but it is currently a stub feature." << std::endl; std::cerr << "is_reordering should be either true or false, but it is currently a stub feature." << std::endl;
//std::cerr << "Usage: " << argv[0] << " path_to_phrasetable number_of_uniq_lines output_bin_file output_hash_table output_vocab_id" << std::endl; //std::cerr << "Usage: " << argv[0] << " path_to_phrasetable number_of_uniq_lines output_bin_file output_hash_table output_vocab_id" << std::endl;
return 1; return 1;
} }
if (argc == 5) { if (argc == 5) {
is_reordering = argv[4]; is_reordering = argv[4];
} }
createProbingPT(argv[1], argv[2], argv[3], is_reordering); createProbingPT(argv[1], argv[2], argv[3], is_reordering);
util::PrintUsage(std::cout); util::PrintUsage(std::cout);
return 0; return 0;
} }

View File

@ -26,36 +26,37 @@
#include <unistd.h> #include <unistd.h>
#include <fcntl.h> #include <fcntl.h>
int main(int argc, char* argv[])
{
  if (argc != 2) {
    // Tell the user how to run the program
    std::cerr << "Usage: " << argv[0] << " path_to_directory" << std::endl;
    return 1;
  }

  QueryEngine queries(argv[1]);

  //Interactive search
  std::cout << "Please enter a string to be searched, or exit to exit." << std::endl;
  while (true) {
    std::string cinstr = "";
    getline(std::cin, cinstr);
    if (cinstr == "exit") {
      break;
    } else {
      //Actual lookup
      std::pair<bool, std::vector<target_text> > query_result;
      query_result = queries.query(StringPiece(cinstr));

      if (query_result.first) {
        queries.printTargetInfo(query_result.second);
      } else {
        std::cout << "Key not found!" << std::endl;
      }
    }
  }

  util::PrintUsage(std::cout);
  return 0;
}

View File

@ -53,13 +53,15 @@ using namespace std;
namespace po = boost::program_options; namespace po = boost::program_options;
typedef multimap<float,string> Lines; typedef multimap<float,string> Lines;
static void usage(const po::options_description& desc, char** argv)
{
  cerr << "Usage: " + string(argv[0]) + " [options] input-file output-file" << endl;
  cerr << desc << endl;
}

//Find top n translations of source, and send them to output
static void outputTopN(Lines lines, size_t maxPhrases, ostream& out)
{
size_t count = 0; size_t count = 0;
for (Lines::const_reverse_iterator i = lines.rbegin(); i != lines.rend(); ++i) { for (Lines::const_reverse_iterator i = lines.rbegin(); i != lines.rend(); ++i) {
out << i->second << endl; out << i->second << endl;
@ -92,7 +94,7 @@ static void outputTopN(const Phrase& sourcePhrase, const multimap<float,const Ta
out << endl; out << endl;
} }
}*/ }*/
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
bool help; bool help;
string input_file; string input_file;
@ -112,7 +114,7 @@ int main(int argc, char** argv)
cmdline_options.add(desc); cmdline_options.add(desc);
po::variables_map vm; po::variables_map vm;
po::parsed_options parsed = po::command_line_parser(argc,argv). po::parsed_options parsed = po::command_line_parser(argc,argv).
options(cmdline_options).run(); options(cmdline_options).run();
po::store(parsed, vm); po::store(parsed, vm);
po::notify(vm); po::notify(vm);
if (help) { if (help) {
@ -135,7 +137,7 @@ int main(int argc, char** argv)
mosesargs.push_back("-f"); mosesargs.push_back("-f");
mosesargs.push_back(config_file); mosesargs.push_back(config_file);
boost::scoped_ptr<Parameter> params(new Parameter()); boost::scoped_ptr<Parameter> params(new Parameter());
char** mosesargv = new char*[mosesargs.size()]; char** mosesargv = new char*[mosesargs.size()];
for (size_t i = 0; i < mosesargs.size(); ++i) { for (size_t i = 0; i < mosesargs.size(); ++i) {
mosesargv[i] = new char[mosesargs[i].length() + 1]; mosesargv[i] = new char[mosesargs[i].length() + 1];

View File

@ -201,7 +201,7 @@ int main(int argc, char* argv[])
cout << lineCount << " ||| " << p << " " << r << " " << prune << " " << scale << " ||| "; cout << lineCount << " ||| " << p << " " << r << " " << prune << " " << scale << " ||| ";
vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList); vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
manager.OutputBestHypo(mbrBestHypo, lineCount, staticData.GetReportSegmentation(), manager.OutputBestHypo(mbrBestHypo, lineCount, staticData.GetReportSegmentation(),
staticData.GetReportAllFactors(),cout); staticData.GetReportAllFactors(),cout);
} }
} }

View File

@ -80,7 +80,7 @@ int main(int argc, char** argv)
#ifdef HAVE_PROTOBUF #ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION; GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif #endif
// echo command line, if verbose // echo command line, if verbose
IFVERBOSE(1) { IFVERBOSE(1) {
TRACE_ERR("command: "); TRACE_ERR("command: ");
@ -121,7 +121,7 @@ int main(int argc, char** argv)
// set up read/writing class // set up read/writing class
IFVERBOSE(1) { IFVERBOSE(1) {
PrintUserTime("Created input-output object"); PrintUserTime("Created input-output object");
} }
IOWrapper* ioWrapper = new IOWrapper(); IOWrapper* ioWrapper = new IOWrapper();
@ -161,28 +161,26 @@ int main(int argc, char** argv)
#ifdef PT_UG #ifdef PT_UG
bool spe = params.isParamSpecified("spe-src"); bool spe = params.isParamSpecified("spe-src");
if (spe) { if (spe) {
// simulated post-editing: always run single-threaded! // simulated post-editing: always run single-threaded!
task->Run(); task->Run();
delete task; delete task;
string src,trg,aln; string src,trg,aln;
UTIL_THROW_IF2(!getline(*ioWrapper->spe_src,src), "[" << HERE << "] " UTIL_THROW_IF2(!getline(*ioWrapper->spe_src,src), "[" << HERE << "] "
<< "missing update data for simulated post-editing."); << "missing update data for simulated post-editing.");
UTIL_THROW_IF2(!getline(*ioWrapper->spe_trg,trg), "[" << HERE << "] " UTIL_THROW_IF2(!getline(*ioWrapper->spe_trg,trg), "[" << HERE << "] "
<< "missing update data for simulated post-editing."); << "missing update data for simulated post-editing.");
UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] " UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] "
<< "missing update data for simulated post-editing."); << "missing update data for simulated post-editing.");
      BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl()) {
        Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
        if (sapt) sapt->add(src,trg,aln);
        VERBOSE(1,"[" << HERE << " added src] " << src << endl);
        VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
        VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
      }
    } else
#endif #endif
pool.Submit(task); pool.Submit(task);
#else #else
task->Run(); task->Run();
delete task; delete task;

View File

@ -80,7 +80,7 @@ int main(int argc, char** argv)
#ifdef HAVE_PROTOBUF #ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION; GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif #endif
// echo command line, if verbose // echo command line, if verbose
IFVERBOSE(1) { IFVERBOSE(1) {
TRACE_ERR("command: "); TRACE_ERR("command: ");
@ -121,7 +121,7 @@ int main(int argc, char** argv)
// set up read/writing class // set up read/writing class
IFVERBOSE(1) { IFVERBOSE(1) {
PrintUserTime("Created input-output object"); PrintUserTime("Created input-output object");
} }
IOWrapper* ioWrapper = new IOWrapper(); IOWrapper* ioWrapper = new IOWrapper();

View File

@ -46,14 +46,13 @@ public:
* contains such an object then returns a pointer to it; otherwise a new * contains such an object then returns a pointer to it; otherwise a new
* one is inserted. * one is inserted.
*/ */
private: private:
const AlignmentInfo* Add(AlignmentInfo const& ainfo); const AlignmentInfo* Add(AlignmentInfo const& ainfo);
public: public:
template<typename ALNREP> template<typename ALNREP>
AlignmentInfo const * AlignmentInfo const *
  Add(ALNREP const & aln) {
return this->Add(AlignmentInfo(aln)); return this->Add(AlignmentInfo(aln));
} }
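
The comment above describes an interning scheme: adding an alignment hands back a pointer to a single shared copy. A generic sketch of that pattern (illustrative only, not the actual Moses collection; AlignmentPool and the pair-set representation are invented for the example):

#include <set>
#include <utility>

typedef std::set<std::pair<size_t,size_t> > Alignment;

class AlignmentPool
{
  std::set<Alignment> m_coll;
public:
  const Alignment* Add(const Alignment &a) {
    // std::set::insert is a no-op when an equal element already exists;
    // either way we return a pointer to the stored (canonical) copy.
    return &(*m_coll.insert(a).first);
  }
};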

View File

@ -13,11 +13,11 @@ namespace Moses
* print surface factor only for the given phrase * print surface factor only for the given phrase
*/ */
void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase, void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
const std::vector<FactorType> &outputFactorOrder, const std::vector<FactorType> &outputFactorOrder,
bool reportAllFactors) const bool reportAllFactors) const
{ {
UTIL_THROW_IF2(outputFactorOrder.size() == 0, UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Cannot be empty phrase"); "Cannot be empty phrase");
if (reportAllFactors == true) { if (reportAllFactors == true) {
out << phrase; out << phrase;
} else { } else {
@ -26,12 +26,12 @@ void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
out << *factor; out << *factor;
UTIL_THROW_IF2(factor == NULL, UTIL_THROW_IF2(factor == NULL,
"Empty factor 0 at position " << pos); "Empty factor 0 at position " << pos);
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL, UTIL_THROW_IF2(factor == NULL,
"Empty factor " << i << " at position " << pos); "Empty factor " << i << " at position " << pos);
out << "|" << *factor; out << "|" << *factor;
} }
@ -45,7 +45,7 @@ void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
// but there are scripts and tools that expect the output of -T to look like // but there are scripts and tools that expect the output of -T to look like
// that. // that.
void BaseManager::WriteApplicationContext(std::ostream &out, void BaseManager::WriteApplicationContext(std::ostream &out,
const ApplicationContext &context) const const ApplicationContext &context) const
{ {
assert(!context.empty()); assert(!context.empty());
ApplicationContext::const_reverse_iterator p = context.rbegin(); ApplicationContext::const_reverse_iterator p = context.rbegin();

View File

@ -17,23 +17,22 @@ protected:
const InputType &m_source; /**< source sentence to be translated */ const InputType &m_source; /**< source sentence to be translated */
BaseManager(const InputType &source) BaseManager(const InputType &source)
:m_source(source) :m_source(source) {
{} }
// output // output
typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext; typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext;
typedef std::set< std::pair<size_t, size_t> > Alignments; typedef std::set< std::pair<size_t, size_t> > Alignments;
void OutputSurface(std::ostream &out, void OutputSurface(std::ostream &out,
const Phrase &phrase, const Phrase &phrase,
const std::vector<FactorType> &outputFactorOrder, const std::vector<FactorType> &outputFactorOrder,
bool reportAllFactors) const; bool reportAllFactors) const;
void WriteApplicationContext(std::ostream &out, void WriteApplicationContext(std::ostream &out,
const ApplicationContext &context) const; const ApplicationContext &context) const;
template <class T> template <class T>
void ShiftOffsets(std::vector<T> &offsets, T shift) const void ShiftOffsets(std::vector<T> &offsets, T shift) const {
{
T currPos = shift; T currPos = shift;
for (size_t i = 0; i < offsets.size(); ++i) { for (size_t i = 0; i < offsets.size(); ++i) {
if (offsets[i] == 0) { if (offsets[i] == 0) {
@ -46,8 +45,8 @@ protected:
} }
public: public:
virtual ~BaseManager() virtual ~BaseManager() {
{} }
//! the input sentence being decoded //! the input sentence being decoded
const InputType& GetSource() const { const InputType& GetSource() const {

View File

@ -162,16 +162,16 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
if (m_translations.size() > 1) { if (m_translations.size() > 1) {
UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(), UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(),
"Non-monotonic future score: " "Non-monotonic future score: "
<< m_translations.Get(0)->GetFutureScore() << " vs. " << m_translations.Get(0)->GetFutureScore() << " vs. "
<< m_translations.Get(1)->GetFutureScore()); << m_translations.Get(1)->GetFutureScore());
} }
if (m_hypotheses.size() > 1) { if (m_hypotheses.size() > 1) {
UTIL_THROW_IF2(m_hypotheses[0]->GetTotalScore() < m_hypotheses[1]->GetTotalScore(), UTIL_THROW_IF2(m_hypotheses[0]->GetTotalScore() < m_hypotheses[1]->GetTotalScore(),
"Non-monotonic total score" "Non-monotonic total score"
<< m_hypotheses[0]->GetTotalScore() << " vs. " << m_hypotheses[0]->GetTotalScore() << " vs. "
<< m_hypotheses[1]->GetTotalScore()); << m_hypotheses[1]->GetTotalScore());
} }
HypothesisScoreOrdererWithDistortion orderer (&transOptRange); HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
@ -446,9 +446,9 @@ BitmapContainer::ProcessBestHypothesis()
if (!Empty()) { if (!Empty()) {
HypothesisQueueItem *check = Dequeue(true); HypothesisQueueItem *check = Dequeue(true);
UTIL_THROW_IF2(item->GetHypothesis()->GetTotalScore() < check->GetHypothesis()->GetTotalScore(), UTIL_THROW_IF2(item->GetHypothesis()->GetTotalScore() < check->GetHypothesis()->GetTotalScore(),
"Non-monotonic total score: " "Non-monotonic total score: "
<< item->GetHypothesis()->GetTotalScore() << " vs. " << item->GetHypothesis()->GetTotalScore() << " vs. "
<< check->GetHypothesis()->GetTotalScore()); << check->GetHypothesis()->GetTotalScore());
} }
// Logging for the criminally insane // Logging for the criminally insane

View File

@ -85,7 +85,7 @@ void ChartCell::PruneToSize()
* \param allChartCells entire chart - needed to look up underlying hypotheses * \param allChartCells entire chart - needed to look up underlying hypotheses
*/ */
void ChartCell::Decode(const ChartTranslationOptionList &transOptList void ChartCell::Decode(const ChartTranslationOptionList &transOptList
, const ChartCellCollection &allChartCells) , const ChartCellCollection &allChartCells)
{ {
const StaticData &staticData = StaticData::Instance(); const StaticData &staticData = StaticData::Instance();

View File

@ -97,7 +97,7 @@ public:
~ChartCell(); ~ChartCell();
void Decode(const ChartTranslationOptionList &transOptList void Decode(const ChartTranslationOptionList &transOptList
,const ChartCellCollection &allChartCells); ,const ChartCellCollection &allChartCells);
//! Get all hypotheses in the cell that have the specified constituent label //! Get all hypotheses in the cell that have the specified constituent label
const HypoList *GetSortedHypotheses(const Word &constituentLabel) const { const HypoList *GetSortedHypotheses(const Word &constituentLabel) const {

View File

@ -124,8 +124,7 @@ public:
const ChartCellLabel *Find(size_t idx) const { const ChartCellLabel *Find(size_t idx) const {
try { try {
return m_map.at(idx); return m_map.at(idx);
} } catch (const std::out_of_range& oor) {
catch (const std::out_of_range& oor) {
return NULL; return NULL;
} }
} }

View File

@ -61,8 +61,7 @@ ChartHypothesis::ChartHypothesis(const ChartTranslationOptions &transOpt,
const std::vector<HypothesisDimension> &childEntries = item.GetHypothesisDimensions(); const std::vector<HypothesisDimension> &childEntries = item.GetHypothesisDimensions();
m_prevHypos.reserve(childEntries.size()); m_prevHypos.reserve(childEntries.size());
std::vector<HypothesisDimension>::const_iterator iter; std::vector<HypothesisDimension>::const_iterator iter;
for (iter = childEntries.begin(); iter != childEntries.end(); ++iter) for (iter = childEntries.begin(); iter != childEntries.end(); ++iter) {
{
m_prevHypos.push_back(iter->GetHypothesis()); m_prevHypos.push_back(iter->GetHypothesis());
} }
} }
@ -85,17 +84,14 @@ ChartHypothesis::ChartHypothesis(const ChartHypothesis &pred,
ChartHypothesis::~ChartHypothesis() ChartHypothesis::~ChartHypothesis()
{ {
// delete feature function states // delete feature function states
for (unsigned i = 0; i < m_ffStates.size(); ++i) for (unsigned i = 0; i < m_ffStates.size(); ++i) {
{
delete m_ffStates[i]; delete m_ffStates[i];
} }
// delete hypotheses that are not in the chart (recombined away) // delete hypotheses that are not in the chart (recombined away)
if (m_arcList) if (m_arcList) {
{
ChartArcList::iterator iter; ChartArcList::iterator iter;
for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter) for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter) {
{
ChartHypothesis *hypo = *iter; ChartHypothesis *hypo = *iter;
Delete(hypo); Delete(hypo);
} }
@ -112,25 +108,19 @@ void ChartHypothesis::GetOutputPhrase(Phrase &outPhrase) const
{ {
FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor(); FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
for (size_t pos = 0; pos < GetCurrTargetPhrase().GetSize(); ++pos) for (size_t pos = 0; pos < GetCurrTargetPhrase().GetSize(); ++pos) {
{
const Word &word = GetCurrTargetPhrase().GetWord(pos); const Word &word = GetCurrTargetPhrase().GetWord(pos);
if (word.IsNonTerminal()) if (word.IsNonTerminal()) {
{
// non-term. fill out with prev hypo // non-term. fill out with prev hypo
size_t nonTermInd = GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap()[pos]; size_t nonTermInd = GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap()[pos];
const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd]; const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
prevHypo->GetOutputPhrase(outPhrase); prevHypo->GetOutputPhrase(outPhrase);
} } else {
else
{
outPhrase.AddWord(word); outPhrase.AddWord(word);
if (placeholderFactor != NOT_FOUND) if (placeholderFactor != NOT_FOUND) {
{
std::set<size_t> sourcePosSet = GetCurrTargetPhrase().GetAlignTerm().GetAlignmentsForTarget(pos); std::set<size_t> sourcePosSet = GetCurrTargetPhrase().GetAlignTerm().GetAlignmentsForTarget(pos);
if (sourcePosSet.size() == 1) if (sourcePosSet.size() == 1) {
{
const std::vector<const Word*> *ruleSourceFromInputPath = GetTranslationOption().GetSourceRuleFromInputPath(); const std::vector<const Word*> *ruleSourceFromInputPath = GetTranslationOption().GetSourceRuleFromInputPath();
UTIL_THROW_IF2(ruleSourceFromInputPath == NULL, UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
"No source rule"); "No source rule");
@ -140,8 +130,7 @@ void ChartHypothesis::GetOutputPhrase(Phrase &outPhrase) const
UTIL_THROW_IF2(sourceWord == NULL, UTIL_THROW_IF2(sourceWord == NULL,
"No source word"); "No source word");
const Factor *factor = sourceWord->GetFactor(placeholderFactor); const Factor *factor = sourceWord->GetFactor(placeholderFactor);
if (factor) if (factor) {
{
outPhrase.Back()[0] = factor; outPhrase.Back()[0] = factor;
} }
} }
@ -165,33 +154,24 @@ void ChartHypothesis::GetOutputPhrase(size_t leftRightMost, size_t numWords, Phr
const TargetPhrase &tp = GetCurrTargetPhrase(); const TargetPhrase &tp = GetCurrTargetPhrase();
size_t targetSize = tp.GetSize(); size_t targetSize = tp.GetSize();
for (size_t i = 0; i < targetSize; ++i) for (size_t i = 0; i < targetSize; ++i) {
{
size_t pos; size_t pos;
if (leftRightMost == 1) if (leftRightMost == 1) {
{
pos = i; pos = i;
} } else if (leftRightMost == 2) {
else if (leftRightMost == 2)
{
pos = targetSize - i - 1; pos = targetSize - i - 1;
} } else {
else
{
abort(); abort();
} }
const Word &word = tp.GetWord(pos); const Word &word = tp.GetWord(pos);
if (word.IsNonTerminal()) if (word.IsNonTerminal()) {
{
// non-term. fill out with prev hypo // non-term. fill out with prev hypo
size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[pos]; size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[pos];
const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd]; const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
prevHypo->GetOutputPhrase(outPhrase); prevHypo->GetOutputPhrase(outPhrase);
} } else {
else
{
outPhrase.AddWord(word); outPhrase.AddWord(word);
} }
@ -236,20 +216,16 @@ void ChartHypothesis::EvaluateWhenApplied()
// cached in the translation option-- there is no principled distinction // cached in the translation option-- there is no principled distinction
const std::vector<const StatelessFeatureFunction*>& sfs = const std::vector<const StatelessFeatureFunction*>& sfs =
StatelessFeatureFunction::GetStatelessFeatureFunctions(); StatelessFeatureFunction::GetStatelessFeatureFunctions();
  for (unsigned i = 0; i < sfs.size(); ++i) {
    if (! staticData.IsFeatureFunctionIgnored( *sfs[i] )) {
      sfs[i]->EvaluateWhenApplied(*this,&m_currScoreBreakdown);
    }
  }

  const std::vector<const StatefulFeatureFunction*>& ffs =
    StatefulFeatureFunction::GetStatefulFeatureFunctions();

  for (unsigned i = 0; i < ffs.size(); ++i) {
    if (! staticData.IsFeatureFunctionIgnored( *ffs[i] )) {
      m_ffStates[i] = ffs[i]->EvaluateWhenApplied(*this,i,&m_currScoreBreakdown);
    }
  }
@ -257,7 +233,7 @@ void ChartHypothesis::EvaluateWhenApplied()
// total score from current translation rule // total score from current translation rule
m_totalScore = GetTranslationOption().GetScores().GetWeightedScore(); m_totalScore = GetTranslationOption().GetScores().GetWeightedScore();
m_totalScore += m_currScoreBreakdown.GetWeightedScore(); m_totalScore += m_currScoreBreakdown.GetWeightedScore();
// total scores from prev hypos // total scores from prev hypos
for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) { for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
const ChartHypothesis &prevHypo = **iter; const ChartHypothesis &prevHypo = **iter;
@ -267,31 +243,25 @@ void ChartHypothesis::EvaluateWhenApplied()
void ChartHypothesis::AddArc(ChartHypothesis *loserHypo) void ChartHypothesis::AddArc(ChartHypothesis *loserHypo)
{ {
  if (!m_arcList) {
    if (loserHypo->m_arcList) {
      // we don't have an arcList, but loser does
      this->m_arcList = loserHypo->m_arcList;  // take ownership, we'll delete
      loserHypo->m_arcList = 0;                // prevent a double deletion
    } else {
      this->m_arcList = new ChartArcList();
    }
  } else {
    if (loserHypo->m_arcList) {
      // both have an arc list: merge. delete loser
      size_t my_size = m_arcList->size();
      size_t add_size = loserHypo->m_arcList->size();
      this->m_arcList->resize(my_size + add_size, 0);
      std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(ChartHypothesis *));
      delete loserHypo->m_arcList;
      loserHypo->m_arcList = 0;
    } else {
      // loserHypo doesn't have any arcs
      // DO NOTHING
    }
  }
} }
@ -299,10 +269,8 @@ void ChartHypothesis::AddArc(ChartHypothesis *loserHypo)
} }
// sorting helper // sorting helper
struct CompareChartHypothesisTotalScore struct CompareChartHypothesisTotalScore {
{ bool operator()(const ChartHypothesis* hypo1, const ChartHypothesis* hypo2) const {
bool operator()(const ChartHypothesis* hypo1, const ChartHypothesis* hypo2) const
{
return hypo1->GetTotalScore() > hypo2->GetTotalScore(); return hypo1->GetTotalScore() > hypo2->GetTotalScore();
} }
}; };
@ -322,8 +290,7 @@ void ChartHypothesis::CleanupArcList()
size_t nBestSize = staticData.GetNBestSize(); size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphHypergraph(); bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphHypergraph();
if (!distinctNBest && m_arcList->size() > nBestSize) if (!distinctNBest && m_arcList->size() > nBestSize) {
{
// prune arc list only if there too many arcs // prune arc list only if there too many arcs
NTH_ELEMENT4(m_arcList->begin() NTH_ELEMENT4(m_arcList->begin()
, m_arcList->begin() + nBestSize - 1 , m_arcList->begin() + nBestSize - 1
@ -332,8 +299,7 @@ void ChartHypothesis::CleanupArcList()
// delete bad ones // delete bad ones
ChartArcList::iterator iter; ChartArcList::iterator iter;
for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter) for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter) {
{
ChartHypothesis *arc = *iter; ChartHypothesis *arc = *iter;
ChartHypothesis::Delete(arc); ChartHypothesis::Delete(arc);
} }
@ -343,8 +309,7 @@ void ChartHypothesis::CleanupArcList()
// set all arc's main hypo variable to this hypo // set all arc's main hypo variable to this hypo
ChartArcList::iterator iter = m_arcList->begin(); ChartArcList::iterator iter = m_arcList->begin();
for (; iter != m_arcList->end() ; ++iter) for (; iter != m_arcList->end() ; ++iter) {
{
ChartHypothesis *arc = *iter; ChartHypothesis *arc = *iter;
arc->SetWinningHypo(this); arc->SetWinningHypo(this);
} }
@ -367,13 +332,11 @@ std::ostream& operator<<(std::ostream& out, const ChartHypothesis& hypo)
// recombination // recombination
if (hypo.GetWinningHypothesis() != NULL && if (hypo.GetWinningHypothesis() != NULL &&
hypo.GetWinningHypothesis() != &hypo) hypo.GetWinningHypothesis() != &hypo) {
{
out << "->" << hypo.GetWinningHypothesis()->GetId(); out << "->" << hypo.GetWinningHypothesis()->GetId();
} }
if (StaticData::Instance().GetIncludeLHSInSearchGraph()) if (StaticData::Instance().GetIncludeLHSInSearchGraph()) {
{
out << " " << hypo.GetTargetLHS() << "=>"; out << " " << hypo.GetTargetLHS() << "=>";
} }
out << " " << hypo.GetCurrTargetPhrase() out << " " << hypo.GetCurrTargetPhrase()
@ -381,8 +344,7 @@ std::ostream& operator<<(std::ostream& out, const ChartHypothesis& hypo)
<< " " << hypo.GetCurrSourceRange(); << " " << hypo.GetCurrSourceRange();
HypoList::const_iterator iter; HypoList::const_iterator iter;
for (iter = hypo.GetPrevHypos().begin(); iter != hypo.GetPrevHypos().end(); ++iter) for (iter = hypo.GetPrevHypos().begin(); iter != hypo.GetPrevHypos().end(); ++iter) {
{
const ChartHypothesis &prevHypo = **iter; const ChartHypothesis &prevHypo = **iter;
out << " " << prevHypo.GetId(); out << " " << prevHypo.GetId();
} }
View File
@ -58,8 +58,8 @@ protected:
WordsRange m_currSourceWordsRange; WordsRange m_currSourceWordsRange;
std::vector<const FFState*> m_ffStates; /*! stateful feature function states */ std::vector<const FFState*> m_ffStates; /*! stateful feature function states */
/*! sum of scores of this hypothesis, and previous hypotheses. Lazily initialised. */ /*! sum of scores of this hypothesis, and previous hypotheses. Lazily initialised. */
mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown; mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
mutable boost::scoped_ptr<ScoreComponentCollection> m_deltaScoreBreakdown; mutable boost::scoped_ptr<ScoreComponentCollection> m_deltaScoreBreakdown;
ScoreComponentCollection m_currScoreBreakdown /*! scores for this hypothesis only */ ScoreComponentCollection m_currScoreBreakdown /*! scores for this hypothesis only */
,m_lmNGram ,m_lmNGram
,m_lmPrefix; ,m_lmPrefix;
@ -82,21 +82,18 @@ protected:
public: public:
#ifdef USE_HYPO_POOL #ifdef USE_HYPO_POOL
void *operator new(size_t /* num_bytes */) void *operator new(size_t /* num_bytes */) {
{
void *ptr = s_objectPool.getPtr(); void *ptr = s_objectPool.getPtr();
return ptr; return ptr;
} }
//! delete \param hypo. Works with object pool too //! delete \param hypo. Works with object pool too
static void Delete(ChartHypothesis *hypo) static void Delete(ChartHypothesis *hypo) {
{
s_objectPool.freeObject(hypo); s_objectPool.freeObject(hypo);
} }
#else #else
//! delete \param hypo. Works with object pool too //! delete \param hypo. Works with object pool too
static void Delete(ChartHypothesis *hypo) static void Delete(ChartHypothesis *hypo) {
{
delete hypo; delete hypo;
} }
#endif #endif
@ -109,43 +106,36 @@ public:
~ChartHypothesis(); ~ChartHypothesis();
unsigned GetId() const unsigned GetId() const {
{
return m_id; return m_id;
} }
const ChartTranslationOption &GetTranslationOption() const const ChartTranslationOption &GetTranslationOption() const {
{
return *m_transOpt; return *m_transOpt;
} }
//! Get the rule that created this hypothesis //! Get the rule that created this hypothesis
const TargetPhrase &GetCurrTargetPhrase() const const TargetPhrase &GetCurrTargetPhrase() const {
{
return m_transOpt->GetPhrase(); return m_transOpt->GetPhrase();
} }
//! the source range that this hypothesis spans //! the source range that this hypothesis spans
const WordsRange &GetCurrSourceRange() const const WordsRange &GetCurrSourceRange() const {
{
return m_currSourceWordsRange; return m_currSourceWordsRange;
} }
//! the arc list when creating n-best lists //! the arc list when creating n-best lists
inline const ChartArcList* GetArcList() const inline const ChartArcList* GetArcList() const {
{
return m_arcList; return m_arcList;
} }
//! the feature function states for a particular feature \param featureID //! the feature function states for a particular feature \param featureID
inline const FFState* GetFFState( size_t featureID ) const inline const FFState* GetFFState( size_t featureID ) const {
{
return m_ffStates[ featureID ]; return m_ffStates[ featureID ];
} }
//! reference back to the manager //! reference back to the manager
inline const ChartManager& GetManager() const inline const ChartManager& GetManager() const {
{
return m_manager; return m_manager;
} }
@ -165,21 +155,17 @@ public:
void SetWinningHypo(const ChartHypothesis *hypo); void SetWinningHypo(const ChartHypothesis *hypo);
//! get the unweighted score for each feature function //! get the unweighted score for each feature function
const ScoreComponentCollection &GetScoreBreakdown() const const ScoreComponentCollection &GetScoreBreakdown() const {
{
// Note: never call this method before m_currScoreBreakdown is fully computed // Note: never call this method before m_currScoreBreakdown is fully computed
if (!m_scoreBreakdown.get()) if (!m_scoreBreakdown.get()) {
{
m_scoreBreakdown.reset(new ScoreComponentCollection()); m_scoreBreakdown.reset(new ScoreComponentCollection());
// score breakdown from current translation rule // score breakdown from current translation rule
if (m_transOpt) if (m_transOpt) {
{
m_scoreBreakdown->PlusEquals(GetTranslationOption().GetScores()); m_scoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
} }
m_scoreBreakdown->PlusEquals(m_currScoreBreakdown); m_scoreBreakdown->PlusEquals(m_currScoreBreakdown);
// score breakdowns from prev hypos // score breakdowns from prev hypos
for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
{
const ChartHypothesis &prevHypo = **iter; const ChartHypothesis &prevHypo = **iter;
m_scoreBreakdown->PlusEquals(prevHypo.GetScoreBreakdown()); m_scoreBreakdown->PlusEquals(prevHypo.GetScoreBreakdown());
} }
@ -188,15 +174,12 @@ public:
} }
//! get the unweighted score delta for each feature function //! get the unweighted score delta for each feature function
const ScoreComponentCollection &GetDeltaScoreBreakdown() const const ScoreComponentCollection &GetDeltaScoreBreakdown() const {
{
// Note: never call this method before m_currScoreBreakdown is fully computed // Note: never call this method before m_currScoreBreakdown is fully computed
if (!m_deltaScoreBreakdown.get()) if (!m_deltaScoreBreakdown.get()) {
{
m_deltaScoreBreakdown.reset(new ScoreComponentCollection()); m_deltaScoreBreakdown.reset(new ScoreComponentCollection());
// score breakdown from current translation rule // score breakdown from current translation rule
if (m_transOpt) if (m_transOpt) {
{
m_deltaScoreBreakdown->PlusEquals(GetTranslationOption().GetScores()); m_deltaScoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
} }
m_deltaScoreBreakdown->PlusEquals(m_currScoreBreakdown); m_deltaScoreBreakdown->PlusEquals(m_currScoreBreakdown);
@ -206,33 +189,28 @@ public:
} }
//! Get the weighted total score //! Get the weighted total score
float GetTotalScore() const float GetTotalScore() const {
{
// scores from current translation rule. eg. translation models & word penalty // scores from current translation rule. eg. translation models & word penalty
return m_totalScore; return m_totalScore;
} }
//! vector of previous hypotheses this hypo is built on //! vector of previous hypotheses this hypo is built on
const std::vector<const ChartHypothesis*> &GetPrevHypos() const const std::vector<const ChartHypothesis*> &GetPrevHypos() const {
{
return m_prevHypos; return m_prevHypos;
} }
//! get a particular previous hypos //! get a particular previous hypos
const ChartHypothesis* GetPrevHypo(size_t pos) const const ChartHypothesis* GetPrevHypo(size_t pos) const {
{
return m_prevHypos[pos]; return m_prevHypos[pos];
} }
//! get the constituency label that covers this hypo //! get the constituency label that covers this hypo
const Word &GetTargetLHS() const const Word &GetTargetLHS() const {
{
return GetCurrTargetPhrase().GetTargetLHS(); return GetCurrTargetPhrase().GetTargetLHS();
} }
//! get the best hypo in the arc list when doing n-best list creation. It's either this hypothesis, or the best hypo is this hypo is in the arc list //! get the best hypo in the arc list when doing n-best list creation. It's either this hypothesis, or the best hypo is this hypo is in the arc list
const ChartHypothesis* GetWinningHypothesis() const const ChartHypothesis* GetWinningHypothesis() const {
{
return m_winningHypo; return m_winningHypo;
} }
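GetScoreBreakdown and GetDeltaScoreBreakdown above build their ScoreComponentCollection lazily and cache it in a mutable boost::scoped_ptr. A stripped-down sketch of that caching pattern, with an illustrative Scores/Node pair standing in for the Moses types:

  #include <boost/scoped_ptr.hpp>

  // Illustrative stand-in for ScoreComponentCollection.
  struct Scores {
    float total;
    Scores() : total(0) {}
    void PlusEquals(const Scores &other) { total += other.total; }
  };

  class Node {
  public:
    const Scores &GetScoreBreakdown() const {
      if (!m_cached.get()) {                // first call: build and cache
        m_cached.reset(new Scores());
        m_cached->PlusEquals(m_ownScores);  // this node's own contribution
        // ...plus the breakdowns of any predecessor nodes, as in the real code...
      }
      return *m_cached;                     // later calls reuse the cached object
    }

  private:
    Scores m_ownScores;
    mutable boost::scoped_ptr<Scores> m_cached;  // mutable: filled in from a const getter
  };

The mutable pointer keeps the getter const while deferring the (possibly never needed) full breakdown until somebody asks for it.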
View File
@ -125,7 +125,7 @@ Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
} }
// Generate the score breakdown of the derivation d. // Generate the score breakdown of the derivation d.
boost::shared_ptr<ScoreComponentCollection> boost::shared_ptr<ScoreComponentCollection>
ChartKBestExtractor::GetOutputScoreBreakdown(const Derivation &d) ChartKBestExtractor::GetOutputScoreBreakdown(const Derivation &d)
{ {
const ChartHypothesis &hypo = d.edge.head->hypothesis; const ChartHypothesis &hypo = d.edge.head->hypothesis;
@ -169,8 +169,7 @@ TreePointer ChartKBestExtractor::GetOutputTree(const Derivation &d)
mytree->Combine(previous_trees); mytree->Combine(previous_trees);
return mytree; return mytree;
} } else {
else {
UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found"); UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
} }
} }
View File
@ -290,12 +290,14 @@ void ChartManager::FindReachableHypotheses(
} }
} }
void ChartManager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const { void ChartManager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const
{
ChartSearchGraphWriterHypergraph writer(&outputSearchGraphStream); ChartSearchGraphWriterHypergraph writer(&outputSearchGraphStream);
WriteSearchGraph(writer); WriteSearchGraph(writer);
} }
void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const { void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const
{
ChartSearchGraphWriterMoses writer(&outputSearchGraphStream, m_source.GetTranslationId()); ChartSearchGraphWriterMoses writer(&outputSearchGraphStream, m_source.GetTranslationId());
WriteSearchGraph(writer); WriteSearchGraph(writer);
} }
@ -304,33 +306,33 @@ void ChartManager::OutputBest(OutputCollector *collector) const
{ {
const ChartHypothesis *bestHypo = GetBestHypothesis(); const ChartHypothesis *bestHypo = GetBestHypothesis();
if (collector && bestHypo) { if (collector && bestHypo) {
const size_t translationId = m_source.GetTranslationId(); const size_t translationId = m_source.GetTranslationId();
const ChartHypothesis *bestHypo = GetBestHypothesis(); const ChartHypothesis *bestHypo = GetBestHypothesis();
OutputBestHypo(collector, bestHypo, translationId); OutputBestHypo(collector, bestHypo, translationId);
} }
} }
void ChartManager::OutputNBest(OutputCollector *collector) const void ChartManager::OutputNBest(OutputCollector *collector) const
{ {
const StaticData &staticData = StaticData::Instance(); const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize(); size_t nBestSize = staticData.GetNBestSize();
if (nBestSize > 0) { if (nBestSize > 0) {
const size_t translationId = m_source.GetTranslationId(); const size_t translationId = m_source.GetTranslationId();
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl); VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList; std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest()); CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
OutputNBestList(collector, nBestList, translationId); OutputNBestList(collector, nBestList, translationId);
IFVERBOSE(2) { IFVERBOSE(2) {
PrintUserTime("N-Best Hypotheses Generation Time:"); PrintUserTime("N-Best Hypotheses Generation Time:");
} }
} }
} }
void ChartManager::OutputNBestList(OutputCollector *collector, void ChartManager::OutputNBestList(OutputCollector *collector,
const ChartKBestExtractor::KBestVec &nBestList, const ChartKBestExtractor::KBestVec &nBestList,
long translationId) const long translationId) const
{ {
const StaticData &staticData = StaticData::Instance(); const StaticData &staticData = StaticData::Instance();
const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder(); const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
@ -344,7 +346,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
} }
bool includeWordAlignment = bool includeWordAlignment =
StaticData::Instance().PrintAlignmentInfoInNbest(); StaticData::Instance().PrintAlignmentInfoInNbest();
bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees(); bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
@ -357,7 +359,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
// delete <s> and </s> // delete <s> and </s>
UTIL_THROW_IF2(outputPhrase.GetSize() < 2, UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outputPhrase.RemoveWord(0); outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1); outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
@ -405,9 +407,9 @@ size_t ChartManager::CalcSourceSize(const Moses::ChartHypothesis *hypo) const
} }
size_t ChartManager::OutputAlignmentNBest( size_t ChartManager::OutputAlignmentNBest(
Alignments &retAlign, Alignments &retAlign,
const Moses::ChartKBestExtractor::Derivation &derivation, const Moses::ChartKBestExtractor::Derivation &derivation,
size_t startTarget) const size_t startTarget) const
{ {
const ChartHypothesis &hypo = derivation.edge.head->hypothesis; const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
@ -448,7 +450,7 @@ size_t ChartManager::OutputAlignmentNBest(
// Recursively look thru child hypos // Recursively look thru child hypos
size_t currStartTarget = startTarget + totalTargetSize; size_t currStartTarget = startTarget + totalTargetSize;
size_t targetSize = OutputAlignmentNBest(retAlign, subderivation, size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
currStartTarget); currStartTarget);
targetOffsets[targetPos] = targetSize; targetOffsets[targetPos] = targetSize;
totalTargetSize += targetSize; totalTargetSize += targetSize;
@ -486,22 +488,22 @@ size_t ChartManager::OutputAlignmentNBest(
void ChartManager::OutputAlignment(OutputCollector *collector) const void ChartManager::OutputAlignment(OutputCollector *collector) const
{ {
if (collector == NULL) { if (collector == NULL) {
return; return;
} }
ostringstream out; ostringstream out;
const ChartHypothesis *hypo = GetBestHypothesis(); const ChartHypothesis *hypo = GetBestHypothesis();
if (hypo) { if (hypo) {
Alignments retAlign; Alignments retAlign;
OutputAlignment(retAlign, hypo, 0); OutputAlignment(retAlign, hypo, 0);
// output alignments // output alignments
Alignments::const_iterator iter; Alignments::const_iterator iter;
for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) { for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
const pair<size_t, size_t> &alignPoint = *iter; const pair<size_t, size_t> &alignPoint = *iter;
out << alignPoint.first << "-" << alignPoint.second << " "; out << alignPoint.first << "-" << alignPoint.second << " ";
} }
} }
out << endl; out << endl;
@ -510,8 +512,8 @@ void ChartManager::OutputAlignment(OutputCollector *collector) const
} }
size_t ChartManager::OutputAlignment(Alignments &retAlign, size_t ChartManager::OutputAlignment(Alignments &retAlign,
const Moses::ChartHypothesis *hypo, const Moses::ChartHypothesis *hypo,
size_t startTarget) const size_t startTarget) const
{ {
size_t totalTargetSize = 0; size_t totalTargetSize = 0;
size_t startSource = hypo->GetCurrSourceRange().GetStartPos(); size_t startSource = hypo->GetCurrSourceRange().GetStartPos();
@ -536,7 +538,7 @@ size_t ChartManager::OutputAlignment(Alignments &retAlign,
size_t targetInd = 0; size_t targetInd = 0;
for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) { for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
if (tp.GetWord(targetPos).IsNonTerminal()) { if (tp.GetWord(targetPos).IsNonTerminal()) {
UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error"); UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
size_t sourceInd = targetPos2SourceInd[targetPos]; size_t sourceInd = targetPos2SourceInd[targetPos];
size_t sourcePos = sourceInd2pos[sourceInd]; size_t sourcePos = sourceInd2pos[sourceInd];
@ -587,19 +589,19 @@ size_t ChartManager::OutputAlignment(Alignments &retAlign,
void ChartManager::OutputDetailedTranslationReport(OutputCollector *collector) const void ChartManager::OutputDetailedTranslationReport(OutputCollector *collector) const
{ {
if (collector) { if (collector) {
OutputDetailedTranslationReport(collector, OutputDetailedTranslationReport(collector,
GetBestHypothesis(), GetBestHypothesis(),
static_cast<const Sentence&>(m_source), static_cast<const Sentence&>(m_source),
m_source.GetTranslationId()); m_source.GetTranslationId());
} }
} }
void ChartManager::OutputDetailedTranslationReport( void ChartManager::OutputDetailedTranslationReport(
OutputCollector *collector, OutputCollector *collector,
const ChartHypothesis *hypo, const ChartHypothesis *hypo,
const Sentence &sentence, const Sentence &sentence,
long translationId) const long translationId) const
{ {
if (hypo == NULL) { if (hypo == NULL) {
return; return;
@ -610,24 +612,24 @@ void ChartManager::OutputDetailedTranslationReport(
OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId); OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
collector->Write(translationId, out.str()); collector->Write(translationId, out.str());
//DIMw //DIMw
const StaticData &staticData = StaticData::Instance(); const StaticData &staticData = StaticData::Instance();
if (staticData.IsDetailedAllTranslationReportingEnabled()) { if (staticData.IsDetailedAllTranslationReportingEnabled()) {
const Sentence &sentence = dynamic_cast<const Sentence &>(m_source); const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
size_t nBestSize = staticData.GetNBestSize(); size_t nBestSize = staticData.GetNBestSize();
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList; std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest()); CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId); OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
} }
} }
void ChartManager::OutputTranslationOptions(std::ostream &out, void ChartManager::OutputTranslationOptions(std::ostream &out,
ApplicationContext &applicationContext, ApplicationContext &applicationContext,
const ChartHypothesis *hypo, const ChartHypothesis *hypo,
const Sentence &sentence, const Sentence &sentence,
long translationId) const long translationId) const
{ {
if (hypo != NULL) { if (hypo != NULL) {
OutputTranslationOption(out, applicationContext, hypo, sentence, translationId); OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
@ -644,10 +646,10 @@ void ChartManager::OutputTranslationOptions(std::ostream &out,
} }
void ChartManager::OutputTranslationOption(std::ostream &out, void ChartManager::OutputTranslationOption(std::ostream &out,
ApplicationContext &applicationContext, ApplicationContext &applicationContext,
const ChartHypothesis *hypo, const ChartHypothesis *hypo,
const Sentence &sentence, const Sentence &sentence,
long translationId) const long translationId) const
{ {
ReconstructApplicationContext(*hypo, sentence, applicationContext); ReconstructApplicationContext(*hypo, sentence, applicationContext);
out << "Trans Opt " << translationId out << "Trans Opt " << translationId
@ -691,16 +693,16 @@ void ChartManager::ReconstructApplicationContext(const ChartHypothesis &hypo,
void ChartManager::OutputUnknowns(OutputCollector *collector) const void ChartManager::OutputUnknowns(OutputCollector *collector) const
{ {
if (collector) { if (collector) {
long translationId = m_source.GetTranslationId(); long translationId = m_source.GetTranslationId();
const std::vector<Phrase*> &oovs = GetParser().GetUnknownSources(); const std::vector<Phrase*> &oovs = GetParser().GetUnknownSources();
std::ostringstream out; std::ostringstream out;
for (std::vector<Phrase*>::const_iterator p = oovs.begin(); for (std::vector<Phrase*>::const_iterator p = oovs.begin();
p != oovs.end(); ++p) { p != oovs.end(); ++p) {
out << *p; out << *p;
} }
out << std::endl; out << std::endl;
collector->Write(translationId, out.str()); collector->Write(translationId, out.str());
} }
} }
@ -709,7 +711,7 @@ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector
{ {
const ChartHypothesis *hypo = GetBestHypothesis(); const ChartHypothesis *hypo = GetBestHypothesis();
if (collector == NULL || hypo == NULL) { if (collector == NULL || hypo == NULL) {
return; return;
} }
std::ostringstream out; std::ostringstream out;
@ -723,14 +725,14 @@ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector
//Tree of full sentence //Tree of full sentence
const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure(); const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure();
if (treeStructure != NULL) { if (treeStructure != NULL) {
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions(); const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ ) { for( size_t i=0; i<sff.size(); i++ ) {
if (sff[i] == treeStructure) { if (sff[i] == treeStructure) {
const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i)); const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n"; out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
break; break;
} }
} }
} }
collector->Write(translationId, out.str()); collector->Write(translationId, out.str());
@ -738,10 +740,10 @@ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector
} }
void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out, void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
ApplicationContext &applicationContext, ApplicationContext &applicationContext,
const ChartHypothesis *hypo, const ChartHypothesis *hypo,
const Sentence &sentence, const Sentence &sentence,
long translationId) const long translationId) const
{ {
if (hypo != NULL) { if (hypo != NULL) {
@ -769,20 +771,20 @@ void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
void ChartManager::OutputSearchGraph(OutputCollector *collector) const void ChartManager::OutputSearchGraph(OutputCollector *collector) const
{ {
if (collector) { if (collector) {
long translationId = m_source.GetTranslationId(); long translationId = m_source.GetTranslationId();
std::ostringstream out; std::ostringstream out;
OutputSearchGraphMoses( out); OutputSearchGraphMoses( out);
collector->Write(translationId, out.str()); collector->Write(translationId, out.str());
} }
} }
//DIMw //DIMw
void ChartManager::OutputDetailedAllTranslationReport( void ChartManager::OutputDetailedAllTranslationReport(
OutputCollector *collector, OutputCollector *collector,
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
const Sentence &sentence, const Sentence &sentence,
long translationId) const long translationId) const
{ {
std::ostringstream out; std::ostringstream out;
ApplicationContext applicationContext; ApplicationContext applicationContext;
@ -813,8 +815,8 @@ void ChartManager::OutputSearchGraphHypergraph() const
{ {
const StaticData &staticData = StaticData::Instance(); const StaticData &staticData = StaticData::Instance();
if (staticData.GetOutputSearchGraphHypergraph()) { if (staticData.GetOutputSearchGraphHypergraph()) {
HypergraphOutput<ChartManager> hypergraphOutputChart(PRECISION); HypergraphOutput<ChartManager> hypergraphOutputChart(PRECISION);
hypergraphOutputChart.Write(*this); hypergraphOutputChart.Write(*this);
} }
} }
@ -842,7 +844,7 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe
// delete 1st & last // delete 1st & last
UTIL_THROW_IF2(outPhrase.GetSize() < 2, UTIL_THROW_IF2(outPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outPhrase.RemoveWord(0); outPhrase.RemoveWord(0);
outPhrase.RemoveWord(outPhrase.GetSize() - 1); outPhrase.RemoveWord(outPhrase.GetSize() - 1);
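OutputAlignment above writes one line per sentence of space-separated source-target index pairs such as 0-0 1-2 2-1. Assuming that format, a small sketch that parses such a line back into pairs (ParseAlignmentLine is an illustrative helper, not part of Moses):

  #include <cstdlib>
  #include <sstream>
  #include <string>
  #include <utility>
  #include <vector>

  // Parse a line such as "0-0 1-2 2-1 " into (source, target) index pairs.
  std::vector<std::pair<size_t, size_t> > ParseAlignmentLine(const std::string &line) {
    std::vector<std::pair<size_t, size_t> > points;
    std::istringstream in(line);
    std::string tok;
    while (in >> tok) {
      std::string::size_type dash = tok.find('-');
      if (dash == std::string::npos) continue;      // skip malformed tokens
      size_t src = std::atoi(tok.substr(0, dash).c_str());
      size_t tgt = std::atoi(tok.substr(dash + 1).c_str());
      points.push_back(std::make_pair(src, tgt));
    }
    return points;
  }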
View File
@ -56,49 +56,49 @@ private:
ChartTranslationOptionList m_translationOptionList; /**< pre-computed list of translation options for the phrases in this sentence */ ChartTranslationOptionList m_translationOptionList; /**< pre-computed list of translation options for the phrases in this sentence */
/* auxilliary functions for SearchGraphs */ /* auxilliary functions for SearchGraphs */
void FindReachableHypotheses( void FindReachableHypotheses(
const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable , size_t* winners, size_t* losers) const; const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable , size_t* winners, size_t* losers) const;
void WriteSearchGraph(const ChartSearchGraphWriter& writer) const; void WriteSearchGraph(const ChartSearchGraphWriter& writer) const;
// output // output
void OutputNBestList(OutputCollector *collector, void OutputNBestList(OutputCollector *collector,
const ChartKBestExtractor::KBestVec &nBestList, const ChartKBestExtractor::KBestVec &nBestList,
long translationId) const; long translationId) const;
size_t CalcSourceSize(const Moses::ChartHypothesis *hypo) const; size_t CalcSourceSize(const Moses::ChartHypothesis *hypo) const;
size_t OutputAlignmentNBest(Alignments &retAlign, size_t OutputAlignmentNBest(Alignments &retAlign,
const Moses::ChartKBestExtractor::Derivation &derivation, const Moses::ChartKBestExtractor::Derivation &derivation,
size_t startTarget) const; size_t startTarget) const;
size_t OutputAlignment(Alignments &retAlign, size_t OutputAlignment(Alignments &retAlign,
const Moses::ChartHypothesis *hypo, const Moses::ChartHypothesis *hypo,
size_t startTarget) const; size_t startTarget) const;
void OutputDetailedTranslationReport( void OutputDetailedTranslationReport(
OutputCollector *collector, OutputCollector *collector,
const ChartHypothesis *hypo, const ChartHypothesis *hypo,
const Sentence &sentence, const Sentence &sentence,
long translationId) const; long translationId) const;
void OutputTranslationOptions(std::ostream &out, void OutputTranslationOptions(std::ostream &out,
ApplicationContext &applicationContext, ApplicationContext &applicationContext,
const ChartHypothesis *hypo, const ChartHypothesis *hypo,
const Sentence &sentence, const Sentence &sentence,
long translationId) const; long translationId) const;
void OutputTranslationOption(std::ostream &out, void OutputTranslationOption(std::ostream &out,
ApplicationContext &applicationContext, ApplicationContext &applicationContext,
const ChartHypothesis *hypo, const ChartHypothesis *hypo,
const Sentence &sentence, const Sentence &sentence,
long translationId) const; long translationId) const;
void ReconstructApplicationContext(const ChartHypothesis &hypo, void ReconstructApplicationContext(const ChartHypothesis &hypo,
const Sentence &sentence, const Sentence &sentence,
ApplicationContext &context) const; ApplicationContext &context) const;
void OutputTreeFragmentsTranslationOptions(std::ostream &out, void OutputTreeFragmentsTranslationOptions(std::ostream &out,
ApplicationContext &applicationContext, ApplicationContext &applicationContext,
const ChartHypothesis *hypo, const ChartHypothesis *hypo,
const Sentence &sentence, const Sentence &sentence,
long translationId) const; long translationId) const;
void OutputDetailedAllTranslationReport( void OutputDetailedAllTranslationReport(
OutputCollector *collector, OutputCollector *collector,
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
const Sentence &sentence, const Sentence &sentence,
long translationId) const; long translationId) const;
void OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const; void OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const;
void Backtrack(const ChartHypothesis *hypo) const; void Backtrack(const ChartHypothesis *hypo) const;
@ -126,8 +126,8 @@ public:
return m_hypoStackColl; return m_hypoStackColl;
} }
void CalcDecoderStatistics() const void CalcDecoderStatistics() const {
{} }
void ResetSentenceStats(const InputType& source) { void ResetSentenceStats(const InputType& source) {
m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source)); m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
@ -138,22 +138,24 @@ public:
return m_hypothesisId++; return m_hypothesisId++;
} }
const ChartParser &GetParser() const { return m_parser; } const ChartParser &GetParser() const {
return m_parser;
}
// outputs // outputs
void OutputBest(OutputCollector *collector) const; void OutputBest(OutputCollector *collector) const;
void OutputNBest(OutputCollector *collector) const; void OutputNBest(OutputCollector *collector) const;
void OutputLatticeSamples(OutputCollector *collector) const void OutputLatticeSamples(OutputCollector *collector) const {
{} }
void OutputAlignment(OutputCollector *collector) const; void OutputAlignment(OutputCollector *collector) const;
void OutputDetailedTranslationReport(OutputCollector *collector) const; void OutputDetailedTranslationReport(OutputCollector *collector) const;
void OutputUnknowns(OutputCollector *collector) const; void OutputUnknowns(OutputCollector *collector) const;
void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const; void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const;
void OutputWordGraph(OutputCollector *collector) const void OutputWordGraph(OutputCollector *collector) const {
{} }
void OutputSearchGraph(OutputCollector *collector) const; void OutputSearchGraph(OutputCollector *collector) const;
void OutputSearchGraphSLF() const void OutputSearchGraphSLF() const {
{} }
void OutputSearchGraphHypergraph() const; void OutputSearchGraphHypergraph() const;
}; };
View File
@ -65,7 +65,7 @@ public:
* \param outColl return argument * \param outColl return argument
*/ */
virtual void GetChartRuleCollection( virtual void GetChartRuleCollection(
const InputPath &inputPath, const InputPath &inputPath,
size_t lastPos, // last position to consider if using lookahead size_t lastPos, // last position to consider if using lookahead
ChartParserCallback &outColl) = 0; ChartParserCallback &outColl) = 0;
View File
@ -11,8 +11,8 @@ ChartTranslationOption::ChartTranslationOption(const TargetPhrase &targetPhrase)
} }
void ChartTranslationOption::EvaluateWithSourceContext(const InputType &input, void ChartTranslationOption::EvaluateWithSourceContext(const InputType &input,
const InputPath &inputPath, const InputPath &inputPath,
const StackVec &stackVec) const StackVec &stackVec)
{ {
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions(); const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
View File
@ -46,8 +46,8 @@ public:
} }
void EvaluateWithSourceContext(const InputType &input, void EvaluateWithSourceContext(const InputType &input,
const InputPath &inputPath, const InputPath &inputPath,
const StackVec &stackVec); const StackVec &stackVec);
}; };
} }
View File
@ -71,10 +71,9 @@ void ChartTranslationOptions::EvaluateWithSourceContext(const InputType &input,
ChartTranslationOption *transOpt = m_collection[i].get(); ChartTranslationOption *transOpt = m_collection[i].get();
if (transOpt->GetScores().GetWeightedScore() == - std::numeric_limits<float>::infinity()) { if (transOpt->GetScores().GetWeightedScore() == - std::numeric_limits<float>::infinity()) {
++numDiscard; ++numDiscard;
} } else if (numDiscard) {
else if (numDiscard) { m_collection[i - numDiscard] = m_collection[i];
m_collection[i - numDiscard] = m_collection[i];
} }
} }
@ -135,12 +134,12 @@ void ChartTranslationOptions::CreateSourceRuleFromInputPath()
std::ostream& operator<<(std::ostream &out, const ChartTranslationOptions &obj) std::ostream& operator<<(std::ostream &out, const ChartTranslationOptions &obj)
{ {
for (size_t i = 0; i < obj.m_collection.size(); ++i) { for (size_t i = 0; i < obj.m_collection.size(); ++i) {
const ChartTranslationOption &transOpt = *obj.m_collection[i]; const ChartTranslationOption &transOpt = *obj.m_collection[i];
out << transOpt << endl; out << transOpt << endl;
} }
return out; return out;
} }
} }
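The EvaluateWithSourceContext loop above compacts the option collection in place: options whose weighted score is minus infinity are counted, and every surviving option is shifted left by the number discarded so far. A generic sketch of the same idiom on a plain vector (the final resize is assumed to happen after the loop in the real code):

  #include <cstddef>
  #include <limits>
  #include <vector>

  // Remove elements whose score is -infinity by shifting survivors left,
  // then shrinking the container once at the end.
  void CompactDiscarded(std::vector<float> &scores) {
    std::size_t numDiscard = 0;
    for (std::size_t i = 0; i < scores.size(); ++i) {
      if (scores[i] == -std::numeric_limits<float>::infinity()) {
        ++numDiscard;                         // drop this element
      } else if (numDiscard) {
        scores[i - numDiscard] = scores[i];   // slide the survivor into the gap
      }
    }
    scores.resize(scores.size() - numDiscard);
  }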
View File
@ -13,297 +13,297 @@
namespace Moses namespace Moses
{ {
struct CNStats {
  size_t created,destr,read,colls,words;

  CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
  ~CNStats() {
    print(std::cerr);
  }

  void createOne() {
    ++created;
  }
  void destroyOne() {
    ++destr;
  }

  void collect(const ConfusionNet& cn) {
    ++read;
    colls+=cn.GetSize();
    for(size_t i=0; i<cn.GetSize(); ++i)
      words+=cn[i].size();
  }
  void print(std::ostream& out) const {
    if(created>0) {
      out<<"confusion net statistics:\n"
        " created:\t"<<created<<"\n"
        " destroyed:\t"<<destr<<"\n"
        " succ. read:\t"<<read<<"\n"
        " columns:\t"<<colls<<"\n"
        " words:\t"<<words<<"\n"
        " avg. word/column:\t"<<words/(1.0*colls)<<"\n"
        " avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
        "\n\n";
    }
  }
};

CNStats stats;

size_t
ConfusionNet::
GetColumnIncrement(size_t i, size_t j) const
{
  (void) i;
  (void) j;
  return 1;
}

ConfusionNet::
ConfusionNet()
  : InputType()
{
  stats.createOne();

  const StaticData& staticData = StaticData::Instance();
  if (staticData.IsChart()) {
    m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
  }
  UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
}

ConfusionNet::
~ConfusionNet()
{
  stats.destroyOne();
}

ConfusionNet::
ConfusionNet(Sentence const& s)
{
  data.resize(s.GetSize());
  for(size_t i=0; i<s.GetSize(); ++i) {
    ScorePair scorePair;
    std::pair<Word, ScorePair > temp = std::make_pair(s.GetWord(i), scorePair);
    data[i].push_back(temp);
  }
}

bool
ConfusionNet::
ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
{
  VERBOSE(2, "read confusion net with format "<<format<<"\n");
  switch(format) {
  case 0:
    return ReadFormat0(in,factorOrder);
  case 1:
    return ReadFormat1(in,factorOrder);
  default:
    std::cerr << "ERROR: unknown format '"<<format
              <<"' in ConfusionNet::Read";
  }
  return false;
}

int
ConfusionNet::
Read(std::istream& in,
     const std::vector<FactorType>& factorOrder)
{
  int rv=ReadF(in,factorOrder,0);
  if(rv) stats.collect(*this);
  return rv;
}

#if 0
// Deprecated due to code duplication;
// use Word::CreateFromString() instead
void
ConfusionNet::
String2Word(const std::string& s,Word& w,
            const std::vector<FactorType>& factorOrder)
{
  std::vector<std::string> factorStrVector = Tokenize(s, "|");
  for(size_t i=0; i<factorOrder.size(); ++i)
    w.SetFactor(factorOrder[i],
                FactorCollection::Instance().AddFactor
                (Input,factorOrder[i], factorStrVector[i]));
}
#endif

bool
ConfusionNet::
ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
{
  Clear();

  // const StaticData &staticData = StaticData::Instance();
  const InputFeature &inputFeature = InputFeature::Instance();
  size_t numInputScores = inputFeature.GetNumInputScores();
  size_t numRealWordCount = inputFeature.GetNumRealWordsInInput();

  size_t totalCount = numInputScores + numRealWordCount;
  bool addRealWordCount = (numRealWordCount > 0);

  std::string line;
  while(getline(in,line)) {
    std::istringstream is(line);
    std::string word;

    Column col;
    while(is>>word) {
      Word w;
      // String2Word(word,w,factorOrder);
      w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
      std::vector<float> probs(totalCount, 0.0);
      for(size_t i=0; i < numInputScores; i++) {
        double prob;
        if (!(is>>prob)) {
          TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
          return false;
        }
        if(prob<0.0) {
          VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
          prob=0.0;
        } else if (prob>1.0) {
          VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
          prob=1.0;
        }
        probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
      }
      //store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
      if (addRealWordCount && word!=EPSILON && word!="")
        probs.back() = -1.0;

      ScorePair scorePair(probs);

      col.push_back(std::make_pair(w,scorePair));
    }
    if(col.size()) {
      data.push_back(col);
      ShrinkToFit(data.back());
    } else break;
  }
  return !data.empty();
}

bool
ConfusionNet::
ReadFormat1(std::istream& in, const std::vector<FactorType>& factorOrder)
{
  Clear();
  std::string line;
  if(!getline(in,line)) return 0;
  size_t s;
  if(getline(in,line)) s=atoi(line.c_str());
  else return 0;
  data.resize(s);
  for(size_t i=0; i<data.size(); ++i) {
    if(!getline(in,line)) return 0;
    std::istringstream is(line);
    if(!(is>>s)) return 0;
    std::string word;
    double prob;
    data[i].resize(s);
    for(size_t j=0; j<s; ++j)
      if(is>>word>>prob) {
        //TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
        data[i][j].second.denseScores = std::vector<float> (1);
        data[i][j].second.denseScores.push_back((float) log(prob));
        if(data[i][j].second.denseScores[0]<0) {
          VERBOSE(1, "WARN: neg costs: "<<data[i][j].second.denseScores[0]<<" -> set to 0\n");
          data[i][j].second.denseScores[0]=0.0;
        }
        // String2Word(word,data[i][j].first,factorOrder);
        Word& w = data[i][j].first;
        w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
      } else return 0;
  }
  return !data.empty();
}

void ConfusionNet::Print(std::ostream& out) const
{
  out<<"conf net: "<<data.size()<<"\n";
  for(size_t i=0; i<data.size(); ++i) {
    out<<i<<" -- ";
    for(size_t j=0; j<data[i].size(); ++j) {
      out<<"("<<data[i][j].first.ToString()<<", ";

      // dense
      std::vector<float>::const_iterator iterDense;
      for(iterDense = data[i][j].second.denseScores.begin();
          iterDense < data[i][j].second.denseScores.end();
          ++iterDense) {
        out<<", "<<*iterDense;
      }

      // sparse
      std::map<StringPiece, float>::const_iterator iterSparse;
      for(iterSparse = data[i][j].second.sparseScores.begin();
          iterSparse != data[i][j].second.sparseScores.end();
          ++iterSparse) {
        out << ", " << iterSparse->first << "=" << iterSparse->second;
      }
      out<<") ";
    }
    out<<"\n";
  }
  out<<"\n\n";
}
#ifdef _WIN32 #ifdef _WIN32
#pragma warning(disable:4716) #pragma warning(disable:4716)
#endif #endif
Phrase Phrase
ConfusionNet:: ConfusionNet::
GetSubString(const WordsRange&) const GetSubString(const WordsRange&) const
{ {
UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n"); UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n");
//return Phrase(Input); //return Phrase(Input);
} }
std::string std::string
ConfusionNet:: ConfusionNet::
GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet
{ {
TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n"); TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
return ""; return "";
} }
#ifdef _WIN32 #ifdef _WIN32
#pragma warning(disable:4716) #pragma warning(disable:4716)
#endif #endif
const Word& ConfusionNet::GetWord(size_t) const const Word& ConfusionNet::GetWord(size_t) const
{ {
UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n"); UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n");
} }
#ifdef _WIN32 #ifdef _WIN32
#pragma warning(default:4716) #pragma warning(default:4716)
#endif #endif
std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn) std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
{ {
cn.Print(out); cn.Print(out);
return out; return out;
} }
TranslationOptionCollection* TranslationOptionCollection*
ConfusionNet:: ConfusionNet::
CreateTranslationOptionCollection() const CreateTranslationOptionCollection() const
{ {
size_t maxNoTransOptPerCoverage size_t maxNoTransOptPerCoverage
= StaticData::Instance().GetMaxNoTransOptPerCoverage(); = StaticData::Instance().GetMaxNoTransOptPerCoverage();
float translationOptionThreshold float translationOptionThreshold
= StaticData::Instance().GetTranslationOptionThreshold(); = StaticData::Instance().GetTranslationOptionThreshold();
TranslationOptionCollection *rv TranslationOptionCollection *rv
= new TranslationOptionCollectionConfusionNet = new TranslationOptionCollectionConfusionNet
(*this, maxNoTransOptPerCoverage, translationOptionThreshold); (*this, maxNoTransOptPerCoverage, translationOptionThreshold);
assert(rv); assert(rv);
return rv; return rv;
} }
} }
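For reference, ReadFormat0 above reads one confusion-network column per input line, each alternative word followed by its link probability (or several scores, depending on how InputFeature is configured). A hedged illustration with a single score per arc; the words and probabilities are made up:

  das 0.9 dass 0.1
  ist 0.7 is 0.3
  gut 1.0

Each probability is converted to a log score (floored at LOWEST_SCORE), and when a real-word-count score is configured the last score of every non-epsilon word is set to -1.0.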
View File
@ -49,8 +49,8 @@ public:
DecodeGraph(size_t id) DecodeGraph(size_t id)
: m_id(id) : m_id(id)
, m_maxChartSpan(NOT_FOUND) , m_maxChartSpan(NOT_FOUND)
, m_backoff(0) , m_backoff(0) {
{} }
// for chart decoding // for chart decoding
DecodeGraph(size_t id, size_t maxChartSpan) DecodeGraph(size_t id, size_t maxChartSpan)
View File
@ -198,11 +198,11 @@ const InputPath &DecodeStepTranslation::GetInputPathLEGACY(
const Word *wordIP = NULL; const Word *wordIP = NULL;
for (size_t i = 0; i < phraseFromIP.GetSize(); ++i) { for (size_t i = 0; i < phraseFromIP.GetSize(); ++i) {
const Word &tempWord = phraseFromIP.GetWord(i); const Word &tempWord = phraseFromIP.GetWord(i);
if (!tempWord.IsEpsilon()) { if (!tempWord.IsEpsilon()) {
wordIP = &tempWord; wordIP = &tempWord;
break; break;
} }
} }
// const WordsRange &range = inputPath.GetWordsRange(); // const WordsRange &range = inputPath.GetWordsRange();
@ -237,7 +237,7 @@ void DecodeStepTranslation::ProcessLEGACY(const TranslationOption &inputPartialT
const size_t tableLimit = phraseDictionary->GetTableLimit(); const size_t tableLimit = phraseDictionary->GetTableLimit();
const TargetPhraseCollectionWithSourcePhrase *phraseColl const TargetPhraseCollectionWithSourcePhrase *phraseColl
= phraseDictionary->GetTargetPhraseCollectionLEGACY(toc->GetSource(),sourceWordsRange); = phraseDictionary->GetTargetPhraseCollectionLEGACY(toc->GetSource(),sourceWordsRange);
if (phraseColl != NULL) { if (phraseColl != NULL) {
View File
@ -502,8 +502,8 @@ void BleuScoreFeature::GetClippedNgramMatchesAndCounts(Phrase& phrase,
* phrase translated. * phrase translated.
*/ */
FFState* BleuScoreFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo, FFState* BleuScoreFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
const FFState* prev_state, const FFState* prev_state,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const
{ {
if (!m_enabled) return new BleuScoreState(); if (!m_enabled) return new BleuScoreState();
View File
@ -116,27 +116,27 @@ public:
size_t skip = 0) const; size_t skip = 0) const;
FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo,
const FFState* prev_state, const FFState* prev_state,
ScoreComponentCollection* accumulator) const; ScoreComponentCollection* accumulator) const;
FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
int featureID, int featureID,
ScoreComponentCollection* accumulator) const; ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const , ScoreComponentCollection *estimatedFutureScore = NULL) const {
{} }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const , const TranslationOptionList &translationOptionList) const {
{} }
void EvaluateInIsolation(const Phrase &source void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const {
{} }
bool Enabled() const { bool Enabled() const {
return m_enabled; return m_enabled;
View File
@ -11,8 +11,8 @@ namespace Moses
class ConstrainedDecodingState : public FFState class ConstrainedDecodingState : public FFState
{ {
public: public:
ConstrainedDecodingState() ConstrainedDecodingState() {
{} }
ConstrainedDecodingState(const Hypothesis &hypo); ConstrainedDecodingState(const Hypothesis &hypo);
ConstrainedDecodingState(const ChartHypothesis &hypo); ConstrainedDecodingState(const ChartHypothesis &hypo);
@ -42,23 +42,23 @@ public:
} }
void EvaluateInIsolation(const Phrase &source void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const {
{} }
void EvaluateWithSourceContext(const InputType &input void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const , ScoreComponentCollection *estimatedFutureScore = NULL) const {
{} }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const , const TranslationOptionList &translationOptionList) const {
{} }
FFState* EvaluateWhenApplied( FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo, const Hypothesis& cur_hypo,
const FFState* prev_state, const FFState* prev_state,
View File
@ -20,8 +20,8 @@ class ControlRecombinationState : public FFState
{ {
public: public:
ControlRecombinationState(const ControlRecombination &ff) ControlRecombinationState(const ControlRecombination &ff)
:m_ff(ff) :m_ff(ff) {
{} }
ControlRecombinationState(const Hypothesis &hypo, const ControlRecombination &ff); ControlRecombinationState(const Hypothesis &hypo, const ControlRecombination &ff);
ControlRecombinationState(const ChartHypothesis &hypo, const ControlRecombination &ff); ControlRecombinationState(const ChartHypothesis &hypo, const ControlRecombination &ff);
@ -58,22 +58,22 @@ public:
} }
void EvaluateInIsolation(const Phrase &source void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const {
{} }
void EvaluateWithSourceContext(const InputType &input void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const , ScoreComponentCollection *estimatedFutureScore = NULL) const {
{} }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const , const TranslationOptionList &translationOptionList) const {
{} }
FFState* EvaluateWhenApplied( FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo, const Hypothesis& cur_hypo,
const FFState* prev_state, const FFState* prev_state,
View File
@ -8,18 +8,18 @@ using namespace std;
namespace Moses namespace Moses
{ {
CountNonTerms::CountNonTerms(const std::string &line) CountNonTerms::CountNonTerms(const std::string &line)
:StatelessFeatureFunction(line) :StatelessFeatureFunction(line)
,m_all(true) ,m_all(true)
,m_sourceSyntax(false) ,m_sourceSyntax(false)
,m_targetSyntax(false) ,m_targetSyntax(false)
{ {
ReadParameters(); ReadParameters();
} }
void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const
{ {
const StaticData &staticData = StaticData::Instance(); const StaticData &staticData = StaticData::Instance();
@ -27,33 +27,33 @@ void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
size_t indScore = 0; size_t indScore = 0;
if (m_all) { if (m_all) {
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) { for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const Word &word = targetPhrase.GetWord(i); const Word &word = targetPhrase.GetWord(i);
if (word.IsNonTerminal()) { if (word.IsNonTerminal()) {
++scores[indScore]; ++scores[indScore];
} }
} }
++indScore; ++indScore;
} }
if (m_targetSyntax) { if (m_targetSyntax) {
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) { for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const Word &word = targetPhrase.GetWord(i); const Word &word = targetPhrase.GetWord(i);
if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) { if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
++scores[indScore]; ++scores[indScore];
} }
} }
++indScore; ++indScore;
} }
if (m_sourceSyntax) { if (m_sourceSyntax) {
for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) { for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
const Word &word = sourcePhrase.GetWord(i); const Word &word = sourcePhrase.GetWord(i);
if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) { if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
++scores[indScore]; ++scores[indScore];
} }
} }
++indScore; ++indScore;
} }
scoreBreakdown.PlusEquals(this, scores); scoreBreakdown.PlusEquals(this, scores);
@ -64,9 +64,9 @@ void CountNonTerms::SetParameter(const std::string& key, const std::string& valu
if (key == "all") { if (key == "all") {
m_all = Scan<bool>(value); m_all = Scan<bool>(value);
} else if (key == "source-syntax") { } else if (key == "source-syntax") {
m_sourceSyntax = Scan<bool>(value); m_sourceSyntax = Scan<bool>(value);
} else if (key == "target-syntax") { } else if (key == "target-syntax") {
m_targetSyntax = Scan<bool>(value); m_targetSyntax = Scan<bool>(value);
} else { } else {
StatelessFeatureFunction::SetParameter(key, value); StatelessFeatureFunction::SetParameter(key, value);
} }


@ -14,30 +14,30 @@ public:
} }
void EvaluateInIsolation(const Phrase &source void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const; , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const , ScoreComponentCollection *estimatedFutureScore = NULL) const {
{} }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const , const TranslationOptionList &translationOptionList) const {
{} }
void EvaluateWhenApplied(const Hypothesis& hypo, void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const {
{} }
void EvaluateWhenApplied( void EvaluateWhenApplied(
const ChartHypothesis& hypo, const ChartHypothesis& hypo,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const {
{} }
void SetParameter(const std::string& key, const std::string& value); void SetParameter(const std::string& key, const std::string& value);


@ -22,44 +22,44 @@ int CoveredReferenceState::Compare(const FFState& other) const
const CoveredReferenceState &otherState = static_cast<const CoveredReferenceState&>(other); const CoveredReferenceState &otherState = static_cast<const CoveredReferenceState&>(other);
if (m_coveredRef.size() != otherState.m_coveredRef.size()) { if (m_coveredRef.size() != otherState.m_coveredRef.size()) {
return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1; return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
} else { } else {
multiset<string>::const_iterator thisIt, otherIt; multiset<string>::const_iterator thisIt, otherIt;
for (thisIt = m_coveredRef.begin(), otherIt = otherState.m_coveredRef.begin(); for (thisIt = m_coveredRef.begin(), otherIt = otherState.m_coveredRef.begin();
thisIt != m_coveredRef.end(); thisIt != m_coveredRef.end();
thisIt++, otherIt++) { thisIt++, otherIt++) {
if (*thisIt != *otherIt) return thisIt->compare(*otherIt); if (*thisIt != *otherIt) return thisIt->compare(*otherIt);
} }
} }
return 0; return 0;
// return m_coveredRef == otherState.m_coveredRef; // return m_coveredRef == otherState.m_coveredRef;
// if (m_coveredRef == otherState.m_coveredRef) // if (m_coveredRef == otherState.m_coveredRef)
// return 0; // return 0;
// return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1; // return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
} }
void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const
{} {}
void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore) const , ScoreComponentCollection *estimatedFutureScore) const
{ {
long id = input.GetTranslationId(); long id = input.GetTranslationId();
boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id); boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
multiset<string> wordsInPhrase = GetWordsInPhrase(targetPhrase); multiset<string> wordsInPhrase = GetWordsInPhrase(targetPhrase);
multiset<string> covered; multiset<string> covered;
set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(), set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
refIt->second.begin(), refIt->second.end(), refIt->second.begin(), refIt->second.end(),
inserter(covered, covered.begin())); inserter(covered, covered.begin()));
vector<float> scores; vector<float> scores;
scores.push_back(covered.size()); scores.push_back(covered.size());
@ -67,7 +67,8 @@ void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
estimatedFutureScore->Assign(this, scores); estimatedFutureScore->Assign(this, scores);
} }
void CoveredReferenceFeature::Load() { void CoveredReferenceFeature::Load()
{
InputFileStream refFile(m_path); InputFileStream refFile(m_path);
std::string line; std::string line;
const StaticData &staticData = StaticData::Instance(); const StaticData &staticData = StaticData::Instance();
@ -76,7 +77,7 @@ void CoveredReferenceFeature::Load() {
vector<string> words = Tokenize(line, " "); vector<string> words = Tokenize(line, " ");
multiset<string> wordSet; multiset<string> wordSet;
// TODO make Tokenize work with other containers than vector // TODO make Tokenize work with other containers than vector
copy(words.begin(), words.end(), inserter(wordSet, wordSet.begin())); copy(words.begin(), words.end(), inserter(wordSet, wordSet.begin()));
m_refs.insert(make_pair(sentenceID++, wordSet)); m_refs.insert(make_pair(sentenceID++, wordSet));
} }
} }
@ -107,15 +108,15 @@ FFState* CoveredReferenceFeature::EvaluateWhenApplied(
boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id); boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
if (refIt == m_refs.end()) UTIL_THROW(util::Exception, "Sentence id out of range: " + SPrint<long>(id)); if (refIt == m_refs.end()) UTIL_THROW(util::Exception, "Sentence id out of range: " + SPrint<long>(id));
set_difference(refIt->second.begin(), refIt->second.end(), set_difference(refIt->second.begin(), refIt->second.end(),
ret->m_coveredRef.begin(), ret->m_coveredRef.end(), ret->m_coveredRef.begin(), ret->m_coveredRef.end(),
inserter(remaining, remaining.begin())); inserter(remaining, remaining.begin()));
// which of the remaining words are present in the current phrase // which of the remaining words are present in the current phrase
multiset<string> wordsInPhrase = GetWordsInPhrase(cur_hypo.GetCurrTargetPhrase()); multiset<string> wordsInPhrase = GetWordsInPhrase(cur_hypo.GetCurrTargetPhrase());
multiset<string> newCovered; multiset<string> newCovered;
set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(), set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
remaining.begin(), remaining.end(), remaining.begin(), remaining.end(),
inserter(newCovered, newCovered.begin())); inserter(newCovered, newCovered.begin()));
vector<float> estimateScore = vector<float> estimateScore =
cur_hypo.GetCurrTargetPhrase().GetScoreBreakdown().GetScoresForProducer(this); cur_hypo.GetCurrTargetPhrase().GetScoreBreakdown().GetScoresForProducer(this);


@ -52,20 +52,20 @@ public:
} }
void EvaluateInIsolation(const Phrase &source void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const; , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const; , ScoreComponentCollection *estimatedFutureScore = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const , const TranslationOptionList &translationOptionList) const {
{} }
FFState* EvaluateWhenApplied( FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo, const Hypothesis& cur_hypo,
const FFState* prev_state, const FFState* prev_state,


@ -63,30 +63,30 @@ public:
void SetParameter(const std::string& key, const std::string& value); void SetParameter(const std::string& key, const std::string& value);
void EvaluateWhenApplied(const Hypothesis& hypo, void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const {
{} }
void EvaluateWhenApplied(const ChartHypothesis &hypo, void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const {
{} }
void EvaluateWhenApplied(const Syntax::SHyperedge &hyperedge, void EvaluateWhenApplied(const Syntax::SHyperedge &hyperedge,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const {
{} }
void EvaluateWithSourceContext(const InputType &input void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const , ScoreComponentCollection *estimatedFutureScore = NULL) const {
{} }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const , const TranslationOptionList &translationOptionList) const {
{} }
void EvaluateInIsolation(const Phrase &source void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const {
{} }
void SetContainer(const DecodeStep *container) { void SetContainer(const DecodeStep *container) {
m_container = container; m_container = container;


@ -48,22 +48,22 @@ public:
} }
void EvaluateWithSourceContext(const InputType &input void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const , ScoreComponentCollection *estimatedFutureScore = NULL) const {
{} }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const , const TranslationOptionList &translationOptionList) const {
{} }
void EvaluateInIsolation(const Phrase &source void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const {
{} }
}; };
} }


@ -93,12 +93,16 @@ public:
} }
static const DynamicCacheBasedLanguageModel* Instance(const std::string& name) { static const DynamicCacheBasedLanguageModel* Instance(const std::string& name) {
if (s_instance_map.find(name) == s_instance_map.end()){ return NULL; } if (s_instance_map.find(name) == s_instance_map.end()) {
return NULL;
}
return s_instance_map[name]; return s_instance_map[name];
} }
static DynamicCacheBasedLanguageModel* InstanceNonConst(const std::string& name) { static DynamicCacheBasedLanguageModel* InstanceNonConst(const std::string& name) {
if (s_instance_map.find(name) == s_instance_map.end()){ return NULL; } if (s_instance_map.find(name) == s_instance_map.end()) {
return NULL;
}
return s_instance_map[name]; return s_instance_map[name];
} }
@ -126,29 +130,29 @@ public:
void Clear(); void Clear();
virtual void EvaluateInIsolation(const Phrase &source virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const; , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const , ScoreComponentCollection *estimatedFutureScore = NULL) const {
{} }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const , const TranslationOptionList &translationOptionList) const {
{} }
void EvaluateWhenApplied(const Hypothesis& hypo, void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const {
{} }
void EvaluateWhenApplied(const ChartHypothesis &hypo, void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const {
{} }
void SetQueryType(size_t type); void SetQueryType(size_t type);
void SetScoreType(size_t type); void SetScoreType(size_t type);


@ -18,8 +18,8 @@ protected:
public: public:
ExternalFeatureState(int stateSize) ExternalFeatureState(int stateSize)
:m_stateSize(stateSize) :m_stateSize(stateSize)
,m_data(NULL) ,m_data(NULL) {
{} }
ExternalFeatureState(int stateSize, void *data); ExternalFeatureState(int stateSize, void *data);
~ExternalFeatureState() { ~ExternalFeatureState() {
@ -52,22 +52,22 @@ public:
void SetParameter(const std::string& key, const std::string& value); void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const {
{} }
void EvaluateWithSourceContext(const InputType &input void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const , ScoreComponentCollection *estimatedFutureScore = NULL) const {
{} }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const , const TranslationOptionList &translationOptionList) const {
{} }
FFState* EvaluateWhenApplied( FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo, const Hypothesis& cur_hypo,
const FFState* prev_state, const FFState* prev_state,


@ -242,7 +242,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(SkeletonChangeInput); MOSES_FNAME(SkeletonChangeInput);
MOSES_FNAME(SkeletonTranslationOptionListFeature); MOSES_FNAME(SkeletonTranslationOptionListFeature);
MOSES_FNAME(SkeletonPT); MOSES_FNAME(SkeletonPT);
#ifdef HAVE_VW #ifdef HAVE_VW
MOSES_FNAME(VW); MOSES_FNAME(VW);
MOSES_FNAME(VWFeatureSourceBagOfWords); MOSES_FNAME(VWFeatureSourceBagOfWords);
@ -322,22 +322,22 @@ void FeatureRegistry::Construct(const std::string &name, const std::string &line
void FeatureRegistry::PrintFF() const void FeatureRegistry::PrintFF() const
{ {
vector<string> ffs; vector<string> ffs;
std::cerr << "Available feature functions:" << std::endl; std::cerr << "Available feature functions:" << std::endl;
Map::const_iterator iter; Map::const_iterator iter;
for (iter = registry_.begin(); iter != registry_.end(); ++iter) { for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
const string &ffName = iter->first; const string &ffName = iter->first;
ffs.push_back(ffName); ffs.push_back(ffName);
} }
vector<string>::const_iterator iterVec; vector<string>::const_iterator iterVec;
std::sort(ffs.begin(), ffs.end()); std::sort(ffs.begin(), ffs.end());
for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) { for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
const string &ffName = *iterVec; const string &ffName = *iterVec;
std::cerr << ffName << " "; std::cerr << ffName << " ";
} }
std::cerr << std::endl; std::cerr << std::endl;
} }
} // namespace Moses } // namespace Moses


@ -38,8 +38,8 @@ void FeatureFunction::Destroy()
void FeatureFunction::CallChangeSource(InputType *&input) void FeatureFunction::CallChangeSource(InputType *&input)
{ {
for (size_t i = 0; i < s_staticColl.size(); ++i) { for (size_t i = 0; i < s_staticColl.size(); ++i) {
const FeatureFunction &ff = *s_staticColl[i]; const FeatureFunction &ff = *s_staticColl[i];
ff.ChangeSource(input); ff.ChangeSource(input);
} }
} }


@ -111,13 +111,13 @@ public:
// may have more factors than actually need, but not guaranteed. // may have more factors than actually need, but not guaranteed.
// For SCFG decoding, the source contains non-terminals, NOT the raw source from the input sentence // For SCFG decoding, the source contains non-terminals, NOT the raw source from the input sentence
virtual void EvaluateInIsolation(const Phrase &source virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const = 0; , ScoreComponentCollection &estimatedFutureScore) const = 0;
// override this method if you want to change the input before decoding // override this method if you want to change the input before decoding
virtual void ChangeSource(InputType *&input) const virtual void ChangeSource(InputType *&input) const {
{} }
// This method is called once all the translation options are retrieved from the phrase table, and // This method is called once all the translation options are retrieved from the phrase table, and
// just before search. // just before search.
@ -127,12 +127,12 @@ public:
// For pb models, stackvec is NULL. // For pb models, stackvec is NULL.
// No FF should set estimatedFutureScore in both overloads! // No FF should set estimatedFutureScore in both overloads!
virtual void EvaluateWithSourceContext(const InputType &input virtual void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const = 0; , ScoreComponentCollection *estimatedFutureScore = NULL) const = 0;
// This method is called once all the translation options are retrieved from the phrase table, and // This method is called once all the translation options are retrieved from the phrase table, and
// just before search. // just before search.
// 'inputPath' is guaranteed to be the raw substring from the input. No factors were added or taken away // 'inputPath' is guaranteed to be the raw substring from the input. No factors were added or taken away
@ -141,7 +141,7 @@ public:
// For pb models, stackvec is NULL. // For pb models, stackvec is NULL.
// No FF should set estimatedFutureScore in both overloads! // No FF should set estimatedFutureScore in both overloads!
virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const = 0; , const TranslationOptionList &translationOptionList) const = 0;
virtual void SetParameter(const std::string& key, const std::string& value); virtual void SetParameter(const std::string& key, const std::string& value);
virtual void ReadParameters(); virtual void ReadParameters();


@ -165,11 +165,11 @@ float GlobalLexicalModel::GetFromCacheOrScorePhrase( const TargetPhrase& targetP
} }
void GlobalLexicalModel::EvaluateInIsolation(const Phrase &source void GlobalLexicalModel::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const
{ {
scoreBreakdown.PlusEquals( this, GetFromCacheOrScorePhrase(targetPhrase) ); scoreBreakdown.PlusEquals( this, GetFromCacheOrScorePhrase(targetPhrase) );
} }
bool GlobalLexicalModel::IsUseable(const FactorMask &mask) const bool GlobalLexicalModel::IsUseable(const FactorMask &mask) const


@ -71,29 +71,29 @@ public:
bool IsUseable(const FactorMask &mask) const; bool IsUseable(const FactorMask &mask) const;
void EvaluateInIsolation(const Phrase &source void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const; , ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWhenApplied(const Hypothesis& hypo, void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const {
{} }
void EvaluateWhenApplied(const ChartHypothesis &hypo, void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const {
{} }
void EvaluateWithSourceContext(const InputType &input void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const , ScoreComponentCollection *estimatedFutureScore = NULL) const {
{} }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const , const TranslationOptionList &translationOptionList) const {
{} }
}; };
} }


@ -27,8 +27,8 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
// read optional punctuation and bias specifications // read optional punctuation and bias specifications
if (spec.size() > 0) { if (spec.size() > 0) {
if (spec.size() != 2 && spec.size() != 3 && spec.size() != 4 && spec.size() != 6) { if (spec.size() != 2 && spec.size() != 3 && spec.size() != 4 && spec.size() != 6) {
std::cerr << "Format of glm feature is <factor-src>-<factor-tgt> [ignore-punct] [use-bias] " std::cerr << "Format of glm feature is <factor-src>-<factor-tgt> [ignore-punct] [use-bias] "
<< "[context-type] [filename-src filename-tgt]"; << "[context-type] [filename-src filename-tgt]";
//return false; //return false;
} }
@ -48,7 +48,7 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
factors = Tokenize(modelSpec[i],"-"); factors = Tokenize(modelSpec[i],"-");
if ( factors.size() != 2 ) { if ( factors.size() != 2 ) {
std::cerr << "Wrong factor definition for global lexical model unlimited: " << modelSpec[i]; std::cerr << "Wrong factor definition for global lexical model unlimited: " << modelSpec[i];
//return false; //return false;
} }
@ -60,10 +60,10 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
if (restricted) { if (restricted) {
cerr << "loading word translation word lists from " << filenameSource << " and " << filenameTarget << endl; cerr << "loading word translation word lists from " << filenameSource << " and " << filenameTarget << endl;
if (!glmu->Load(filenameSource, filenameTarget)) { if (!glmu->Load(filenameSource, filenameTarget)) {
std::cerr << "Unable to load word lists for word translation feature from files " std::cerr << "Unable to load word lists for word translation feature from files "
<< filenameSource << filenameSource
<< " and " << " and "
<< filenameTarget; << filenameTarget;
//return false; //return false;
} }
} }


@ -82,31 +82,31 @@ public:
//TODO: This implements the old interface, but cannot be updated because //TODO: This implements the old interface, but cannot be updated because
//it appears to be stateful //it appears to be stateful
void EvaluateWhenApplied(const Hypothesis& cur_hypo, void EvaluateWhenApplied(const Hypothesis& cur_hypo,
ScoreComponentCollection* accumulator) const; ScoreComponentCollection* accumulator) const;
void EvaluateWhenApplied(const ChartHypothesis& /* cur_hypo */, void EvaluateWhenApplied(const ChartHypothesis& /* cur_hypo */,
int /* featureID */, int /* featureID */,
ScoreComponentCollection* ) const { ScoreComponentCollection* ) const {
throw std::logic_error("GlobalLexicalModelUnlimited not supported in chart decoder, yet"); throw std::logic_error("GlobalLexicalModelUnlimited not supported in chart decoder, yet");
} }
void EvaluateWithSourceContext(const InputType &input void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const , ScoreComponentCollection *estimatedFutureScore = NULL) const {
{} }
void EvaluateTranslationOptionListWithSourceContext(const InputType &input void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const , const TranslationOptionList &translationOptionList) const {
{} }
void EvaluateInIsolation(const Phrase &source void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const {
{} }
void AddFeature(ScoreComponentCollection* accumulator, void AddFeature(ScoreComponentCollection* accumulator,
StringPiece sourceTrigger, StringPiece sourceWord, StringPiece targetTrigger, StringPiece sourceTrigger, StringPiece sourceWord, StringPiece targetTrigger,


@ -19,33 +19,33 @@ public:
} }
virtual void EvaluateInIsolation(const Phrase &source virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const {
{} }
virtual void EvaluateWithSourceContext(const InputType &input virtual void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const , ScoreComponentCollection *estimatedFutureScore = NULL) const {
{} }
virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const , const TranslationOptionList &translationOptionList) const {
{} }
virtual void EvaluateWhenApplied(const Hypothesis& hypo, virtual void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const {
{} }
/** /**
* Same for chart-based features. * Same for chart-based features.
**/ **/
virtual void EvaluateWhenApplied(const ChartHypothesis &hypo, virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const {
{} }
}; };


@ -45,11 +45,11 @@ void InputFeature::SetParameter(const std::string& key, const std::string& value
} }
void InputFeature::EvaluateWithSourceContext(const InputType &input void InputFeature::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore) const , ScoreComponentCollection *estimatedFutureScore) const
{ {
if (m_legacy) { if (m_legacy) {
//binary phrase-table does input feature itself //binary phrase-table does input feature itself


@ -42,28 +42,28 @@ public:
} }
void EvaluateInIsolation(const Phrase &source void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const {
{} }
void EvaluateWithSourceContext(const InputType &input void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const; , ScoreComponentCollection *estimatedFutureScore = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const , const TranslationOptionList &translationOptionList) const {
{} }
void EvaluateWhenApplied(const Hypothesis& hypo, void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const {
{} }
void EvaluateWhenApplied(const ChartHypothesis &hypo, void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const {
{} }
}; };


@ -4,236 +4,241 @@ namespace Moses
{ {
InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool terminal): InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool terminal):
m_value_nt(0), m_value_nt(0),
m_isTerminal(terminal) m_isTerminal(terminal)
{ {
if (len > 0) { if (len > 0) {
m_value.assign(line, start, len); m_value.assign(line, start, len);
} }
} }
InternalTree::InternalTree(const std::string & line, const bool terminal): InternalTree::InternalTree(const std::string & line, const bool terminal):
m_value_nt(0), m_value_nt(0),
m_isTerminal(terminal) m_isTerminal(terminal)
{ {
size_t found = line.find_first_of("[] "); size_t found = line.find_first_of("[] ");
if (found == line.npos) { if (found == line.npos) {
m_value = line; m_value = line;
} } else {
else { AddSubTree(line, 0);
AddSubTree(line, 0); }
}
} }
size_t InternalTree::AddSubTree(const std::string & line, size_t pos) { size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
{
char token = 0; char token = 0;
size_t len = 0; size_t len = 0;
while (token != ']' && pos != std::string::npos) while (token != ']' && pos != std::string::npos) {
{ size_t oldpos = pos;
size_t oldpos = pos; pos = line.find_first_of("[] ", pos);
pos = line.find_first_of("[] ", pos); if (pos == std::string::npos) break;
if (pos == std::string::npos) break; token = line[pos];
token = line[pos]; len = pos-oldpos;
len = pos-oldpos;
if (token == '[') { if (token == '[') {
if (!m_value.empty()) { if (!m_value.empty()) {
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false)); m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
pos = m_children.back()->AddSubTree(line, pos+1); pos = m_children.back()->AddSubTree(line, pos+1);
} } else {
else { if (len > 0) {
if (len > 0) { m_value.assign(line, oldpos, len);
m_value.assign(line, oldpos, len);
}
pos = AddSubTree(line, pos+1);
}
}
else if (token == ' ' || token == ']') {
if (len > 0 && m_value.empty()) {
m_value.assign(line, oldpos, len);
}
else if (len > 0) {
m_isTerminal = false;
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
}
if (token == ' ') {
pos++;
}
}
if (!m_children.empty()) {
m_isTerminal = false;
} }
pos = AddSubTree(line, pos+1);
}
} else if (token == ' ' || token == ']') {
if (len > 0 && m_value.empty()) {
m_value.assign(line, oldpos, len);
} else if (len > 0) {
m_isTerminal = false;
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
}
if (token == ' ') {
pos++;
}
} }
if (pos == std::string::npos) { if (!m_children.empty()) {
return line.size(); m_isTerminal = false;
} }
return std::min(line.size(),pos+1); }
if (pos == std::string::npos) {
return line.size();
}
return std::min(line.size(),pos+1);
} }
std::string InternalTree::GetString(bool start) const { std::string InternalTree::GetString(bool start) const
{
std::string ret = ""; std::string ret = "";
if (!start) { if (!start) {
ret += " "; ret += " ";
} }
if (!m_isTerminal) { if (!m_isTerminal) {
ret += "["; ret += "[";
} }
ret += m_value; ret += m_value;
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
{ ret += (*it)->GetString(false);
ret += (*it)->GetString(false); }
}
if (!m_isTerminal) { if (!m_isTerminal) {
ret += "]"; ret += "]";
} }
return ret; return ret;
} }
void InternalTree::Combine(const std::vector<TreePointer> &previous) { void InternalTree::Combine(const std::vector<TreePointer> &previous)
{
std::vector<TreePointer>::iterator it; std::vector<TreePointer>::iterator it;
bool found = false; bool found = false;
leafNT next_leafNT(this); leafNT next_leafNT(this);
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) { for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
found = next_leafNT(it); found = next_leafNT(it);
if (found) { if (found) {
*it = *it_prev; *it = *it_prev;
} } else {
else { std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
}
} }
}
} }
bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const { bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
for (it = m_children.begin(); it != m_children.end(); ++it) { {
if ((*it)->GetLabel() == label) { for (it = m_children.begin(); it != m_children.end(); ++it) {
return true; if ((*it)->GetLabel() == label) {
} return true;
} }
return false; }
return false;
} }
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const { bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
for (it = m_children.begin(); it != m_children.end(); ++it) { {
if ((*it)->GetLabel() == label) { for (it = m_children.begin(); it != m_children.end(); ++it) {
return true; if ((*it)->GetLabel() == label) {
} return true;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
} }
return false; std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
}
return false;
} }
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const { bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
for (it = m_children.begin(); it != m_children.end(); ++it) { {
if ((*it)->GetLabel() == label) { for (it = m_children.begin(); it != m_children.end(); ++it) {
parent = this; if ((*it)->GetLabel() == label) {
return true; parent = this;
} return true;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
} }
return false; std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
}
return false;
} }
bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const { bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
for (it = m_children.begin(); it != m_children.end(); ++it) { {
if ((*it)->GetNTLabel() == label) { for (it = m_children.begin(); it != m_children.end(); ++it) {
return true; if ((*it)->GetNTLabel() == label) {
} return true;
} }
return false; }
return false;
} }
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const { bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
for (it = m_children.begin(); it != m_children.end(); ++it) { {
if ((*it)->GetNTLabel() == label) { for (it = m_children.begin(); it != m_children.end(); ++it) {
return true; if ((*it)->GetNTLabel() == label) {
} return true;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
} }
return false; std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
}
return false;
} }
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const { bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
for (it = m_children.begin(); it != m_children.end(); ++it) { {
if ((*it)->GetNTLabel() == label) { for (it = m_children.begin(); it != m_children.end(); ++it) {
parent = this; if ((*it)->GetNTLabel() == label) {
return true; parent = this;
} return true;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
} }
return false; std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
}
return false;
} }
bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const { bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
for (it = m_children.begin(); it != m_children.end(); ++it) { {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) { for (it = m_children.begin(); it != m_children.end(); ++it) {
return true; if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
} return true;
} }
return false; }
return false;
} }
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const { bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
for (it = m_children.begin(); it != m_children.end(); ++it) { {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) { for (it = m_children.begin(); it != m_children.end(); ++it) {
return true; if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
} return true;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2)) {
it = it2;
return true;
}
} }
return false; std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2)) {
it = it2;
return true;
}
}
return false;
} }
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const { bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
for (it = m_children.begin(); it != m_children.end(); ++it) { {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) { for (it = m_children.begin(); it != m_children.end(); ++it) {
parent = this; if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true; parent = this;
} return true;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2, parent)) {
it = it2;
return true;
}
} }
return false; std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2, parent)) {
it = it2;
return true;
}
}
return false;
} }
} }


@ -19,79 +19,79 @@ typedef int NTLabel;
class InternalTree class InternalTree
{ {
std::string m_value; std::string m_value;
NTLabel m_value_nt; NTLabel m_value_nt;
std::vector<TreePointer> m_children; std::vector<TreePointer> m_children;
bool m_isTerminal; bool m_isTerminal;
public: public:
InternalTree(const std::string & line, size_t start, size_t len, const bool terminal); InternalTree(const std::string & line, size_t start, size_t len, const bool terminal);
InternalTree(const std::string & line, const bool terminal = false); InternalTree(const std::string & line, const bool terminal = false);
InternalTree(const InternalTree & tree): InternalTree(const InternalTree & tree):
m_value(tree.m_value), m_value(tree.m_value),
m_isTerminal(tree.m_isTerminal) { m_isTerminal(tree.m_isTerminal) {
const std::vector<TreePointer> & children = tree.m_children; const std::vector<TreePointer> & children = tree.m_children;
for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) { for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
m_children.push_back(boost::make_shared<InternalTree>(**it)); m_children.push_back(boost::make_shared<InternalTree>(**it));
}
}
size_t AddSubTree(const std::string & line, size_t start);
std::string GetString(bool start = true) const;
void Combine(const std::vector<TreePointer> &previous);
const std::string & GetLabel() const {
return m_value;
} }
}
size_t AddSubTree(const std::string & line, size_t start);
// optionally identify label by int instead of string; std::string GetString(bool start = true) const;
// allows abstraction if multiple nonterminal strings should map to same label. void Combine(const std::vector<TreePointer> &previous);
const NTLabel & GetNTLabel() const { const std::string & GetLabel() const {
return m_value_nt; return m_value;
} }
void SetNTLabel(NTLabel value) { // optionally identify label by int instead of string;
m_value_nt = value; // allows abstraction if multiple nonterminal strings should map to same label.
} const NTLabel & GetNTLabel() const {
return m_value_nt;
}
size_t GetLength() const { void SetNTLabel(NTLabel value) {
return m_children.size(); m_value_nt = value;
} }
std::vector<TreePointer> & GetChildren() {
return m_children;
}
bool IsTerminal() const { size_t GetLength() const {
return m_isTerminal; return m_children.size();
} }
std::vector<TreePointer> & GetChildren() {
return m_children;
}
bool IsLeafNT() const { bool IsTerminal() const {
return (!m_isTerminal && m_children.size() == 0); return m_isTerminal;
} }
// different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents. bool IsLeafNT() const {
// can be used for formulating syntax constraints. return (!m_isTerminal && m_children.size() == 0);
}
// if found, 'it' is iterator to first tree node that matches search string // different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const; // can be used for formulating syntax constraints.
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node // if found, 'it' is iterator to first tree node that matches search string
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const; bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
// use NTLabel for search to reduce number of string comparisons / deal with synonymous labels // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
// if found, 'it' is iterator to first tree node that matches search string bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node // use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const; // if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
// pass vector of possible labels to search // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
// if found, 'it' is iterator to first tree node that matches search string bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node // pass vector of possible labels to search
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const; // if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
}; };
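A rough usage sketch for the search helpers declared above; the function name and include path are assumptions, and this code is not part of the commit.

#include "moses/FF/InternalTree.h"
#include <iostream>

// Find the first node labelled "NP" anywhere below the root and print its
// subtree. RecursiveSearch() descends through all children, whereas
// FlatSearch() would only inspect the immediate children of 'root'.
void PrintFirstNP(const Moses::TreePointer &root)
{
  std::vector<Moses::TreePointer>::const_iterator it;
  if (root->RecursiveSearch("NP", it)) {
    std::cerr << (*it)->GetString() << std::endl;
  }
}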
@ -101,77 +101,79 @@ class TreeState : public FFState
TreePointer m_tree; TreePointer m_tree;
public: public:
TreeState(TreePointer tree) TreeState(TreePointer tree)
:m_tree(tree) :m_tree(tree) {
{}
TreePointer GetTree() const {
return m_tree;
} }
int Compare(const FFState& other) const {return 0;}; TreePointer GetTree() const {
return m_tree;
}
int Compare(const FFState& other) const {
return 0;
};
}; };
// Python-like generator that yields next nonterminal leaf on every call // Python-like generator that yields next nonterminal leaf on every call
$generator(leafNT) { $generator(leafNT)
std::vector<TreePointer>::iterator it; {
InternalTree* tree; std::vector<TreePointer>::iterator it;
leafNT(InternalTree* root = 0): tree(root) {} InternalTree* tree;
$emit(std::vector<TreePointer>::iterator) leafNT(InternalTree* root = 0): tree(root) {}
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) { $emit(std::vector<TreePointer>::iterator)
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) { for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
$yield(it); if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
} $yield(it);
else if ((*it)->GetLength() > 0) { } else if ((*it)->GetLength() > 0) {
if ((*it).get()) { // normal pointer to same object that TreePointer points to if ((*it).get()) { // normal pointer to same object that TreePointer points to
$restart(tree = (*it).get()); $restart(tree = (*it).get());
} }
}
} }
$stop; }
$stop;
}; };
// Python-like generator that yields the parent of the next nonterminal leaf on every call // Python-like generator that yields the parent of the next nonterminal leaf on every call
$generator(leafNTParent) { $generator(leafNTParent)
std::vector<TreePointer>::iterator it; {
InternalTree* tree; std::vector<TreePointer>::iterator it;
leafNTParent(InternalTree* root = 0): tree(root) {} InternalTree* tree;
$emit(InternalTree*) leafNTParent(InternalTree* root = 0): tree(root) {}
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) { $emit(InternalTree*)
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) { for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
$yield(tree); if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
} $yield(tree);
else if ((*it)->GetLength() > 0) { } else if ((*it)->GetLength() > 0) {
if ((*it).get()) { if ((*it).get()) {
$restart(tree = (*it).get()); $restart(tree = (*it).get());
} }
}
} }
$stop; }
$stop;
}; };
// Python-like generator that yields the next nonterminal leaf on every call, and also stores the path from the root of the tree to the nonterminal // Python-like generator that yields the next nonterminal leaf on every call, and also stores the path from the root of the tree to the nonterminal
$generator(leafNTPath) { $generator(leafNTPath)
std::vector<TreePointer>::iterator it; {
InternalTree* tree; std::vector<TreePointer>::iterator it;
std::vector<InternalTree*> * path; InternalTree* tree;
leafNTPath(InternalTree* root = NULL, std::vector<InternalTree*> * orig = NULL): tree(root), path(orig) {} std::vector<InternalTree*> * path;
$emit(std::vector<TreePointer>::iterator) leafNTPath(InternalTree* root = NULL, std::vector<InternalTree*> * orig = NULL): tree(root), path(orig) {}
path->push_back(tree); $emit(std::vector<TreePointer>::iterator)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) { path->push_back(tree);
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) { for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
path->push_back((*it).get()); if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(it); path->push_back((*it).get());
path->pop_back(); $yield(it);
} path->pop_back();
else if ((*it)->GetLength() > 0) { } else if ((*it)->GetLength() > 0) {
if ((*it).get()) { if ((*it).get()) {
$restart(tree = (*it).get()); $restart(tree = (*it).get());
} }
}
} }
path->pop_back(); }
$stop; path->pop_back();
$stop;
}; };
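A hypothetical usage sketch of the leafNT generator defined above, mirroring how InternalTree::Combine() drives it in the .cpp hunk earlier; the function name is invented and the code is not part of this commit.

#include "moses/FF/InternalTree.h"
#include <iostream>

// Enumerate all leaf nonterminals of a tree and print their labels.
// The generator object is callable: each call fills 'it' with an iterator to
// the next leaf NT and returns false once the tree is exhausted.
void PrintLeafNonTerminals(Moses::InternalTree *root)
{
  std::vector<Moses::TreePointer>::iterator it;
  Moses::leafNT next_leafNT(root);
  while (next_leafNT(it)) {
    std::cerr << (*it)->GetLabel() << std::endl;
  }
}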


@ -15,7 +15,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
std::cerr << "Initializing LexicalReordering.." << std::endl; std::cerr << "Initializing LexicalReordering.." << std::endl;
map<string,string> sparseArgs; map<string,string> sparseArgs;
m_haveDefaultScores = false; m_haveDefaultScores = false;
for (size_t i = 0; i < m_args.size(); ++i) { for (size_t i = 0; i < m_args.size(); ++i) {
const vector<string> &args = m_args[i]; const vector<string> &args = m_args[i];
@ -36,7 +36,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
for(size_t i=0; i<tokens.size(); i++) { for(size_t i=0; i<tokens.size(); i++) {
m_defaultScores.push_back( TransformScore( Scan<float>(tokens[i]) ) ); m_defaultScores.push_back( TransformScore( Scan<float>(tokens[i]) ) );
} }
m_haveDefaultScores = true; m_haveDefaultScores = true;
} else { } else {
UTIL_THROW(util::Exception,"Unknown argument " + args[0]); UTIL_THROW(util::Exception,"Unknown argument " + args[0]);
} }
@ -84,8 +84,8 @@ Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
} }
FFState* LexicalReordering::EvaluateWhenApplied(const Hypothesis& hypo, FFState* LexicalReordering::EvaluateWhenApplied(const Hypothesis& hypo,
const FFState* prev_state, const FFState* prev_state,
ScoreComponentCollection* out) const ScoreComponentCollection* out) const
{ {
VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START" << std::endl); VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START" << std::endl);
Scores score(GetNumScoreComponents(), 0); Scores score(GetNumScoreComponents(), 0);


@ -46,33 +46,37 @@ public:
  Scores GetProb(const Phrase& f, const Phrase& e) const;
  virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo,
                                       const FFState* prev_state,
                                       ScoreComponentCollection* accumulator) const;
  virtual FFState* EvaluateWhenApplied(const ChartHypothesis&,
                                       int /* featureID */,
                                       ScoreComponentCollection*) const {
    UTIL_THROW(util::Exception, "LexicalReordering is not valid for chart decoder");
  }
  void EvaluateWithSourceContext(const InputType &input
                                 , const InputPath &inputPath
                                 , const TargetPhrase &targetPhrase
                                 , const StackVec *stackVec
                                 , ScoreComponentCollection &scoreBreakdown
                                 , ScoreComponentCollection *estimatedFutureScore = NULL) const {
  }
  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const {
  }
  void EvaluateInIsolation(const Phrase &source
                           , const TargetPhrase &targetPhrase
                           , ScoreComponentCollection &scoreBreakdown
                           , ScoreComponentCollection &estimatedFutureScore) const {
  }
  bool GetHaveDefaultScores() {
    return m_haveDefaultScores;
  }
  float GetDefaultScore( size_t i ) {
    return m_defaultScores[i];
  }
private:
  bool DecodeCondition(std::string s);

View File

@ -39,7 +39,7 @@ size_t LexicalReorderingConfiguration::GetNumScoreComponents() const
}
void LexicalReorderingConfiguration::ConfigureSparse
(const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer)
{
  if (sparseArgs.size()) {
    m_sparse.reset(new SparseReordering(sparseArgs, producer));
@ -95,7 +95,7 @@ LexicalReorderingConfiguration::LexicalReorderingConfiguration(const std::string
  }
  if (m_modelType == None) {
    std::cerr << "You need to specify the type of the reordering model (msd, monotonicity,...)" << std::endl;
    exit(1);
  }
}
@ -134,7 +134,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
{
  // don't call this on a bidirectional object
  UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward,
                 "Unknown direction: " << m_direction);
  const TranslationOption* relevantOpt = &topt;
  if (m_direction != LexicalReorderingConfiguration::Backward) relevantOpt = m_prevOption;
  const Scores *cachedScores = relevantOpt->GetLexReorderingScores(m_configuration.GetScoreProducer());
@ -146,8 +146,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
    const Scores &scoreSet = *cachedScores;
    if(m_configuration.CollapseScores()) {
      scores[m_offset] = scoreSet[m_offset + reoType];
    } else {
      std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
      scores[m_offset + reoType] = scoreSet[m_offset + reoType];
    }
@ -158,8 +157,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
    Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0);
    if(m_configuration.CollapseScores()) {
      scores[m_offset] = m_configuration.GetScoreProducer()->GetDefaultScore(m_offset + reoType);
    } else {
      scores[m_offset + reoType] = m_configuration.GetScoreProducer()->GetDefaultScore(m_offset + reoType);
    }
  accum->PlusEquals(m_configuration.GetScoreProducer(), scores);

View File

@ -124,7 +124,7 @@ protected:
  int ComparePrevScores(const TranslationOption *other) const;
  //constants for the different type of reorderings (corresponding to indexes in the table file)
public:
  static const ReorderingType M = 0;  // monotonic
  static const ReorderingType NM = 1; // non-monotonic
  static const ReorderingType S = 1;  // swap

View File

@ -16,10 +16,11 @@
using namespace std;
namespace Moses
{
const std::string& SparseReorderingFeatureKey::Name (const string& wordListId)
{
  static string kSep = "-";
  static string name;
  ostringstream buf;
@ -55,7 +56,7 @@ const std::string& SparseReorderingFeatureKey::Name (const string& wordListId) {
}
SparseReordering::SparseReordering(const map<string,string>& config, const LexicalReordering* producer)
  : m_producer(producer)
{
  static const string kSource= "source";
  static const string kTarget = "target";
@ -93,22 +94,24 @@ SparseReordering::SparseReordering(const map<string,string>& config, const Lexic
}
void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster)
{
  for (size_t type = SparseReorderingFeatureKey::Stack;
       type <= SparseReorderingFeatureKey::Between; ++type) {
    for (size_t position = SparseReorderingFeatureKey::First;
         position <= SparseReorderingFeatureKey::Last; ++position) {
      for (int reoType = 0; reoType <= LexicalReorderingState::MAX; ++reoType) {
        SparseReorderingFeatureKey key(
          index, static_cast<SparseReorderingFeatureKey::Type>(type), factor, isCluster,
          static_cast<SparseReorderingFeatureKey::Position>(position), side, reoType);
        m_featureMap.insert(pair<SparseReorderingFeatureKey, FName>(key,m_producer->GetFeatureName(key.Name(id))));
      }
    }
  }
}
void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<WordList>* pWordLists)
{
  ifstream fh(filename.c_str());
  UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename);
  string line;
@ -118,12 +121,13 @@ void SparseReordering::ReadWordList(const string& filename, const string& id, Sp
    //TODO: StringPiece
    const Factor* factor = FactorCollection::Instance().AddFactor(line);
    pWordLists->back().second.insert(factor);
    PreCalculateFeatureNames(pWordLists->size()-1, id, side, factor, false);
  }
}
void SparseReordering::ReadClusterMap(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<ClusterMap>* pClusterMaps)
{
  pClusterMaps->push_back(ClusterMap());
  pClusterMaps->back().first = id;
  util::FilePiece file(filename.c_str());
@ -141,15 +145,16 @@ void SparseReordering::ReadClusterMap(const string& filename, const string& id,
    if (!lineIter) UTIL_THROW(util::Exception, "Malformed cluster line (missing cluster id): '" << line << "'");
    const Factor* idFactor = FactorCollection::Instance().AddFactor(*lineIter);
    pClusterMaps->back().second[wordFactor] = idFactor;
    PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true);
  }
}
void SparseReordering::AddFeatures(
  SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
  const Word& word, SparseReorderingFeatureKey::Position position,
  LexicalReorderingState::ReorderingType reoType,
  ScoreComponentCollection* scores) const
{
  const Factor* wordFactor = word.GetFactor(0);
@ -186,18 +191,18 @@ void SparseReordering::AddFeatures(
}
void SparseReordering::CopyScores(
  const TranslationOption& currentOpt,
  const TranslationOption* previousOpt,
  const InputType& input,
  LexicalReorderingState::ReorderingType reoType,
  LexicalReorderingConfiguration::Direction direction,
  ScoreComponentCollection* scores) const
{
  if (m_useBetween && direction == LexicalReorderingConfiguration::Backward &&
      (reoType == LexicalReorderingState::D || reoType == LexicalReorderingState::DL ||
       reoType == LexicalReorderingState::DR)) {
    size_t gapStart, gapEnd;
    //NB: Using a static cast for speed, but could be nasty if
    //using non-sentence input
    const Sentence& sentence = static_cast<const Sentence&>(input);
    const WordsRange& currentRange = currentOpt.GetSourceWordsRange();
@ -217,9 +222,9 @@ void SparseReordering::CopyScores(
    }
    assert(gapStart < gapEnd);
    for (size_t i = gapStart; i < gapEnd; ++i) {
      AddFeatures(SparseReorderingFeatureKey::Between,
                  SparseReorderingFeatureKey::Source, sentence.GetWord(i),
                  SparseReorderingFeatureKey::First, reoType, scores);
    }
  }
  //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
@ -240,11 +245,11 @@ void SparseReordering::CopyScores(
  }
  const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase();
  AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0),
              SparseReorderingFeatureKey::First, reoType, scores);
  AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);
  const Phrase& targetPhrase = currentOpt.GetTargetPhrase();
  AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0),
              SparseReorderingFeatureKey::First, reoType, scores);
  AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);

View File

@ -23,7 +23,7 @@
/**
Configuration of sparse reordering:
  The sparse reordering feature is configured using sparse-* configs in the lexical reordering line.
  sparse-words-(source|target)-<id>=<filename> -- Features which fire for the words in the list
  sparse-clusters-(source|target)-<id>=<filename> -- Features which fire for clusters in the list. Format
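As a hedged illustration of the key pattern documented above (the ids and file names here are invented for the example, not taken from this diff), a lexical reordering feature line might carry arguments such as:

  sparse-words-source-stop=stopwords.src.txt
  sparse-clusters-target-c50=clusters.tgt.50

Each entry registers a word list or cluster map for the given side, and the <id> part is folded into the generated sparse feature names via SparseReorderingFeatureKey::Name.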
@ -38,7 +38,7 @@
namespace Moses
{
/**
* Used to store pre-calculated feature names.
**/
struct SparseReorderingFeatureKey {
@ -51,17 +51,17 @@ struct SparseReorderingFeatureKey {
  LexicalReorderingState::ReorderingType reoType;
  SparseReorderingFeatureKey(size_t id_, Type type_, const Factor* word_, bool isCluster_,
                             Position position_, Side side_, LexicalReorderingState::ReorderingType reoType_)
    : id(id_), type(type_), word(word_), isCluster(isCluster_),
      position(position_), side(side_), reoType(reoType_) {
  }
  const std::string& Name(const std::string& wordListId) ;
};
struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorderingFeatureKey, std::size_t> {
  std::size_t operator()(const SparseReorderingFeatureKey& key) const {
    //TODO: can we just hash the memory?
    //not sure, there could be random padding
    std::size_t seed = 0;
    seed = util::MurmurHashNative(&key.id, sizeof(key.id), seed);
@ -76,7 +76,7 @@ struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorder
};
struct EqualsSparseReorderingFeatureKey :
  public std::binary_function<SparseReorderingFeatureKey, SparseReorderingFeatureKey, bool> {
  bool operator()(const SparseReorderingFeatureKey& left, const SparseReorderingFeatureKey& right) const {
    //TODO: Can we just compare the memory?
    return left.id == right.id && left.type == right.type && left.word == right.word &&
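The TODO comments in the two structs above ask whether the key could simply be hashed or compared as raw memory; the hinted answer is no, because compiler-inserted padding bytes are indeterminate, so two logically equal keys could differ byte-wise. Below is a minimal, self-contained sketch of the member-wise alternative, using std::hash and a boost-style combiner rather than the util::MurmurHashNative calls used here; the types and names are invented for illustration.

#include <cstddef>
#include <functional>
#include <iostream>

// A toy key whose layout very likely contains padding after 'side'.
struct ToyKey {
  char side;        // 1 byte; the compiler may leave padding after it
  std::size_t id;
};

// boost-style hash combiner
inline void hash_combine(std::size_t& seed, std::size_t value) {
  seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

// Member-wise hash: independent of whatever the padding bytes contain.
std::size_t HashToyKey(const ToyKey& k) {
  std::size_t seed = 0;
  hash_combine(seed, std::hash<char>()(k.side));
  hash_combine(seed, std::hash<std::size_t>()(k.id));
  return seed;
}

int main() {
  ToyKey a = {'s', 42};
  ToyKey b = {'s', 42};
  std::cout << (HashToyKey(a) == HashToyKey(b)) << std::endl;  // always prints 1
  return 0;
}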
@ -89,14 +89,14 @@ class SparseReordering
{
public:
  SparseReordering(const std::map<std::string,std::string>& config, const LexicalReordering* producer);
  //If direction is backward the options will be different, for forward they will be the same
  void CopyScores(const TranslationOption& currentOpt,
                  const TranslationOption* previousOpt,
                  const InputType& input,
                  LexicalReorderingState::ReorderingType reoType,
                  LexicalReorderingConfiguration::Direction direction,
                  ScoreComponentCollection* scores) const ;
private:
  const LexicalReordering* m_producer;
@ -113,14 +113,14 @@ private:
  FeatureMap m_featureMap;
  void ReadWordList(const std::string& filename, const std::string& id,
                    SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
  void ReadClusterMap(const std::string& filename, const std::string& id, SparseReorderingFeatureKey::Side side, std::vector<ClusterMap>* pClusterMaps);
  void PreCalculateFeatureNames(size_t index, const std::string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster);
  void AddFeatures(
    SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
    const Word& word, SparseReorderingFeatureKey::Position position,
    LexicalReorderingState::ReorderingType reoType,
    ScoreComponentCollection* scores) const;
};

View File

@ -14,10 +14,10 @@ using namespace std;
namespace Moses
{
MaxSpanFreeNonTermSource::MaxSpanFreeNonTermSource(const std::string &line)
  :StatelessFeatureFunction(1, line)
  ,m_maxSpan(2)
  ,m_glueTargetLHSStr("S")
  ,m_glueTargetLHS(true)
{
  m_tuneable = false;
  ReadParameters();
@ -28,25 +28,25 @@ MaxSpanFreeNonTermSource::MaxSpanFreeNonTermSource(const std::string &line)
}
void MaxSpanFreeNonTermSource::EvaluateInIsolation(const Phrase &source
    , const TargetPhrase &targetPhrase
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection &estimatedFutureScore) const
{
  targetPhrase.SetRuleSource(source);
}
void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedFutureScore) const
{
  const Word &targetLHS = targetPhrase.GetTargetLHS();
  if (targetLHS == m_glueTargetLHS) {
    // don't delete glue rules
    return;
  }
  const Phrase *source = targetPhrase.GetRuleSource();
@ -54,17 +54,17 @@ void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
  float score = 0;
  if (source->Front().IsNonTerminal()) {
    const ChartCellLabel &cell = *stackVec->front();
    if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
      score = - std::numeric_limits<float>::infinity();
    }
  }
  if (source->Back().IsNonTerminal()) {
    const ChartCellLabel &cell = *stackVec->back();
    if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
      score = - std::numeric_limits<float>::infinity();
    }
  }
@ -76,7 +76,7 @@ void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
void MaxSpanFreeNonTermSource::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "max-span") {
    m_maxSpan = Scan<int>(value);
  } else {
    StatelessFeatureFunction::SetParameter(key, value);
  }
@ -84,8 +84,8 @@ void MaxSpanFreeNonTermSource::SetParameter(const std::string& key, const std::s
std::vector<float> MaxSpanFreeNonTermSource::DefaultWeights() const
{
  std::vector<float> ret(1, 1);
  return ret;
}
}

Some files were not shown because too many files have changed in this diff.