This commit is contained in:
Hieu Hoang 2015-01-14 11:07:42 +00:00
parent 91cb549ccf
commit 05ead45e71
406 changed files with 19495 additions and 20485 deletions

View File

@ -153,19 +153,19 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
break;
}
case 4: {
// store only the 3rd one (rule count)
float val = Moses::Scan<float>(tok);
misc[0] = val;
break;
// store only the 3rd one (rule count)
float val = Moses::Scan<float>(tok);
misc[0] = val;
break;
}
case 5: {
// sparse features
sparseFeatures << tok << " ";
// sparse features
sparseFeatures << tok << " ";
break;
}
case 6: {
property << tok << " ";
break;
property << tok << " ";
break;
}
default:
cerr << "ERROR in line " << line << endl;

View File

@ -166,10 +166,10 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
size_t propSize = m_property.size();
size_t memNeeded = sizeof(UINT64) // file pos (phrase id)
+ sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
+ sizeof(float) * numScores // scores
+ sizeof(UINT64) + sparseFeatureSize // sparse features string
+ sizeof(UINT64) + propSize; // property string
+ sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
+ sizeof(float) * numScores // scores
+ sizeof(UINT64) + sparseFeatureSize // sparse features string
+ sizeof(UINT64) + propSize; // property string
char *mem = (char*) malloc(memNeeded);
//memset(mem, 0, memNeeded);
@ -350,13 +350,13 @@ UINT64 TargetPhrase::ReadStringFromFile(std::fstream &fileTPColl, std::string &o
bytesRead += sizeof(UINT64);
if (strSize) {
char *mem = (char*) malloc(strSize + 1);
mem[strSize] = '\0';
fileTPColl.read(mem, strSize);
outStr = string(mem);
free(mem);
char *mem = (char*) malloc(strSize + 1);
mem[strSize] = '\0';
fileTPColl.read(mem, strSize);
outStr = string(mem);
free(mem);
bytesRead += strSize;
bytesRead += strSize;
}
return bytesRead;

View File

@ -113,14 +113,12 @@ public:
virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const;
void SetProperty(const std::string &value)
{
m_property = value;
void SetProperty(const std::string &value) {
m_property = value;
}
void SetSparseFeatures(const std::string &value)
{
m_sparseFeatures = value;
void SetSparseFeatures(const std::string &value) {
m_sparseFeatures = value;
}
};

View File

@ -105,18 +105,17 @@ void Word::ConvertToMoses(
overwrite = Moses::Word(m_isNonTerminal);
if (m_isNonTerminal) {
const std::string &tok = vocab.GetString(m_vocabId);
overwrite.SetFactor(0, factorColl.AddFactor(tok, m_isNonTerminal));
}
else {
// TODO: this conversion should have been done at load time.
util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
const std::string &tok = vocab.GetString(m_vocabId);
overwrite.SetFactor(0, factorColl.AddFactor(tok, m_isNonTerminal));
} else {
// TODO: this conversion should have been done at load time.
util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
}
UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
}
UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
}
}

View File

@ -7,7 +7,8 @@ size_t lookup( string );
vector<string> tokenize( const char input[] );
SuffixArray suffixArray;
int main(int argc, char* argv[]) {
int main(int argc, char* argv[])
{
// handle parameters
string query;
string fileNameSuffix;
@ -95,14 +96,14 @@ int main(int argc, char* argv[]) {
}
cout << lookup( query ) << endl;
}
}
else if (queryFlag) {
} else if (queryFlag) {
cout << lookup( query ) << endl;
}
return 0;
}
size_t lookup( string query ) {
size_t lookup( string query )
{
cerr << "query is " << query << endl;
vector< string > queryString = tokenize( query.c_str() );
return suffixArray.Count( queryString );

View File

@ -61,7 +61,8 @@ void SparseVector::set(const string& name, FeatureStatsType value)
m_fvector[id] = value;
}
void SparseVector::set(size_t id, FeatureStatsType value) {
/**
 * Store `value` for the feature whose numeric id is `id`.
 *
 * The id is expected to be registered already: the assert compares it
 * against the size of m_id_to_name (the check is compiled out when NDEBUG
 * is defined).
 */
void SparseVector::set(size_t id, FeatureStatsType value)
{
  assert(id < m_id_to_name.size());
  m_fvector[id] = value;
}
@ -204,7 +205,7 @@ FeatureStats::FeatureStats(const size_t size)
FeatureStats::~FeatureStats()
{
delete [] m_array;
delete [] m_array;
}
void FeatureStats::Copy(const FeatureStats &stats)

View File

@ -31,9 +31,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
namespace MosesTuning {
namespace MosesTuning
{
std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
std::ostream& operator<<(std::ostream& out, const WordVec& wordVec)
{
out << "[";
for (size_t i = 0; i < wordVec.size(); ++i) {
out << wordVec[i]->first;
@ -44,7 +46,8 @@ std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
}
void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
void ReferenceSet::Load(const vector<string>& files, Vocab& vocab)
{
for (size_t i = 0; i < files.size(); ++i) {
util::FilePiece fh(files[i].c_str());
size_t sentenceId = 0;
@ -55,14 +58,15 @@ void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
} catch (util::EndOfFileException &e) {
break;
}
AddLine(sentenceId, line, vocab);
++sentenceId;
AddLine(sentenceId, line, vocab);
++sentenceId;
}
}
}
void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab) {
void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab)
{
//cerr << line << endl;
NgramCounter ngramCounts;
list<WordVec> openNgrams;
@ -74,14 +78,14 @@ void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vo
openNgrams.push_front(WordVec());
for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end(); ++k) {
k->push_back(nextTok);
++ngramCounts[*k];
++ngramCounts[*k];
}
if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
}
//merge into overall ngram map
for (NgramCounter::const_iterator ni = ngramCounts.begin();
ni != ngramCounts.end(); ++ni) {
ni != ngramCounts.end(); ++ni) {
size_t count = ni->second;
//cerr << *ni << " " << count << endl;
if (ngramCounts_.size() <= sentenceId) ngramCounts_.resize(sentenceId+1);
@ -104,8 +108,9 @@ void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vo
//cerr << endl;
}
size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const {
size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const
{
const NgramMap& ngramCounts = ngramCounts_.at(sentenceId);
NgramMap::const_iterator ngi = ngramCounts.find(ngram);
if (ngi == ngramCounts.end()) return 0;
@ -114,7 +119,8 @@ size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool
// Default state: bleuStats is constructed from kBleuNgramOrder and the
// target length starts at zero.
VertexState::VertexState(): bleuStats(kBleuNgramOrder), targetLength(0) {}
void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats ) const {
void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats ) const
{
for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
//cerr << "Checking: " << *ngi << " matches " << references_.NgramMatches(sentenceId_,*ngi,false) << endl;
size_t order = ngi->first.size();
@ -124,7 +130,8 @@ void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStats
}
}
size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
size_t HgBleuScorer::GetTargetLength(const Edge& edge) const
{
size_t targetLength = 0;
for (size_t i = 0; i < edge.Words().size(); ++i) {
const Vocab::Entry* word = edge.Words()[i];
@ -137,7 +144,8 @@ size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
return targetLength;
}
FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats) {
FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats)
{
NgramCounter ngramCounts;
size_t childId = 0;
size_t wordId = 0;
@ -147,7 +155,7 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
bool inRightContext = false;
list<WordVec> openNgrams;
const Vocab::Entry* currentWord = NULL;
while (wordId < edge.Words().size()) {
while (wordId < edge.Words().size()) {
currentWord = edge.Words()[wordId];
if (currentWord != NULL) {
++wordId;
@ -214,7 +222,7 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
}
if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
}
//Collect matches
//This edge
//cerr << "edge ngrams" << endl;
@ -227,26 +235,27 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
bleuStats[j] += vertexStates_[edge.Children()[i]].bleuStats[j];
}
}
FeatureStatsType sourceLength = head.SourceCovered();
size_t referenceLength = references_.Length(sentenceId_);
FeatureStatsType effectiveReferenceLength =
FeatureStatsType effectiveReferenceLength =
sourceLength / totalSourceLength_ * referenceLength;
bleuStats[bleuStats.size()-1] = effectiveReferenceLength;
//backgroundBleu_[backgroundBleu_.size()-1] =
//backgroundBleu_[backgroundBleu_.size()-1] =
// backgroundRefLength_ * sourceLength / totalSourceLength_;
FeatureStatsType bleu = sentenceLevelBackgroundBleu(bleuStats, backgroundBleu_);
return bleu;
}
void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats) {
void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats)
{
//TODO: Maybe more efficient to absorb into the Score() method
VertexState& vertexState = vertexStates_[vertexId];
//cerr << "Updating state for " << vertexId << endl;
//leftContext
int wi = 0;
const VertexState* childState = NULL;
@ -263,9 +272,9 @@ void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const ve
//start of child state
childState = &(vertexStates_[winnerEdge.Children()[childi++]]);
contexti = 0;
}
}
if ((size_t)contexti < childState->leftContext.size()) {
vertexState.leftContext.push_back(childState->leftContext[contexti++]);
vertexState.leftContext.push_back(childState->leftContext[contexti++]);
} else {
//end of child context
childState = NULL;
@ -314,7 +323,8 @@ typedef pair<const Edge*,FeatureStatsType> BackPointer;
* Recurse through back pointers
**/
static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<BackPointer>& bps,
HgHypothesis* bestHypo) {
HgHypothesis* bestHypo)
{
//cerr << "Expanding " << vertexId << " Score: " << bps[vertexId].second << endl;
//UTIL_THROW_IF(bps[vertexId].second == kMinScore+1, HypergraphException, "Landed at vertex " << vertexId << " which is a dead end");
if (!bps[vertexId].first) return;
@ -334,7 +344,7 @@ static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<
}
}
void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
{
BackPointer init(NULL,kMinScore);
vector<BackPointer> backPointers(graph.VertexSize(),init);
@ -349,7 +359,7 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
//UTIL_THROW(HypergraphException, "Vertex " << vi << " has no incoming edges");
//If no incoming edges, vertex is a dead end
backPointers[vi].first = NULL;
backPointers[vi].second = kMinScore;
backPointers[vi].second = kMinScore;
} else {
//cerr << "\nVertex: " << vi << endl;
for (size_t ei = 0; ei < incoming.size(); ++ei) {
@ -362,10 +372,10 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
incomingScore = max(incomingScore + backPointers[childId].second, kMinScore);
}
vector<FeatureStatsType> bleuStats(kBleuNgramOrder*2+1);
// cerr << "Score: " << incomingScore << " Bleu: ";
// if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
// cerr << "Score: " << incomingScore << " Bleu: ";
// if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
FeatureStatsType totalScore = incomingScore;
if (bleuWeight) {
if (bleuWeight) {
FeatureStatsType bleuScore = bleuScorer.Score(*(incoming[ei]), vertex, bleuStats);
if (isnan(bleuScore)) {
cerr << "WARN: bleu score undefined" << endl;
@ -379,7 +389,7 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
}
//UTIL_THROW_IF(isnan(bleuScore), util::Exception, "Bleu score undefined, smoothing problem?");
totalScore += bleuWeight * bleuScore;
// cerr << bleuScore << " Total: " << incomingScore << endl << endl;
// cerr << bleuScore << " Total: " << incomingScore << endl << endl;
//cerr << "is " << incomingScore << " bs " << bleuScore << endl;
}
if (totalScore >= winnerScore) {

View File

@ -27,7 +27,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "BleuScorer.h"
#include "Hypergraph.h"
namespace MosesTuning {
namespace MosesTuning
{
std::ostream& operator<<(std::ostream& out, const WordVec& wordVec);
@ -47,18 +48,21 @@ struct NgramEquals : public std::binary_function<const WordVec&, const WordVec&,
typedef boost::unordered_map<WordVec, size_t, NgramHash, NgramEquals> NgramCounter;
class ReferenceSet {
class ReferenceSet
{
public:
void AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab);
void Load(const std::vector<std::string>& files, Vocab& vocab);
size_t NgramMatches(size_t sentenceId, const WordVec&, bool clip) const;
size_t Length(size_t sentenceId) const {return lengths_[sentenceId];}
// Returns the entry of lengths_ stored for `sentenceId`.
// NOTE(review): lengths_ is indexed with operator[]; presumably there is no
// bounds check here, so callers must pass a valid sentence id — confirm
// against the container's declaration.
size_t Length(size_t sentenceId) const {
return lengths_[sentenceId];
}
private:
//ngrams to (clipped,unclipped) counts
@ -80,31 +84,32 @@ struct VertexState {
/**
* Used to score a rule (i.e. an edge) when we are applying it.
**/
class HgBleuScorer {
public:
HgBleuScorer(const ReferenceSet& references, const Graph& graph, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu):
class HgBleuScorer
{
public:
HgBleuScorer(const ReferenceSet& references, const Graph& graph, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu):
references_(references), sentenceId_(sentenceId), graph_(graph), backgroundBleu_(backgroundBleu),
backgroundRefLength_(backgroundBleu[kBleuNgramOrder*2]) {
vertexStates_.resize(graph.VertexSize());
totalSourceLength_ = graph.GetVertex(graph.VertexSize()-1).SourceCovered();
}
backgroundRefLength_(backgroundBleu[kBleuNgramOrder*2]) {
vertexStates_.resize(graph.VertexSize());
totalSourceLength_ = graph.GetVertex(graph.VertexSize()-1).SourceCovered();
}
FeatureStatsType Score(const Edge& edge, const Vertex& head, std::vector<FeatureStatsType>& bleuStats) ;
FeatureStatsType Score(const Edge& edge, const Vertex& head, std::vector<FeatureStatsType>& bleuStats) ;
void UpdateState(const Edge& winnerEdge, size_t vertexId, const std::vector<FeatureStatsType>& bleuStats);
void UpdateState(const Edge& winnerEdge, size_t vertexId, const std::vector<FeatureStatsType>& bleuStats);
private:
const ReferenceSet& references_;
std::vector<VertexState> vertexStates_;
size_t sentenceId_;
size_t totalSourceLength_;
const Graph& graph_;
std::vector<FeatureStatsType> backgroundBleu_;
FeatureStatsType backgroundRefLength_;
private:
const ReferenceSet& references_;
std::vector<VertexState> vertexStates_;
size_t sentenceId_;
size_t totalSourceLength_;
const Graph& graph_;
std::vector<FeatureStatsType> backgroundBleu_;
FeatureStatsType backgroundRefLength_;
void UpdateMatches(const NgramCounter& counter, std::vector<FeatureStatsType>& bleuStats) const;
size_t GetTargetLength(const Edge& edge) const;
void UpdateMatches(const NgramCounter& counter, std::vector<FeatureStatsType>& bleuStats) const;
size_t GetTargetLength(const Edge& edge) const;
};
struct HgHypothesis {

View File

@ -15,7 +15,7 @@ BOOST_AUTO_TEST_CASE(viterbi_simple_lattice)
Vocab vocab;
WordVec words;
string wordStrings[] =
{"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g"};
{"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g"};
for (size_t i = 0; i < 9; ++i) {
words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
}
@ -102,7 +102,7 @@ BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
Vocab vocab;
WordVec words;
string wordStrings[] =
{"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
{"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
for (size_t i = 0; i < 13; ++i) {
words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
}

View File

@ -34,11 +34,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
namespace fs = boost::filesystem;
namespace MosesTuning {
namespace MosesTuning
{
static const ValType BLEU_RATIO = 5;
ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv)
{
vector<ValType> stats(scorer_->NumberOfScores(),0);
for(reset(); !finished(); next()) {
vector<ValType> sent;
@ -51,13 +53,14 @@ ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
}
NbestHopeFearDecoder::NbestHopeFearDecoder(
const vector<string>& featureFiles,
const vector<string>& scoreFiles,
bool streaming,
bool no_shuffle,
bool safe_hope,
Scorer* scorer
) : safe_hope_(safe_hope) {
const vector<string>& featureFiles,
const vector<string>& scoreFiles,
bool streaming,
bool no_shuffle,
bool safe_hope,
Scorer* scorer
) : safe_hope_(safe_hope)
{
scorer_ = scorer;
if (streaming) {
train_.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
@ -67,25 +70,29 @@ NbestHopeFearDecoder::NbestHopeFearDecoder(
}
void NbestHopeFearDecoder::next() {
// Iterator step: forwards to train_, the enumerator set up in the constructor.
void NbestHopeFearDecoder::next()
{
train_->next();
}
bool NbestHopeFearDecoder::finished() {
// Reports whether iteration is complete, as answered by train_.
bool NbestHopeFearDecoder::finished()
{
return train_->finished();
}
void NbestHopeFearDecoder::reset() {
// Restarts iteration by calling reset() on the object train_ points to.
void NbestHopeFearDecoder::reset()
{
train_->reset();
}
void NbestHopeFearDecoder::HopeFear(
const std::vector<ValType>& backgroundBleu,
const MiraWeightVector& wv,
HopeFearData* hopeFear
) {
const std::vector<ValType>& backgroundBleu,
const MiraWeightVector& wv,
HopeFearData* hopeFear
)
{
// Hope / fear decode
ValType hope_scale = 1.0;
size_t hope_index=0, fear_index=0, model_index=0;
@ -134,7 +141,8 @@ void NbestHopeFearDecoder::HopeFear(
hopeFear->hopeFearEqual = (hope_index == fear_index);
}
void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats) {
void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
{
// Find max model
size_t max_index=0;
ValType max_score=0;
@ -152,18 +160,19 @@ void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValTy
HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
(
const string& hypergraphDir,
const vector<string>& referenceFiles,
size_t num_dense,
bool streaming,
bool no_shuffle,
bool safe_hope,
size_t hg_pruning,
const MiraWeightVector& wv,
Scorer* scorer
) :
num_dense_(num_dense) {
(
const string& hypergraphDir,
const vector<string>& referenceFiles,
size_t num_dense,
bool streaming,
bool no_shuffle,
bool safe_hope,
size_t hg_pruning,
const MiraWeightVector& wv,
Scorer* scorer
) :
num_dense_(num_dense)
{
UTIL_THROW_IF(streaming, util::Exception, "Streaming not currently supported for hypergraphs");
UTIL_THROW_IF(!fs::exists(hypergraphDir), HypergraphException, "Directory '" << hypergraphDir << "' does not exist");
@ -177,17 +186,17 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
static const string kWeights = "weights";
fs::directory_iterator dend;
size_t fileCount = 0;
cerr << "Reading hypergraphs" << endl;
for (fs::directory_iterator di(hypergraphDir); di != dend; ++di) {
const fs::path& hgpath = di->path();
if (hgpath.filename() == kWeights) continue;
// cerr << "Reading " << hgpath.filename() << endl;
// cerr << "Reading " << hgpath.filename() << endl;
Graph graph(vocab_);
size_t id = boost::lexical_cast<size_t>(hgpath.stem().string());
util::scoped_fd fd(util::OpenReadOrThrow(hgpath.string().c_str()));
//util::FilePiece file(di->path().string().c_str());
util::FilePiece file(fd.release());
util::FilePiece file(fd.release());
ReadGraph(file,graph);
//cerr << "ref length " << references_.Length(id) << endl;
@ -196,7 +205,7 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
prunedGraph.reset(new Graph(vocab_));
graph.Prune(prunedGraph.get(), weights, edgeCount);
graphs_[id] = prunedGraph;
// cerr << "Pruning to v=" << graphs_[id]->VertexSize() << " e=" << graphs_[id]->EdgeSize() << endl;
// cerr << "Pruning to v=" << graphs_[id]->VertexSize() << " e=" << graphs_[id]->EdgeSize() << endl;
++fileCount;
if (fileCount % 10 == 0) cerr << ".";
if (fileCount % 400 == 0) cerr << " [count=" << fileCount << "]\n";
@ -211,23 +220,27 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
}
void HypergraphHopeFearDecoder::reset() {
// Restarts iteration: points sentenceIdIter_ at the first element of sentenceIds_.
void HypergraphHopeFearDecoder::reset()
{
sentenceIdIter_ = sentenceIds_.begin();
}
void HypergraphHopeFearDecoder::next() {
// Iterator step: move sentenceIdIter_ on to the following sentence id.
void HypergraphHopeFearDecoder::next()
{
  ++sentenceIdIter_;
}
bool HypergraphHopeFearDecoder::finished() {
// True once sentenceIdIter_ has reached the end of sentenceIds_.
bool HypergraphHopeFearDecoder::finished()
{
return sentenceIdIter_ == sentenceIds_.end();
}
void HypergraphHopeFearDecoder::HopeFear(
const vector<ValType>& backgroundBleu,
const MiraWeightVector& wv,
HopeFearData* hopeFear
) {
const vector<ValType>& backgroundBleu,
const MiraWeightVector& wv,
HopeFearData* hopeFear
)
{
size_t sentenceId = *sentenceIdIter_;
SparseVector weights;
wv.ToSparse(&weights);
@ -247,12 +260,12 @@ void HypergraphHopeFearDecoder::HopeFear(
Viterbi(graph, weights, 0, references_, sentenceId, backgroundBleu, &modelHypo);
// Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
// Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
// where model score is having far more influence than BLEU
// hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
// if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
// hope_scale = abs(hope_bleu) / abs(hope_model);
// else break;
// hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
// if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
// hope_scale = abs(hope_bleu) / abs(hope_model);
// else break;
//TODO: Don't currently get model and bleu so commented this out for now.
break;
}
@ -311,15 +324,16 @@ void HypergraphHopeFearDecoder::HopeFear(
if (hopeFear->hopeFearEqual) {
for (size_t i = 0; i < fearStats.size(); ++i) {
if (fearStats[i] != hopeFear->hopeStats[i]) {
hopeFear->hopeFearEqual = false;
break;
hopeFear->hopeFearEqual = false;
break;
}
}
}
hopeFear->hopeFearEqual = hopeFear->hopeFearEqual && (hopeFear->fearFeatures == hopeFear->hopeFeatures);
}
void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats) {
void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats)
{
assert(!finished());
HgHypothesis bestHypo;
size_t sentenceId = *sentenceIdIter_;

View File

@ -35,7 +35,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
// the n-best list and lattice/hypergraph implementations
//
namespace MosesTuning {
namespace MosesTuning
{
class Scorer;
@ -44,7 +45,7 @@ struct HopeFearData {
MiraFeatureVector modelFeatures;
MiraFeatureVector hopeFeatures;
MiraFeatureVector fearFeatures;
std::vector<float> modelStats;
std::vector<float> hopeStats;
@ -55,7 +56,8 @@ struct HopeFearData {
};
//Abstract base class
class HopeFearDecoder {
class HopeFearDecoder
{
public:
//iterator methods
virtual void reset() = 0;
@ -68,10 +70,10 @@ public:
* Calculate hope, fear and model hypotheses
**/
virtual void HopeFear(
const std::vector<ValType>& backgroundBleu,
const MiraWeightVector& wv,
HopeFearData* hopeFear
) = 0;
const std::vector<ValType>& backgroundBleu,
const MiraWeightVector& wv,
HopeFearData* hopeFear
) = 0;
/** Max score decoding */
virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
@ -86,25 +88,26 @@ protected:
/** Gets hope-fear from nbest lists */
class NbestHopeFearDecoder : public virtual HopeFearDecoder {
class NbestHopeFearDecoder : public virtual HopeFearDecoder
{
public:
NbestHopeFearDecoder(const std::vector<std::string>& featureFiles,
const std::vector<std::string>& scoreFiles,
bool streaming,
bool no_shuffle,
bool safe_hope,
Scorer* scorer
);
const std::vector<std::string>& scoreFiles,
bool streaming,
bool no_shuffle,
bool safe_hope,
Scorer* scorer
);
virtual void reset();
virtual void next();
virtual bool finished();
virtual void HopeFear(
const std::vector<ValType>& backgroundBleu,
const MiraWeightVector& wv,
HopeFearData* hopeFear
);
const std::vector<ValType>& backgroundBleu,
const MiraWeightVector& wv,
HopeFearData* hopeFear
);
virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
@ -117,29 +120,30 @@ private:
/** Gets hope-fear from hypergraphs */
class HypergraphHopeFearDecoder : public virtual HopeFearDecoder {
class HypergraphHopeFearDecoder : public virtual HopeFearDecoder
{
public:
HypergraphHopeFearDecoder(
const std::string& hypergraphDir,
const std::vector<std::string>& referenceFiles,
size_t num_dense,
bool streaming,
bool no_shuffle,
bool safe_hope,
size_t hg_pruning,
const MiraWeightVector& wv,
Scorer* scorer_
);
const std::string& hypergraphDir,
const std::vector<std::string>& referenceFiles,
size_t num_dense,
bool streaming,
bool no_shuffle,
bool safe_hope,
size_t hg_pruning,
const MiraWeightVector& wv,
Scorer* scorer_
);
virtual void reset();
virtual void next();
virtual bool finished();
virtual void HopeFear(
const std::vector<ValType>& backgroundBleu,
const MiraWeightVector& wv,
HopeFearData* hopeFear
);
const std::vector<ValType>& backgroundBleu,
const MiraWeightVector& wv,
HopeFearData* hopeFear
);
virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);

View File

@ -55,7 +55,8 @@ void HwcmScorer::setReferenceFiles(const vector<string>& referenceFiles)
}
void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history, vector<map<string, int> > & hwc) {
void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history, vector<map<string, int> > & hwc)
{
if (tree->GetLength() > 0) {
string head = getHead(tree);
@ -64,8 +65,7 @@ void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history
for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it) {
extractHeadWordChain(*it, history, hwc);
}
}
else {
} else {
vector<string> new_history(kHwcmOrder);
new_history[0] = head;
hwc[0][head]++;
@ -85,11 +85,11 @@ void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history
}
}
string HwcmScorer::getHead(TreePointer tree) {
string HwcmScorer::getHead(TreePointer tree)
{
// assumption (only true for dependency parse: each constituent has a preterminal label, and corresponding terminal is head)
// if constituent has multiple preterminals, first one is picked; if it has no preterminals, empty string is returned
for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it)
{
for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it) {
TreePointer child = *it;
if (child->GetLength() == 1 && child->GetChildren()[0]->IsTerminal()) {

View File

@ -31,18 +31,22 @@ using namespace std;
static const string kBOS = "<s>";
static const string kEOS = "</s>";
namespace MosesTuning {
namespace MosesTuning
{
StringPiece NextLine(util::FilePiece& from) {
/**
 * Read lines from `from` until one is found that does not begin with '#',
 * and return that line. Lines beginning with '#' are discarded.
 */
StringPiece NextLine(util::FilePiece& from)
{
  StringPiece current;
  do {
    current = from.ReadLine();
  } while (current.starts_with("#"));
  return current;
}
Vocab::Vocab() : eos_( FindOrAdd(kEOS)), bos_(FindOrAdd(kBOS)){
// Builds the vocabulary with the sentence-boundary markers already present:
// kEOS ("</s>") and kBOS ("<s>") are passed to FindOrAdd and the resulting
// entries are kept in eos_ and bos_.
// NOTE(review): the order in which the two FindOrAdd calls actually run
// follows the members' declaration order, which is not visible here.
Vocab::Vocab() : eos_( FindOrAdd(kEOS)), bos_(FindOrAdd(kBOS))
{
}
const Vocab::Entry &Vocab::FindOrAdd(const StringPiece &str) {
const Vocab::Entry &Vocab::FindOrAdd(const StringPiece &str)
{
#if BOOST_VERSION >= 104200
Map::const_iterator i= map_.find(str, Hash(), Equals());
#else
@ -62,7 +66,8 @@ double_conversion::StringToDoubleConverter converter(double_conversion::StringTo
/**
* Reads an incoming edge. Returns edge and source words covered.
**/
static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph)
{
Edge* edge = graph.NewEdge();
StringPiece line = from.ReadLine(); //Don't allow comments within edge lists
util::TokenIter<util::MultiCharacter> pipes(line, util::MultiCharacter(" ||| "));
@ -82,7 +87,7 @@ static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
edge->AddWord(&found);
}
}
//Features
++pipes;
for (util::TokenIter<util::SingleCharacter, true> i(*pipes, util::SingleCharacter(' ')); i; ++i) {
@ -100,17 +105,18 @@ static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
//Covered words
++pipes;
size_t sourceCovered = boost::lexical_cast<size_t>(*pipes);
return pair<Edge*,size_t>(edge,sourceCovered);
return pair<Edge*,size_t>(edge,sourceCovered);
}
void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeCount) const {
void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeCount) const
{
Graph& newGraph = *pNewGraph;
//TODO: Optimise case where no pruning required
//For debug
/*
map<const Edge*, string> edgeIds;
for (size_t i = 0; i < edges_.Size(); ++i) {
@ -136,7 +142,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
//Compute backward scores
for (size_t vi = 0; vi < vertices_.Size(); ++vi) {
// cerr << "Vertex " << vi << endl;
// cerr << "Vertex " << vi << endl;
const Vertex& vertex = vertices_[vi];
const vector<const Edge*>& incoming = vertex.GetIncoming();
if (!incoming.size()) {
@ -150,7 +156,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
//cerr << "\tChild " << incoming[ei]->Children()[i] << endl;
size_t childId = incoming[ei]->Children()[i];
UTIL_THROW_IF(vertexBackwardScores[childId] == kMinScore,
HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
outgoing[childId].push_back(incoming[ei]);
incomingScore += vertexBackwardScores[childId];
}
@ -172,7 +178,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
} else {
for (size_t ei = 0; ei < outgoing[vi].size(); ++ei) {
//cerr << "Edge " << edgeIds[outgoing[vi][ei]] << endl;
FeatureStatsType outgoingScore = 0;
FeatureStatsType outgoingScore = 0;
//add score of head
outgoingScore += vertexForwardScores[edgeHeads[outgoing[vi][ei]]];
//cerr << "Forward score " << outgoingScore << endl;
@ -204,11 +210,11 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
}
FeatureStatsType score = edgeForwardScores[edge] + edgeBackwardScores[edge];
edgeScores.insert(pair<FeatureStatsType, const Edge*>(score,edge));
// cerr << edgeIds[edge] << " " << score << endl;
// cerr << edgeIds[edge] << " " << score << endl;
}
multimap<FeatureStatsType, const Edge*>::const_reverse_iterator ei = edgeScores.rbegin();
size_t edgeCount = 1;
while(edgeCount < minEdgeCount && ei != edgeScores.rend()) {
@ -235,10 +241,10 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
map<size_t,size_t> oldIdToNew;
size_t vi = 0;
for (set<size_t>::const_iterator i = retainedVertices.begin(); i != retainedVertices.end(); ++i, ++vi) {
// cerr << *i << " New: " << vi << endl;
// cerr << *i << " New: " << vi << endl;
oldIdToNew[*i] = vi;
Vertex* vertex = newGraph.NewVertex();
vertex->SetSourceCovered(vertices_[*i].SourceCovered());
vertex->SetSourceCovered(vertices_[*i].SourceCovered());
}
for (set<const Edge*>::const_iterator i = retainedEdges.begin(); i != retainedEdges.end(); ++i) {
@ -255,7 +261,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
newHead.AddEdge(newEdge);
}
/*
cerr << "New graph" << endl;
for (size_t vi = 0; vi < newGraph.VertexSize(); ++vi) {
@ -275,21 +281,22 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
}
cerr << endl;
}
*/
*/
}
/**
* Read from "Kenneth's hypergraph" aka cdec target_graph format (with comments)
**/
void ReadGraph(util::FilePiece &from, Graph &graph) {
void ReadGraph(util::FilePiece &from, Graph &graph)
{
//First line should contain field names
StringPiece line = from.ReadLine();
UTIL_THROW_IF(line.compare("# target ||| features ||| source-covered") != 0, HypergraphException, "Incorrect format spec on first line: '" << line << "'");
line = NextLine(from);
//Then expect numbers of vertices
util::TokenIter<util::SingleCharacter, false> i(line, util::SingleCharacter(' '));
unsigned long int vertices = boost::lexical_cast<unsigned long int>(*i);
@ -304,9 +311,11 @@ void ReadGraph(util::FilePiece &from, Graph &graph) {
for (unsigned long int e = 0; e < edge_count; ++e) {
pair<Edge*,size_t> edge = ReadEdge(from, graph);
vertex->AddEdge(edge.first);
//Note: the file format attaches this to the edge, but it's really a property
//Note: the file format attaches this to the edge, but it's really a property
//of the vertex.
if (!e) {vertex->SetSourceCovered(edge.second);}
if (!e) {
vertex->SetSourceCovered(edge.second);
}
}
}
}

View File

@ -37,81 +37,88 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "FeatureStats.h"
namespace MosesTuning {
namespace MosesTuning
{
typedef unsigned int WordIndex;
const WordIndex kMaxWordIndex = UINT_MAX;
const FeatureStatsType kMinScore = -1e10;
template <class T> class FixedAllocator : boost::noncopyable {
public:
FixedAllocator() : current_(NULL), end_(NULL) {}
template <class T> class FixedAllocator : boost::noncopyable
{
public:
FixedAllocator() : current_(NULL), end_(NULL) {}
void Init(std::size_t count) {
assert(!current_);
array_.reset(new T[count]);
current_ = array_.get();
end_ = current_ + count;
}
void Init(std::size_t count) {
assert(!current_);
array_.reset(new T[count]);
current_ = array_.get();
end_ = current_ + count;
}
T &operator[](std::size_t idx) {
return array_.get()[idx];
}
const T &operator[](std::size_t idx) const {
return array_.get()[idx];
}
T &operator[](std::size_t idx) {
return array_.get()[idx];
}
const T &operator[](std::size_t idx) const {
return array_.get()[idx];
}
T *New() {
T *ret = current_++;
UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end");
return ret;
}
/**
 * Hand out the next unused element of the pre-allocated array.
 *
 * @return pointer to the element; the cursor is advanced past it.
 * @throws util::Exception ("Allocating past end") when every element has
 *         already been handed out, or when Init() was never called.
 */
T *New() {
  // Check before advancing: a failed call must not move the cursor beyond
  // end_ (which would make Size() exceed Capacity()) and must not perform
  // arithmetic on a null cursor when the allocator is uninitialised.
  UTIL_THROW_IF(current_ >= end_, util::Exception, "Allocating past end");
  return current_++;
}
std::size_t Capacity() const {
return end_ - array_.get();
}
std::size_t Capacity() const {
return end_ - array_.get();
}
std::size_t Size() const {
return current_ - array_.get();
}
std::size_t Size() const {
return current_ - array_.get();
}
private:
boost::scoped_array<T> array_;
T *current_, *end_;
private:
boost::scoped_array<T> array_;
T *current_, *end_;
};
class Vocab {
public:
Vocab();
class Vocab
{
public:
Vocab();
typedef std::pair<const char *const, WordIndex> Entry;
typedef std::pair<const char *const, WordIndex> Entry;
const Entry &FindOrAdd(const StringPiece &str);
const Entry &FindOrAdd(const StringPiece &str);
const Entry& Bos() const {return bos_;}
const Entry& Bos() const {
return bos_;
}
const Entry& Eos() const {return eos_;}
const Entry& Eos() const {
return eos_;
}
private:
util::Pool piece_backing_;
private:
util::Pool piece_backing_;
struct Hash : public std::unary_function<const char *, std::size_t> {
std::size_t operator()(StringPiece str) const {
return util::MurmurHashNative(str.data(), str.size());
}
};
struct Hash : public std::unary_function<const char *, std::size_t> {
std::size_t operator()(StringPiece str) const {
return util::MurmurHashNative(str.data(), str.size());
}
};
struct Equals : public std::binary_function<const char *, const char *, bool> {
bool operator()(StringPiece first, StringPiece second) const {
return first == second;
}
};
struct Equals : public std::binary_function<const char *, const char *, bool> {
bool operator()(StringPiece first, StringPiece second) const {
return first == second;
}
};
typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map;
Map map_;
Entry eos_;
Entry bos_;
typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map;
Map map_;
Entry eos_;
Entry bos_;
};
@ -125,121 +132,141 @@ typedef boost::shared_ptr<SparseVector> FeaturePtr;
/**
* An edge has 1 head vertex, 0..n child (tail) vertices, a list of words and a feature vector.
**/
class Edge {
public:
Edge() {features_.reset(new SparseVector());}
class Edge
{
public:
Edge() {
features_.reset(new SparseVector());
}
void AddWord(const Vocab::Entry *word) {
words_.push_back(word);
}
void AddWord(const Vocab::Entry *word) {
words_.push_back(word);
}
void AddChild(size_t child) {
children_.push_back(child);
}
void AddChild(size_t child) {
children_.push_back(child);
}
void AddFeature(const StringPiece& name, FeatureStatsType value) {
//TODO StringPiece interface
features_->set(name.as_string(),value);
}
void AddFeature(const StringPiece& name, FeatureStatsType value) {
//TODO StringPiece interface
features_->set(name.as_string(),value);
}
const WordVec &Words() const {
return words_;
}
const FeaturePtr& Features() const {
return features_;
}
const WordVec &Words() const {
return words_;
}
void SetFeatures(const FeaturePtr& features) {
features_ = features;
}
const FeaturePtr& Features() const {
return features_;
}
const std::vector<size_t>& Children() const {
return children_;
}
void SetFeatures(const FeaturePtr& features) {
features_ = features;
}
FeatureStatsType GetScore(const SparseVector& weights) const {
return inner_product(*(features_.get()), weights);
}
const std::vector<size_t>& Children() const {
return children_;
}
private:
// NULL for non-terminals.
std::vector<const Vocab::Entry*> words_;
std::vector<size_t> children_;
boost::shared_ptr<SparseVector> features_;
/* Score of this edge: inner product of its feature vector with `weights`. */
FeatureStatsType GetScore(const SparseVector& weights) const {
  const SparseVector& ownFeatures = *(features_.get());
  return inner_product(ownFeatures, weights);
}
private:
// NULL for non-terminals.
std::vector<const Vocab::Entry*> words_;
std::vector<size_t> children_;
boost::shared_ptr<SparseVector> features_;
};
/*
* A vertex has 0..n incoming edges
**/
class Vertex {
public:
Vertex() : sourceCovered_(0) {}
class Vertex
{
public:
Vertex() : sourceCovered_(0) {}
void AddEdge(const Edge* edge) {incoming_.push_back(edge);}
void AddEdge(const Edge* edge) {
incoming_.push_back(edge);
}
void SetSourceCovered(size_t sourceCovered) {sourceCovered_ = sourceCovered;}
void SetSourceCovered(size_t sourceCovered) {
sourceCovered_ = sourceCovered;
}
const std::vector<const Edge*>& GetIncoming() const {return incoming_;}
const std::vector<const Edge*>& GetIncoming() const {
return incoming_;
}
size_t SourceCovered() const {return sourceCovered_;}
size_t SourceCovered() const {
return sourceCovered_;
}
private:
std::vector<const Edge*> incoming_;
size_t sourceCovered_;
private:
std::vector<const Edge*> incoming_;
size_t sourceCovered_;
};
class Graph : boost::noncopyable {
public:
Graph(Vocab& vocab) : vocab_(vocab) {}
class Graph : boost::noncopyable
{
public:
Graph(Vocab& vocab) : vocab_(vocab) {}
void SetCounts(std::size_t vertices, std::size_t edges) {
vertices_.Init(vertices);
edges_.Init(edges);
}
void SetCounts(std::size_t vertices, std::size_t edges) {
vertices_.Init(vertices);
edges_.Init(edges);
}
Vocab &MutableVocab() { return vocab_; }
Vocab &MutableVocab() {
return vocab_;
}
Edge *NewEdge() {
return edges_.New();
}
Edge *NewEdge() {
return edges_.New();
}
Vertex *NewVertex() {
return vertices_.New();
}
Vertex *NewVertex() {
return vertices_.New();
}
const Vertex &GetVertex(std::size_t index) const {
return vertices_[index];
}
const Vertex &GetVertex(std::size_t index) const {
return vertices_[index];
}
Edge &GetEdge(std::size_t index) {
return edges_[index];
}
Edge &GetEdge(std::size_t index) {
return edges_[index];
}
/* Created a pruned copy of this graph with minEdgeCount edges. Uses
the scores in the max-product semiring to rank edges, as suggested by
Colin Cherry */
void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const;
/* Creates a pruned copy of this graph with minEdgeCount edges. Uses
the scores in the max-product semiring to rank edges, as suggested by
Colin Cherry. */
void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const;
std::size_t VertexSize() const { return vertices_.Size(); }
std::size_t EdgeSize() const { return edges_.Size(); }
std::size_t VertexSize() const {
return vertices_.Size();
}
std::size_t EdgeSize() const {
return edges_.Size();
}
bool IsBoundary(const Vocab::Entry* word) const {
return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second;
}
/**
 * Tell whether `word` is the vocabulary's Bos() or Eos() entry, compared by
 * word index.
 *
 * Edges store NULL in their word list for non-terminals; such an entry is
 * never a boundary word, so NULL yields false instead of being dereferenced.
 */
bool IsBoundary(const Vocab::Entry* word) const {
  if (word == NULL) {
    return false;
  }
  const WordIndex index = word->second;
  return index == vocab_.Bos().second || index == vocab_.Eos().second;
}
private:
FixedAllocator<Edge> edges_;
FixedAllocator<Vertex> vertices_;
Vocab& vocab_;
private:
FixedAllocator<Edge> edges_;
FixedAllocator<Vertex> vertices_;
Vocab& vocab_;
};
class HypergraphException : public util::Exception {
public:
HypergraphException() {}
~HypergraphException() throw() {}
class HypergraphException : public util::Exception
{
public:
HypergraphException() {}
~HypergraphException() throw() {}
};

View File

@ -8,12 +8,12 @@
using namespace std;
using namespace MosesTuning;
BOOST_AUTO_TEST_CASE(prune)
BOOST_AUTO_TEST_CASE(prune)
{
Vocab vocab;
WordVec words;
string wordStrings[] =
{"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
{"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
for (size_t i = 0; i < 13; ++i) {
words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
}
@ -105,7 +105,7 @@ BOOST_AUTO_TEST_CASE(prune)
BOOST_CHECK_EQUAL(5, pruned.EdgeSize());
BOOST_CHECK_EQUAL(4, pruned.VertexSize());
//edges retained should be best path (<s> ab jk </s>) and hi
BOOST_CHECK_EQUAL(1, pruned.GetVertex(0).GetIncoming().size());
BOOST_CHECK_EQUAL(2, pruned.GetVertex(1).GetIncoming().size());
@ -115,37 +115,37 @@ BOOST_AUTO_TEST_CASE(prune)
const Edge* edge;
edge = pruned.GetVertex(0).GetIncoming()[0];
BOOST_CHECK_EQUAL(1, edge->Words().size());
BOOST_CHECK_EQUAL(words[0], edge->Words()[0]);
BOOST_CHECK_EQUAL(1, edge->Words().size());
BOOST_CHECK_EQUAL(words[0], edge->Words()[0]);
edge = pruned.GetVertex(1).GetIncoming()[0];
BOOST_CHECK_EQUAL(3, edge->Words().size());
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
BOOST_CHECK_EQUAL(words[2]->first, edge->Words()[1]->first);
BOOST_CHECK_EQUAL(words[3]->first, edge->Words()[2]->first);
BOOST_CHECK_EQUAL(3, edge->Words().size());
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
BOOST_CHECK_EQUAL(words[2]->first, edge->Words()[1]->first);
BOOST_CHECK_EQUAL(words[3]->first, edge->Words()[2]->first);
edge = pruned.GetVertex(1).GetIncoming()[1];
BOOST_CHECK_EQUAL(3, edge->Words().size());
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
BOOST_CHECK_EQUAL(words[9]->first, edge->Words()[1]->first);
BOOST_CHECK_EQUAL(words[9]->first, edge->Words()[1]->first);
BOOST_CHECK_EQUAL(words[10]->first, edge->Words()[2]->first);
edge = pruned.GetVertex(2).GetIncoming()[0];
BOOST_CHECK_EQUAL(3, edge->Words().size());
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
BOOST_CHECK_EQUAL(words[11]->first, edge->Words()[1]->first);
BOOST_CHECK_EQUAL(words[11]->first, edge->Words()[1]->first);
BOOST_CHECK_EQUAL(words[12]->first, edge->Words()[2]->first);
edge = pruned.GetVertex(3).GetIncoming()[0];
BOOST_CHECK_EQUAL(2, edge->Words().size());
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
BOOST_CHECK_EQUAL(words[1]->first, edge->Words()[1]->first);
BOOST_CHECK_EQUAL(words[1]->first, edge->Words()[1]->first);
// BOOST_CHECK_EQUAL(words[0], pruned.GetVertex(0).GetIncoming()[0].Words()[0]);
// BOOST_CHECK_EQUAL(words[0], pruned.GetVertex(0).GetIncoming()[0].Words()[0]);
}

View File

@ -174,19 +174,19 @@ float InterpolatedScorer::calculateScore(const std::vector<ScoreStatsType>& tota
float InterpolatedScorer::getReferenceLength(const std::vector<ScoreStatsType>& totals) const
{
size_t scorerNum = 0;
size_t last = 0;
float refLen = 0;
for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
itsc != m_scorers.end(); ++itsc) {
int numScoresScorer = (*itsc)->NumberOfScores();
std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer);
refLen += (*itsc)->getReferenceLength(totals_scorer) * m_scorer_weights[scorerNum];
last += numScoresScorer;
scorerNum++;
}
return refLen;
size_t scorerNum = 0;
size_t last = 0;
float refLen = 0;
for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
itsc != m_scorers.end(); ++itsc) {
int numScoresScorer = (*itsc)->NumberOfScores();
std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer);
refLen += (*itsc)->getReferenceLength(totals_scorer) * m_scorer_weights[scorerNum];
last += numScoresScorer;
scorerNum++;
}
return refLen;
}
void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
{

View File

@ -9,7 +9,8 @@ namespace MosesTuning
{
void MiraFeatureVector::InitSparse(const SparseVector& sparse, size_t ignoreLimit) {
void MiraFeatureVector::InitSparse(const SparseVector& sparse, size_t ignoreLimit)
{
vector<size_t> sparseFeats = sparse.feats();
bool bFirst = true;
size_t lastFeat = 0;
@ -40,7 +41,8 @@ MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
InitSparse(vec.sparse);
}
MiraFeatureVector::MiraFeatureVector(const SparseVector& sparse, size_t num_dense) {
MiraFeatureVector::MiraFeatureVector(const SparseVector& sparse, size_t num_dense)
{
m_dense.resize(num_dense);
//Assume that features with id [0,num_dense) are the dense features
for (size_t id = 0; id < num_dense; ++id) {
@ -162,7 +164,8 @@ MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector&
return MiraFeatureVector(dense,sparseFeats,sparseVals);
}
bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b) {
bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b)
{
ValType eps = 1e-8;
//dense features
if (a.m_dense.size() != b.m_dense.size()) return false;

View File

@ -93,7 +93,8 @@ void MiraWeightVector::update(size_t index, ValType delta)
m_lastUpdated[index] = m_numUpdates;
}
void MiraWeightVector::ToSparse(SparseVector* sparse) const {
void MiraWeightVector::ToSparse(SparseVector* sparse) const
{
for (size_t i = 0; i < m_weights.size(); ++i) {
if(abs(m_weights[i])>1e-8) {
sparse->set(i,m_weights[i]);
@ -171,7 +172,8 @@ size_t AvgWeightVector::size() const
return m_wv.m_weights.size();
}
void AvgWeightVector::ToSparse(SparseVector* sparse) const {
void AvgWeightVector::ToSparse(SparseVector* sparse) const
{
for (size_t i = 0; i < size(); ++i) {
ValType w = weight(i);
if(abs(w)>1e-8) {

View File

@ -23,7 +23,7 @@ namespace MosesTuning
*/
class StatisticsBasedScorer : public Scorer
{
friend class HopeFearDecoder;
friend class HopeFearDecoder;
public:
StatisticsBasedScorer(const std::string& name, const std::string& config);

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -23,15 +23,15 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace TERCpp
{
string alignmentStruct::toString()
{
stringstream s;
string alignmentStruct::toString()
{
stringstream s;
// s << "nword : " << vectorToString(nwords)<<endl;
// s << "alignment" << vectorToString(alignment)<<endl;
// s << "afterShift" << vectorToString(alignment)<<endl;
s << "Nothing to be printed" <<endl;
return s.str();
}
s << "Nothing to be printed" <<endl;
return s.str();
}
// alignmentStruct::alignmentStruct()
// {
@ -99,7 +99,7 @@ namespace TERCpp
// return s.str();
// }
/* The distance of the shift. */
/* The distance of the shift. */
// int alignmentStruct::distance()
// {
// if (moveto < start)

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -34,10 +34,10 @@ using namespace Tools;
namespace TERCpp
{
class alignmentStruct
{
private:
public:
class alignmentStruct
{
private:
public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@ -53,14 +53,14 @@ namespace TERCpp
// int end;
// int moveto;
// int newloc;
vector<string> nwords; // The words we shifted
vector<char> alignment ; // for pra_more output
vector<vecInt> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to
// calculate it multiple times.
double cost;
string toString();
};
vector<string> nwords; // The words we shifted
vector<char> alignment ; // for pra_more output
vector<vecInt> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to
// calculate it multiple times.
double cost;
string toString();
};
}
#endif

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -36,10 +36,10 @@ using namespace Tools;
namespace TERCpp
{
class bestShiftStruct
{
private:
public:
class bestShiftStruct
{
private:
public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@ -55,16 +55,16 @@ namespace TERCpp
// int end;
// int moveto;
// int newloc;
terShift m_best_shift;
terAlignment m_best_align;
bool m_empty;
terShift m_best_shift;
terAlignment m_best_align;
bool m_empty;
// vector<string> nwords; // The words we shifted
// char* alignment ; // for pra_more output
// vector<vecInt> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to
// calculate it multiple times.
// This is used to store the cost of a shift, so we don't have to
// calculate it multiple times.
// double cost;
};
};
}
#endif

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -28,156 +28,142 @@ using namespace std;
namespace HashMapSpace
{
// hashMap::hashMap();
/* hashMap::~hashMap()
{
// vector<stringHasher>::const_iterator del = m_hasher.begin();
for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMap::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMap::trouve ( long searchKey )
/* hashMap::~hashMap()
{
long foundKey;
// vector<stringHasher>::const_iterator del = m_hasher.begin();
for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMap::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMap::trouve ( long searchKey )
{
long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return 1;
}
int hashMap::trouve ( string key )
{
long searchKey=hashValue ( key );
long foundKey;;
}
return 0;
}
int hashMap::trouve ( string key )
{
long searchKey=hashValue ( key );
long foundKey;;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return 1;
}
/**
* long hashMap::hashValue ( string key )
* @param key
* @return
*/
long hashMap::hashValue ( string key )
{
locale loc; // the "C" locale
const collate<char>& coll = use_facet<collate<char> >(loc);
return coll.hash(key.data(),key.data()+key.length());
}
return 0;
}
/**
 * long hashMap::hashValue ( string key )
 * Hashes a string with the collate<char> facet of a default-constructed
 * locale.
 * @param key text to hash
 * @return the facet's hash of the characters of key
 */
long hashMap::hashValue ( string key )
{
  // A default-constructed locale is a copy of the current global locale
  // (the classic "C" locale unless the program installed another one).
  locale currentLocale;
  const char* const first = key.data();
  const char* const last = first + key.length();
  // Alternative kept for reference:
  // boost::hash<string> hasher;
  // return hasher ( key );
  return use_facet<collate<char> >(currentLocale).hash(first, last);
}
/**
* void hashMap::addHasher ( string key, string value )
* @param key
* @param value
*/
void hashMap::addHasher ( string key, string value )
{
if ( trouve ( hashValue ( key ) ) ==0 )
{
}
/**
* void hashMap::addHasher ( string key, string value )
* @param key
* @param value
*/
void hashMap::addHasher ( string key, string value )
{
if ( trouve ( hashValue ( key ) ) ==0 ) {
// cerr << "ICI1" <<endl;
stringHasher H ( hashValue ( key ),key,value );
stringHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
m_hasher.push_back ( H );
}
}
stringHasher hashMap::getHasher ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
stringHasher defaut(0,"","");
m_hasher.push_back ( H );
}
}
stringHasher hashMap::getHasher ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
stringHasher defaut(0,"","");
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return ( *l_hasher );
}
}
return defaut;
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return ( *l_hasher );
}
string hashMap::getValue ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
}
return defaut;
}
string hashMap::getValue ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
return ( *l_hasher ).getValue();
}
}
return "";
return ( *l_hasher ).getValue();
}
string hashMap::searchValue ( string value )
{
}
return "";
}
string hashMap::searchValue ( string value )
{
// long searchKey=hashValue ( key );
// long foundKey;
string foundValue;
string foundValue;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundValue= ( *l_hasher ).getValue();
if ( foundValue.compare ( value ) == 0 )
{
return ( *l_hasher ).getKey();
}
}
return "";
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundValue= ( *l_hasher ).getValue();
if ( foundValue.compare ( value ) == 0 ) {
return ( *l_hasher ).getKey();
}
}
return "";
}
void hashMap::setValue ( string key , string value )
{
long searchKey=hashValue ( key );
long foundKey;
void hashMap::setValue ( string key , string value )
{
long searchKey=hashValue ( key );
long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
( *l_hasher ).setValue ( value );
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
}
}
}
}
}
/**
*
*/
void hashMap::printHash()
{
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
}
/**
*
*/
void hashMap::printHash()
{
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -35,27 +35,27 @@ using namespace std;
namespace HashMapSpace
{
class hashMap
{
private:
vector<stringHasher> m_hasher;
class hashMap
{
private:
vector<stringHasher> m_hasher;
public:
public:
// ~hashMap();
long hashValue ( string key );
int trouve ( long searchKey );
int trouve ( string key );
void addHasher ( string key, string value );
stringHasher getHasher ( string key );
string getValue ( string key );
string searchValue ( string key );
void setValue ( string key , string value );
void printHash();
vector<stringHasher> getHashMap();
string printStringHash();
string printStringHash2();
string printStringHashForLexicon();
};
long hashValue ( string key );
int trouve ( long searchKey );
int trouve ( string key );
void addHasher ( string key, string value );
stringHasher getHasher ( string key );
string getValue ( string key );
string searchValue ( string key );
void setValue ( string key , string value );
void printHash();
vector<stringHasher> getHashMap();
string printStringHash();
string printStringHash2();
string printStringHashForLexicon();
};
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -28,117 +28,108 @@ using namespace std;
namespace HashMapSpace
{
// hashMapInfos::hashMap();
/* hashMapInfos::~hashMap()
{
// vector<infosHasher>::const_iterator del = m_hasher.begin();
for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMapInfos::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMapInfos::trouve ( long searchKey )
/* hashMapInfos::~hashMap()
{
long foundKey;
// vector<infosHasher>::const_iterator del = m_hasher.begin();
for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMapInfos::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMapInfos::trouve ( long searchKey )
{
long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return 1;
}
int hashMapInfos::trouve ( string key )
{
long searchKey=hashValue ( key );
long foundKey;;
}
return 0;
}
int hashMapInfos::trouve ( string key )
{
long searchKey=hashValue ( key );
long foundKey;;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return 1;
}
}
return 0;
}
/**
* long hashMapInfos::hashValue ( string key )
* @param key
* @return
*/
long hashMapInfos::hashValue ( string key )
{
locale loc; // the "C" locale
const collate<char>& coll = use_facet<collate<char> >(loc);
return coll.hash(key.data(),key.data()+key.length());
/**
 * long hashMapInfos::hashValue ( string key )
 * Hashes a string with the collate<char> facet of a default-constructed
 * locale.
 * @param key text to hash
 * @return the facet's hash of the characters of key
 */
long hashMapInfos::hashValue ( string key )
{
  // A default-constructed locale is a copy of the current global locale
  // (the classic "C" locale unless the program installed another one).
  locale currentLocale;
  const collate<char>& collator = use_facet<collate<char> >(currentLocale);
  const char* const first = key.data();
  // Alternative kept for reference:
  // boost::hash<string> hasher;
  // return hasher ( key );
  return collator.hash(first, first + key.length());
}
/**
* void hashMapInfos::addHasher ( string key, string value )
* @param key
* @param value
*/
void hashMapInfos::addHasher ( string key, vector<int> value )
{
if ( trouve ( hashValue ( key ) ) ==0 )
{
}
/**
 * Stores a new (key, value) entry unless an entry with the same hash key
 * already exists; in that case the call leaves the container unchanged.
 * @param key   text key; its hash is obtained from hashValue()
 * @param value integers stored alongside the key
 */
void hashMapInfos::addHasher ( string key, vector<int> value )
{
  // Hash once and reuse it for both the lookup and the new entry.
  const long hashKey = hashValue ( key );
  if ( trouve ( hashKey ) == 0 ) {
    m_hasher.push_back ( infosHasher ( hashKey, key, value ) );
  }
}
void hashMapInfos::addValue ( string key, vector<int> value )
{
addHasher ( key, value );
}
infosHasher hashMapInfos::getHasher ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
m_hasher.push_back ( H );
}
}
void hashMapInfos::addValue ( string key, vector<int> value )
{
addHasher ( key, value );
}
infosHasher hashMapInfos::getHasher ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return ( *l_hasher );
}
}
vector<int> temp;
infosHasher defaut(0,"",temp);
return defaut;
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return ( *l_hasher );
}
vector<int> hashMapInfos::getValue ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
vector<int> retour;
}
vector<int> temp;
infosHasher defaut(0,"",temp);
return defaut;
}
/**
 * Returns the value of the first stored entry whose hash key equals
 * hashValue(key). Only hash keys are compared.
 * @param key text key to look up
 * @return the stored integers, or an empty vector when nothing matches
 */
vector<int> hashMapInfos::getValue ( string key )
{
  const long searchKey = hashValue ( key );
  for ( vector<infosHasher>::iterator l_hasher = m_hasher.begin(); l_hasher != m_hasher.end(); ++l_hasher ) {
    if ( l_hasher->getHashKey() == searchKey ) {
      return l_hasher->getValue();
    }
  }
  return vector<int>();
}
// string hashMapInfos::searchValue ( string value )
// {
// // long searchKey=hashValue ( key );
@ -158,42 +149,38 @@ namespace HashMapSpace
// }
//
void hashMapInfos::setValue ( string key , vector<int> value )
{
long searchKey=hashValue ( key );
long foundKey;
void hashMapInfos::setValue ( string key , vector<int> value )
{
long searchKey=hashValue ( key );
long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
( *l_hasher ).setValue ( value );
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
}
}
}
string hashMapInfos::toString ()
{
stringstream to_return;
for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
to_return << (*l_hasher).toString();
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
return to_return.str();
}
}
}
string hashMapInfos::toString ()
{
stringstream to_return;
for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
to_return << (*l_hasher).toString();
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
return to_return.str();
}
/**
*
*/
void hashMapInfos::printHash()
{
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
/**
 * Currently a no-op: the statement that printed each entry is commented
 * out, so nothing is written. toString() provides a textual dump instead.
 */
void hashMapInfos::printHash()
{
  // Disabled per-entry output, kept for reference:
  // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
}
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -34,29 +34,29 @@ using namespace std;
namespace HashMapSpace
{
class hashMapInfos
{
private:
vector<infosHasher> m_hasher;
class hashMapInfos
{
private:
vector<infosHasher> m_hasher;
public:
public:
// ~hashMap();
long hashValue ( string key );
int trouve ( long searchKey );
int trouve ( string key );
void addHasher ( string key, vector<int> value );
void addValue ( string key, vector<int> value );
infosHasher getHasher ( string key );
vector<int> getValue ( string key );
long hashValue ( string key );
int trouve ( long searchKey );
int trouve ( string key );
void addHasher ( string key, vector<int> value );
void addValue ( string key, vector<int> value );
infosHasher getHasher ( string key );
vector<int> getValue ( string key );
// string searchValue ( string key );
void setValue ( string key , vector<int> value );
void printHash();
string toString();
vector<infosHasher> getHashMap();
string printStringHash();
string printStringHash2();
string printStringHashForLexicon();
};
void setValue ( string key , vector<int> value );
void printHash();
string toString();
vector<infosHasher> getHashMap();
string printStringHash();
string printStringHash2();
string printStringHashForLexicon();
};
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -27,179 +27,166 @@ using namespace std;
namespace HashMapSpace
{
// hashMapStringInfos::hashMap();
/* hashMapStringInfos::~hashMap()
{
// vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMapStringInfos::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMapStringInfos::trouve ( long searchKey )
{
long foundKey;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
// hashMapStringInfos::hashMap();
/* hashMapStringInfos::~hashMap()
{
// vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMapStringInfos::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMapStringInfos::trouve ( long searchKey )
{
long foundKey;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return 1;
}
}
return 0;
}
int hashMapStringInfos::trouve ( string key )
{
long searchKey = hashValue ( key );
long foundKey;;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
int hashMapStringInfos::trouve ( string key )
{
long searchKey = hashValue ( key );
long foundKey;;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return 1;
}
}
return 0;
}
/**
* long hashMapStringInfos::hashValue ( string key )
* @param key
* @return
*/
long hashMapStringInfos::hashValue ( string key )
{
locale loc; // the "C" locale
const collate<char>& coll = use_facet<collate<char> > ( loc );
return coll.hash ( key.data(), key.data() + key.length() );
/**
 * Computes the numeric hash of a text key with the collate<char> facet.
 * A default-constructed locale is a copy of the current global locale,
 * which is the "C" locale unless the program installed another one.
 * @param key text to hash
 * @return hash of the characters of key
 */
long hashMapStringInfos::hashValue ( string key )
{
  const char* const first = key.data();
  const char* const last = first + key.length();
  locale currentLocale;
  return use_facet<collate<char> > ( currentLocale ).hash ( first, last );
  // Alternative kept from the original author:
  // boost::hash<string> hasher;
  // return hasher ( key );
}
/**
* void hashMapStringInfos::addHasher ( string key, string value )
* @param key
* @param value
*/
void hashMapStringInfos::addHasher ( string key, vector<string> value )
{
if ( trouve ( hashValue ( key ) ) == 0 )
{
// cerr << "ICI1" <<endl;
stringInfosHasher H ( hashValue ( key ), key, value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
}
/**
* void hashMapStringInfos::addHasher ( string key, string value )
* @param key
* @param value
*/
void hashMapStringInfos::addHasher ( string key, vector<string> value )
{
if ( trouve ( hashValue ( key ) ) == 0 ) {
// cerr << "ICI1" <<endl;
stringInfosHasher H ( hashValue ( key ), key, value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
m_hasher.push_back ( H );
}
m_hasher.push_back ( H );
}
}
void hashMapStringInfos::addValue ( string key, vector<string> value )
{
addHasher ( key, value );
}
stringInfosHasher hashMapStringInfos::getHasher ( string key )
{
long searchKey = hashValue ( key );
long foundKey;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return ( *l_hasher );
}
void hashMapStringInfos::addValue ( string key, vector<string> value )
{
addHasher ( key, value );
}
vector<string> tmp;
stringInfosHasher defaut ( 0, "", tmp );
return defaut;
}
vector<string> hashMapStringInfos::getValue ( string key )
{
long searchKey = hashValue ( key );
long foundKey;
vector<string> retour;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
return ( *l_hasher ).getValue();
}
/**
 * Returns a copy of the first stored entry whose hash key equals
 * hashValue(key). Only hash keys are compared.
 * @param key text key to look up
 * @return the matching entry, or an entry built from (0, "", empty vector)
 *         when nothing matches
 */
stringInfosHasher hashMapStringInfos::getHasher ( string key )
{
  const long wanted = hashValue ( key );
  vector<stringInfosHasher>::iterator entry = m_hasher.begin();
  while ( entry != m_hasher.end() ) {
    if ( entry->getHashKey() == wanted ) {
      return *entry;
    }
    ++entry;
  }
  // Nothing matched: hand back a placeholder entry.
  return stringInfosHasher ( 0, "", vector<string>() );
}
vector<string> hashMapStringInfos::getValue ( string key )
{
long searchKey = hashValue ( key );
long foundKey;
vector<string> retour;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
return ( *l_hasher ).getValue();
}
}
return retour;
}
// string hashMapStringInfos::searchValue ( string value )
// {
// // long searchKey=hashValue ( key );
// // long foundKey;
// vector<int> foundValue;
//
// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
// {
// foundValue= ( *l_hasher ).getValue();
// /* if ( foundValue.compare ( value ) == 0 )
// {
// return ( *l_hasher ).getKey();
// }*/
// }
// return "";
// }
//
}
return retour;
}
// string hashMapStringInfos::searchValue ( string value )
// {
// // long searchKey=hashValue ( key );
// // long foundKey;
// vector<int> foundValue;
//
// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
// {
// foundValue= ( *l_hasher ).getValue();
// /* if ( foundValue.compare ( value ) == 0 )
// {
// return ( *l_hasher ).getKey();
// }*/
// }
// return "";
// }
//
void hashMapStringInfos::setValue ( string key , vector<string> value )
{
long searchKey = hashValue ( key );
long foundKey;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
}
}
void hashMapStringInfos::setValue ( string key , vector<string> value )
{
long searchKey = hashValue ( key );
long foundKey;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
}
}
}
string hashMapStringInfos::toString ()
{
stringstream to_return;
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
to_return << (*l_hasher).toString();
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
return to_return.str();
}
string hashMapStringInfos::toString ()
{
stringstream to_return;
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
to_return << (*l_hasher).toString();
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
return to_return.str();
}
/**
 * Currently a no-op: the statement that printed each entry is commented
 * out, so nothing is written. toString() provides a textual dump instead.
 */
void hashMapStringInfos::printHash()
{
  // Disabled per-entry output, kept for reference:
  // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
/**
 * Returns the stored entries by value, i.e. as a copy of the internal list.
 * @return copy of all entries, in storage order
 */
vector< stringInfosHasher > hashMapStringInfos::getHashMap()
{
  vector< stringInfosHasher > snapshot ( m_hasher );
  return snapshot;
}
/**
 * Currently a no-op: the statement that printed each entry is commented
 * out, so nothing is written. toString() provides a textual dump instead.
 */
void hashMapStringInfos::printHash()
{
  // Disabled per-entry output, kept for reference:
  // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
/**
 * Returns the stored entries by value, i.e. as a copy of the internal list.
 * @return copy of all entries, in storage order
 */
vector< stringInfosHasher > hashMapStringInfos::getHashMap()
{
  vector< stringInfosHasher > snapshot ( m_hasher );
  return snapshot;
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -34,29 +34,29 @@ using namespace std;
namespace HashMapSpace
{
class hashMapStringInfos
{
private:
vector<stringInfosHasher> m_hasher;
class hashMapStringInfos
{
private:
vector<stringInfosHasher> m_hasher;
public:
public:
// ~hashMap();
long hashValue ( string key );
int trouve ( long searchKey );
int trouve ( string key );
void addHasher ( string key, vector<string> value );
void addValue ( string key, vector<string> value );
stringInfosHasher getHasher ( string key );
vector<string> getValue ( string key );
long hashValue ( string key );
int trouve ( long searchKey );
int trouve ( string key );
void addHasher ( string key, vector<string> value );
void addValue ( string key, vector<string> value );
stringInfosHasher getHasher ( string key );
vector<string> getValue ( string key );
// string searchValue ( string key );
void setValue ( string key , vector<string> value );
void printHash();
string toString();
vector<stringInfosHasher> getHashMap();
string printStringHash();
string printStringHash2();
string printStringHashForLexicon();
};
void setValue ( string key , vector<string> value );
void printHash();
string toString();
vector<stringInfosHasher> getHashMap();
string printStringHash();
string printStringHash2();
string printStringHashForLexicon();
};
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -27,35 +27,35 @@ using namespace Tools;
namespace HashMapSpace
{
/**
 * Builds an entry from its three parts.
 * @param cle         numeric hash key
 * @param cleTxt      text key
 * @param valueVecInt integers stored with the key
 */
infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
  : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueVecInt )
{
}
/**
 * Builds an entry from its three parts.
 * @param cle         numeric hash key
 * @param cleTxt      text key
 * @param valueVecInt integers stored with the key
 */
infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
  : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueVecInt )
{
}
// infosHasher::~infosHasher(){};*/
long infosHasher::getHashKey()
{
return m_hashKey;
}
string infosHasher::getKey()
{
return m_key;
}
vector<int> infosHasher::getValue()
{
return m_value;
}
/**
 * Replaces the stored integers.
 * @param value new contents
 */
void infosHasher::setValue ( vector<int> value )
{
  // `value` is already a private copy, so exchange contents with it.
  m_value.swap ( value );
}
/**
 * Formats the entry as one line: hash key, text key and the values
 * rendered by vectorToString, separated by tabs and ended by a newline.
 * @return the formatted line
 */
string infosHasher::toString()
{
  stringstream line;
  line << m_hashKey;
  line << "\t" << m_key;
  line << "\t" << vectorToString(m_value,"\t");
  line << endl;
  return line.str();
}
long infosHasher::getHashKey()
{
return m_hashKey;
}
string infosHasher::getKey()
{
return m_key;
}
vector<int> infosHasher::getValue()
{
return m_value;
}
/**
 * Replaces the stored integers.
 * @param value new contents
 */
void infosHasher::setValue ( vector<int> value )
{
  // `value` is already a private copy, so exchange contents with it.
  m_value.swap ( value );
}
/**
 * Formats the entry as one line: hash key, text key and the values
 * rendered by vectorToString, separated by tabs and ended by a newline.
 * @return the formatted line
 */
string infosHasher::toString()
{
  stringstream line;
  line << m_hashKey;
  line << "\t" << m_key;
  line << "\t" << vectorToString(m_value,"\t");
  line << endl;
  return line.str();
}
// typedef stdext::hash_map<std::string,string, stringhasher> HASH_S_S;

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -31,23 +31,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace HashMapSpace
{
class infosHasher
{
private:
long m_hashKey;
string m_key;
vector<int> m_value;
class infosHasher
{
private:
long m_hashKey;
string m_key;
vector<int> m_value;
public:
infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
long getHashKey();
string getKey();
vector<int> getValue();
void setValue ( vector<int> value );
string toString();
public:
infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
long getHashKey();
string getKey();
vector<int> getValue();
void setValue ( vector<int> value );
string toString();
};
};
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -26,29 +26,29 @@ using namespace std;
namespace HashMapSpace
{
/**
 * Builds an entry from its three parts.
 * @param cle      numeric hash key
 * @param cleTxt   text key
 * @param valueTxt text stored with the key
 */
stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
  : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueTxt )
{
}
/**
 * Builds an entry from its three parts.
 * @param cle      numeric hash key
 * @param cleTxt   text key
 * @param valueTxt text stored with the key
 */
stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
  : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueTxt )
{
}
// stringHasher::~stringHasher(){};*/
long stringHasher::getHashKey()
{
return m_hashKey;
}
string stringHasher::getKey()
{
return m_key;
}
string stringHasher::getValue()
{
return m_value;
}
/**
 * Replaces the stored text.
 * @param value new contents
 */
void stringHasher::setValue ( string value )
{
  // `value` is already a private copy, so exchange contents with it.
  m_value.swap ( value );
}
long stringHasher::getHashKey()
{
return m_hashKey;
}
string stringHasher::getKey()
{
return m_key;
}
string stringHasher::getValue()
{
return m_value;
}
/**
 * Replaces the stored text.
 * @param value new contents
 */
void stringHasher::setValue ( string value )
{
  // `value` is already a private copy, so exchange contents with it.
  m_value.swap ( value );
}
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -28,22 +28,22 @@ using namespace std;
namespace HashMapSpace
{
class stringHasher
{
private:
long m_hashKey;
string m_key;
string m_value;
class stringHasher
{
private:
long m_hashKey;
string m_key;
string m_value;
public:
stringHasher ( long cle, string cleTxt, string valueTxt );
long getHashKey();
string getKey();
string getValue();
void setValue ( string value );
public:
stringHasher ( long cle, string cleTxt, string valueTxt );
long getHashKey();
string getKey();
string getValue();
void setValue ( string value );
};
};
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -27,35 +27,35 @@ using namespace Tools;
namespace HashMapSpace
{
/**
 * Builds an entry from its three parts.
 * @param cle         numeric hash key
 * @param cleTxt      text key
 * @param valueVecInt strings stored with the key
 */
stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
  : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueVecInt )
{
}
/**
 * Builds an entry from its three parts.
 * @param cle         numeric hash key
 * @param cleTxt      text key
 * @param valueVecInt strings stored with the key
 */
stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
  : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueVecInt )
{
}
// stringInfosHasher::~stringInfosHasher(){};*/
long stringInfosHasher::getHashKey()
{
return m_hashKey;
}
string stringInfosHasher::getKey()
{
return m_key;
}
vector<string> stringInfosHasher::getValue()
{
return m_value;
}
/**
 * Replaces the stored strings.
 * @param value new contents
 */
void stringInfosHasher::setValue ( vector<string> value )
{
  // `value` is already a private copy, so exchange contents with it.
  m_value.swap ( value );
}
/**
 * Formats the entry as one line: hash key, text key and the values
 * rendered by vectorToString, separated by tabs and ended by a newline.
 * @return the formatted line
 */
string stringInfosHasher::toString()
{
  stringstream line;
  line << m_hashKey;
  line << "\t" << m_key;
  line << "\t" << vectorToString(m_value,"\t");
  line << endl;
  return line.str();
}
long stringInfosHasher::getHashKey()
{
return m_hashKey;
}
string stringInfosHasher::getKey()
{
return m_key;
}
vector<string> stringInfosHasher::getValue()
{
return m_value;
}
/**
 * Replaces the stored strings.
 * @param value new contents
 */
void stringInfosHasher::setValue ( vector<string> value )
{
  // `value` is already a private copy, so exchange contents with it.
  m_value.swap ( value );
}
/**
 * Formats the entry as one line: hash key, text key and the values
 * rendered by vectorToString, separated by tabs and ended by a newline.
 * @return the formatted line
 */
string stringInfosHasher::toString()
{
  stringstream line;
  line << m_hashKey;
  line << "\t" << m_key;
  line << "\t" << vectorToString(m_value,"\t");
  line << endl;
  return line.str();
}
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -29,23 +29,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace HashMapSpace
{
class stringInfosHasher
{
private:
long m_hashKey;
string m_key;
vector<string> m_value;
class stringInfosHasher
{
private:
long m_hashKey;
string m_key;
vector<string> m_value;
public:
stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
long getHashKey();
string getKey();
vector<string> getValue();
void setValue ( vector<string> value );
string toString();
public:
stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
long getHashKey();
string getKey();
vector<string> getValue();
void setValue ( vector<string> value );
string toString();
};
};
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -24,191 +24,163 @@ using namespace std;
namespace TERCpp
{
terAlignment::terAlignment()
{
/**
 * Creates an empty alignment: edit and word counts are zero, the best
 * reference is the empty string and all per-operation counters are zero.
 */
terAlignment::terAlignment()
{
  numEdits=0;
  numWords=0;
  bestRef="";

  // Counters filled in by scoreDetails().
  numIns=0;
  numDel=0;
  numSub=0;
  numSft=0;
  numWsf=0;
  // NOTE(review): averageWords, which scoreAv() reads, is not set here -
  // confirm it is always assigned before scoreAv()/toString() are called.
}
string terAlignment::toString()
{
stringstream s;
s.str ( "" );
s << "Original Ref: \t" << join ( " ", ref ) << endl;
s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
s << "Hyp After Shift:\t" << join ( " ", aftershift );
numIns=0;
numDel=0;
numSub=0;
numSft=0;
numWsf=0;
}
string terAlignment::toString()
{
stringstream s;
s.str ( "" );
s << "Original Ref: \t" << join ( " ", ref ) << endl;
s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
s << "Hyp After Shift:\t" << join ( " ", aftershift );
// s << "Hyp After Shift: " << join ( " ", aftershift );
s << endl;
s << endl;
// string s = "Original Ref: " + join(" ", ref) + "\nOriginal Hyp: " + join(" ", hyp) + "\nHyp After Shift: " + join(" ", aftershift);
if ( ( int ) sizeof ( alignment ) >0 )
{
s << "Alignment: (";
if ( ( int ) sizeof ( alignment ) >0 ) {
s << "Alignment: (";
// s += "\nAlignment: (";
for ( int i = 0; i < ( int ) ( alignment.size() ); i++ )
{
s << alignment[i];
for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) {
s << alignment[i];
// s+=alignment[i];
}
// s += ")";
s << ")";
}
s << endl;
if ( ( int ) allshifts.size() == 0 )
{
// s += "\nNumShifts: 0";
s << "NumShifts: 0";
}
else
{
// s += "\nNumShifts: " + (int)allshifts.size();
s << "NumShifts: "<< ( int ) allshifts.size();
for ( int i = 0; i < ( int ) allshifts.size(); i++ )
{
s << endl << " " ;
s << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i];
}
}
s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
// s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")";
return s.str();
}
string terAlignment::join ( string delim, vector<string> arr )
{
if ( ( int ) arr.size() == 0 ) return "";
// s += ")";
s << ")";
}
s << endl;
if ( ( int ) allshifts.size() == 0 ) {
// s += "\nNumShifts: 0";
s << "NumShifts: 0";
} else {
// s += "\nNumShifts: " + (int)allshifts.size();
s << "NumShifts: "<< ( int ) allshifts.size();
for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
s << endl << " " ;
s << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i];
}
}
s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
// s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")";
return s.str();
}
/**
 * Joins the elements of arr into one string, inserting delim between
 * consecutive elements (no leading or trailing delimiter).
 * @param delim separator placed between elements
 * @param arr   strings to join
 * @return the joined text, or "" when arr is empty
 */
string terAlignment::join ( string delim, vector<string> arr )
{
  if ( arr.empty() ) return "";
  stringstream s;
  s << arr[0];
  for ( size_t i = 1; i < arr.size(); ++i ) {
    s << delim << arr[i];
  }
  return s.str();
}
/**
 * Edit rate relative to numWords.
 * @return numEdits / numWords when numWords is positive; otherwise 1.0 if
 *         there is at least one edit and 0.0 if there is none
 */
double terAlignment::score()
{
  if ( numWords <= 0.0 ) {
    // No words to normalise by: any edit counts as a full error.
    return ( numEdits > 0.0 ) ? 1.0 : 0.0;
  }
  return static_cast<double> ( numEdits ) / numWords;
}
/**
 * Edit rate relative to averageWords.
 * @return numEdits / averageWords when averageWords is positive; otherwise
 *         1.0 if there is at least one edit and 0.0 if there is none
 */
double terAlignment::scoreAv()
{
  if ( averageWords <= 0.0 ) {
    // No words to normalise by: any edit counts as a full error.
    return ( numEdits > 0.0 ) ? 1.0 : 0.0;
  }
  return static_cast<double> ( numEdits ) / averageWords;
}
void terAlignment::scoreDetails()
{
numIns = numDel = numSub = numWsf = numSft = 0;
if((int)allshifts.size()>0) {
for(int i = 0; i < (int)allshifts.size(); ++i) {
numWsf += allshifts[i].size();
}
double terAlignment::score()
{
if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) )
{
return 1.0;
}
if ( numWords <= 0.0 )
{
return 0.0;
}
return ( double ) numEdits / numWords;
numSft = allshifts.size();
}
if((int)alignment.size()>0 ) {
for(int i = 0; i < (int)alignment.size(); ++i) {
switch (alignment[i]) {
case 'S':
case 'T':
numSub++;
break;
case 'D':
numDel++;
break;
case 'I':
numIns++;
break;
}
}
/**
 * Edit rate relative to averageWords.
 * @return numEdits / averageWords when averageWords is positive; otherwise
 *         1.0 if there is at least one edit and 0.0 if there is none
 */
double terAlignment::scoreAv()
{
  if ( averageWords <= 0.0 ) {
    // No words to normalise by: any edit counts as a full error.
    return ( numEdits > 0.0 ) ? 1.0 : 0.0;
  }
  return static_cast<double> ( numEdits ) / averageWords;
}
// if(numEdits != numSft + numDel + numIns + numSub)
// System.out.println("** Error, unmatch edit erros " + numEdits +
// " vs " + (numSft + numDel + numIns + numSub));
}
string terAlignment::printAlignments()
{
stringstream to_return;
for(int i = 0; i < (int)alignment.size(); ++i) {
char alignInfo=alignment.at(i);
if (alignInfo == 'A' ) {
alignInfo='A';
}
void terAlignment::scoreDetails()
{
numIns = numDel = numSub = numWsf = numSft = 0;
if((int)allshifts.size()>0)
{
for(int i = 0; i < (int)allshifts.size(); ++i)
{
numWsf += allshifts[i].size();
}
numSft = allshifts.size();
}
if((int)alignment.size()>0 )
{
for(int i = 0; i < (int)alignment.size(); ++i)
{
switch (alignment[i])
{
case 'S':
case 'T':
numSub++;
break;
case 'D':
numDel++;
break;
case 'I':
numIns++;
break;
}
}
}
// if(numEdits != numSft + numDel + numIns + numSub)
// System.out.println("** Error, unmatch edit erros " + numEdits +
// " vs " + (numSft + numDel + numIns + numSub));
}
string terAlignment::printAlignments()
{
stringstream to_return;
for(int i = 0; i < (int)alignment.size(); ++i)
{
char alignInfo=alignment.at(i);
if (alignInfo == 'A' )
{
alignInfo='A';
}
if (i==0)
{
to_return << alignInfo;
}
else
{
to_return << " " << alignInfo;
}
}
return to_return.str();
if (i==0) {
to_return << alignInfo;
} else {
to_return << " " << alignInfo;
}
}
return to_return.str();
}
string terAlignment::printAllShifts()
{
stringstream to_return;
if ( ( int ) allshifts.size() == 0 )
{
stringstream to_return;
if ( ( int ) allshifts.size() == 0 ) {
// s += "\nNumShifts: 0";
to_return << "NbrShifts: 0";
}
else
{
to_return << "NbrShifts: 0";
} else {
// s += "\nNumShifts: " + (int)allshifts.size();
to_return << "NbrShifts: "<< ( int ) allshifts.size();
for ( int i = 0; i < ( int ) allshifts.size(); i++ )
{
to_return << "\t" ;
to_return << ( ( terShift ) allshifts[i] ).toString();
to_return << "NbrShifts: "<< ( int ) allshifts.size();
for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
to_return << "\t" ;
to_return << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i];
}
}
return to_return.str();
}
}
return to_return.str();
}
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -34,41 +34,41 @@ using namespace std;
namespace TERCpp
{
class terAlignment
{
private:
public:
class terAlignment
{
private:
public:
terAlignment();
string toString();
void scoreDetails();
terAlignment();
string toString();
void scoreDetails();
vector<string> ref;
vector<string> hyp;
vector<string> aftershift;
vector<terShift> allshifts;
vector<int> hyp_int;
vector<int> aftershift_int;
vector<string> ref;
vector<string> hyp;
vector<string> aftershift;
vector<terShift> allshifts;
vector<int> hyp_int;
vector<int> aftershift_int;
double numEdits;
double numWords;
double averageWords;
vector<char> alignment;
string bestRef;
double numEdits;
double numWords;
double averageWords;
vector<char> alignment;
string bestRef;
int numIns;
int numDel;
int numSub;
int numSft;
int numWsf;
int numIns;
int numDel;
int numSub;
int numSft;
int numWsf;
string join ( string delim, vector<string> arr );
double score();
double scoreAv();
string printAlignments();
string printAllShifts();
};
string join ( string delim, vector<string> arr );
double score();
double scoreAv();
string printAlignments();
string printAllShifts();
};
}
#endif

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -42,32 +42,32 @@ namespace TERCpp
// numSft=0;
// numWsf=0;
// }
// Default shift: every position is zero and the cached cost is 1.0.
terShift::terShift ()
  : start ( 0 ), end ( 0 ), moveto ( 0 ), newloc ( 0 ), cost ( 1.0 )
{
}
// Builds a shift from its four positions; the cached cost starts at 1.0
// and the list of shifted words is left empty.
terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
  : start ( _start ), end ( _end ), moveto ( _moveto ), newloc ( _newloc ), cost ( 1.0 )
{
}
// Default shift: every position is zero and the cached cost is 1.0.
terShift::terShift ()
  : start ( 0 ), end ( 0 ), moveto ( 0 ), newloc ( 0 ), cost ( 1.0 )
{
}
// Builds a shift from its four positions; the cached cost starts at 1.0
// and the list of shifted words is left empty.
terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
  : start ( _start ), end ( _end ), moveto ( _moveto ), newloc ( _newloc ), cost ( 1.0 )
{
}
// Builds a shift from its four positions plus a copy of the shifted words;
// the cached cost starts at 1.0.
terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
  : start ( _start ), end ( _end ), moveto ( _moveto ), newloc ( _newloc ),
    shifted ( _shifted ), cost ( 1.0 )
{
}
// Builds a shift from its four positions plus a copy of the shifted words;
// the cached cost starts at 1.0.
terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
  : start ( _start ), end ( _end ), moveto ( _moveto ), newloc ( _newloc ),
    shifted ( _shifted ), cost ( 1.0 )
{
}
// string terShift::vectorToString(vector<string> vec)
// {
// string retour("");
@ -78,44 +78,38 @@ namespace TERCpp
// return retour;
// }
// Renders the shift as "[start, end, moveto/newloc]". When shifted words are
// stored, " (" + vectorToString(shifted) + ")" is appended.
string terShift::toString()
{
  stringstream out;
  out << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
  if ( !shifted.empty() ) {
    out << " (" << vectorToString ( shifted ) << ")";
  }
  return out.str();
}
// Renders the shift as "[start, end, moveto/newloc]". When shifted words are
// stored, " (" + vectorToString(shifted) + ")" is appended.
string terShift::toString()
{
  stringstream out;
  out << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
  if ( !shifted.empty() ) {
    out << " (" << vectorToString ( shifted ) << ")";
  }
  return out.str();
}
/* The distance of the shift.
 *   moveto before start       -> start - moveto
 *   moveto after end          -> moveto - end
 *   moveto within [start,end] -> moveto - start
 */
int terShift::distance()
{
  if ( moveto < start ) {
    return start - moveto;
  }
  if ( moveto > end ) {
    return moveto - end;
  }
  return moveto - start;
}
/* The distance of the shift.
 *   moveto before start       -> start - moveto
 *   moveto after end          -> moveto - end
 *   moveto within [start,end] -> moveto - start
 */
int terShift::distance()
{
  if ( moveto < start ) {
    return start - moveto;
  }
  if ( moveto > end ) {
    return moveto - end;
  }
  return moveto - start;
}
// True when the target position lies strictly before the start of the span.
bool terShift::leftShift()
{
  const bool movesLeft = moveto < start;
  return movesLeft;
}
// True when the target position lies strictly before the start of the span.
bool terShift::leftShift()
{
  const bool movesLeft = moveto < start;
  return movesLeft;
}
// Length of the inclusive range [start, end].
int terShift::size()
{
  const int span = end - start;
  return span + 1;
}
// Length of the inclusive range [start, end].
int terShift::size()
{
  const int span = end - start;
  return span + 1;
}
// terShift terShift::operator=(terShift t)
// {
//

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -34,32 +34,32 @@ using namespace Tools;
namespace TERCpp
{
class terShift
{
private:
public:
class terShift
{
private:
public:
terShift();
terShift ( int _start, int _end, int _moveto, int _newloc );
terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
string toString();
int distance() ;
bool leftShift();
int size();
terShift();
terShift ( int _start, int _end, int _moveto, int _newloc );
terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
string toString();
int distance() ;
bool leftShift();
int size();
// terShift operator=(terShift t);
// string vectorToString(vector<string> vec);
int start;
int end;
int moveto;
int newloc;
vector<string> shifted; // The words we shifted
vector<char> alignment ; // for pra_more output
vector<string> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to
// calculate it multiple times.
double cost;
};
int start;
int end;
int moveto;
int newloc;
vector<string> shifted; // The words we shifted
vector<char> alignment ; // for pra_more output
vector<string> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to
// calculate it multiple times.
double cost;
};
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -41,62 +41,62 @@ namespace TERCpp
{
// typedef size_t WERelement[2];
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
typedef vector<terShift> vecTerShift;
/**
@author
*/
class terCalc
{
private :
typedef vector<terShift> vecTerShift;
/**
@author
*/
class terCalc
{
private :
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
WERalignment l_WERalignment;
WERalignment l_WERalignment;
// HashMap contenant les valeurs de hash de chaque mot
hashMap bagOfWords;
int TAILLE_PERMUT_MAX;
// Increments internes
int NBR_SEGS_EVALUATED;
int NBR_PERMUTS_CONSID;
int NBR_BS_APPELS;
int DIST_MAX_PERMUT;
bool PRINT_DEBUG;
hashMap bagOfWords;
int TAILLE_PERMUT_MAX;
// Increments internes
int NBR_SEGS_EVALUATED;
int NBR_PERMUTS_CONSID;
int NBR_BS_APPELS;
int DIST_MAX_PERMUT;
bool PRINT_DEBUG;
// Utilisés dans minDistEdit et ils ne sont pas réajustés
double S[1000][1000];
char P[1000][1000];
vector<vecInt> refSpans;
vector<vecInt> hypSpans;
int TAILLE_BEAM;
// Utilisés dans minDistEdit et ils ne sont pas réajustés
double S[1000][1000];
char P[1000][1000];
vector<vecInt> refSpans;
vector<vecInt> hypSpans;
int TAILLE_BEAM;
public:
int shift_cost;
int insert_cost;
int delete_cost;
int substitute_cost;
int match_cost;
double infinite;
terCalc();
public:
int shift_cost;
int insert_cost;
int delete_cost;
int substitute_cost;
int match_cost;
double infinite;
terCalc();
// ~terCalc();
// size_t* hashVec ( vector<string> s );
void setDebugMode ( bool b );
void setDebugMode ( bool b );
// int WERCalculation ( size_t * ref, size_t * hyp );
// int WERCalculation ( vector<string> ref, vector<string> hyp );
// int WERCalculation ( vector<int> ref, vector<int> hyp );
terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
// string vectorToString(vector<string> vec);
// vector<string> subVector(vector<string> vec, int start, int end);
hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
terAlignment TER ( vector<string> hyp, vector<string> ref );
terAlignment TER ( vector<int> hyp, vector<int> ref );
bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
alignmentStruct permuter ( vector<string> words, terShift s );
alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
};
hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
terAlignment TER ( vector<string> hyp, vector<string> ref );
terAlignment TER ( vector<int> hyp, vector<int> ref );
bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
alignmentStruct permuter ( vector<string> words, terShift s );
alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
};
}

File diff suppressed because it is too large Load Diff

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@ -35,32 +35,31 @@ using namespace std;
namespace Tools
{
typedef vector<double> vecDouble;
typedef vector<char> vecChar;
typedef vector<int> vecInt;
typedef vector<float> vecFloat;
typedef vector<size_t> vecSize_t;
typedef vector<string> vecString;
typedef vector<string> alignmentElement;
typedef vector<alignmentElement> WERalignment;
typedef vector<double> vecDouble;
typedef vector<char> vecChar;
typedef vector<int> vecInt;
typedef vector<float> vecFloat;
typedef vector<size_t> vecSize_t;
typedef vector<string> vecString;
typedef vector<string> alignmentElement;
typedef vector<alignmentElement> WERalignment;
struct param
{
bool debugMode;
string referenceFile; // path to the resources
string hypothesisFile; // path to the configuration files
string outputFileExtension;
string outputFileName;
bool noPunct;
bool caseOn;
bool normalize;
bool tercomLike;
bool sgmlInputs;
bool noTxtIds;
bool printAlignments;
bool WER;
int debugLevel;
// Plain option bundle for the tools in this namespace; it is taken by value
// by printParams() and copyParam() declared further down in this header.
// NOTE(review): the meaning of the individual flags is not visible here and
// is only suggested by their names -- confirm against the code that fills
// them in before relying on it.
struct param {
bool debugMode;
string referenceFile; // original note said "path to the resources"; presumably the reference file path -- TODO confirm
string hypothesisFile; // original note said "path to the configuration files"; presumably the hypothesis file path -- TODO confirm
string outputFileExtension;
string outputFileName;
bool noPunct;
bool caseOn;
bool normalize;
bool tercomLike;
bool sgmlInputs;
bool noTxtIds;
bool printAlignments;
bool WER;
int debugLevel;
};
// param = { false, "","","","" };
@ -68,35 +67,35 @@ struct param
// private:
// public:
string vectorToString ( vector<string> vec );
string vectorToString ( vector<char> vec );
string vectorToString ( vector<int> vec );
string vectorToString ( vector<string> vec, string s );
string vectorToString ( vector<char> vec, string s );
string vectorToString ( vector<int> vec, string s );
string vectorToString ( vector<bool> vec, string s );
string vectorToString ( char* vec, string s, int taille );
string vectorToString ( int* vec, string s , int taille );
string vectorToString ( bool* vec, string s , int taille );
vector<string> subVector ( vector<string> vec, int start, int end );
vector<int> subVector ( vector<int> vec, int start, int end );
vector<float> subVector ( vector<float> vec, int start, int end );
vector<string> copyVector ( vector<string> vec );
vector<int> copyVector ( vector<int> vec );
vector<float> copyVector ( vector<float> vec );
vector<string> stringToVector ( string s, string tok );
vector<string> stringToVector ( char s, string tok );
vector<string> stringToVector ( int s, string tok );
vector<int> stringToVectorInt ( string s, string tok );
vector<float> stringToVectorFloat ( string s, string tok );
string lowerCase(string str);
string removePunct(string str);
string tokenizePunct(string str);
string removePunctTercom(string str);
string normalizeStd(string str);
string printParams(param p);
string join ( string delim, vector<string> arr );
string vectorToString ( vector<string> vec );
string vectorToString ( vector<char> vec );
string vectorToString ( vector<int> vec );
string vectorToString ( vector<string> vec, string s );
string vectorToString ( vector<char> vec, string s );
string vectorToString ( vector<int> vec, string s );
string vectorToString ( vector<bool> vec, string s );
string vectorToString ( char* vec, string s, int taille );
string vectorToString ( int* vec, string s , int taille );
string vectorToString ( bool* vec, string s , int taille );
vector<string> subVector ( vector<string> vec, int start, int end );
vector<int> subVector ( vector<int> vec, int start, int end );
vector<float> subVector ( vector<float> vec, int start, int end );
vector<string> copyVector ( vector<string> vec );
vector<int> copyVector ( vector<int> vec );
vector<float> copyVector ( vector<float> vec );
vector<string> stringToVector ( string s, string tok );
vector<string> stringToVector ( char s, string tok );
vector<string> stringToVector ( int s, string tok );
vector<int> stringToVectorInt ( string s, string tok );
vector<float> stringToVectorFloat ( string s, string tok );
string lowerCase(string str);
string removePunct(string str);
string tokenizePunct(string str);
string removePunctTercom(string str);
string normalizeStd(string str);
string printParams(param p);
string join ( string delim, vector<string> arr );
// };
param copyParam(param p);
param copyParam(param p);
}
#endif

View File

@ -43,7 +43,8 @@ private:
};
// load hypothesis from candidate output
vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile) {
vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile)
{
ifstream cand(candFile.c_str());
if (!cand.good()) throw runtime_error("Error opening candidate file");
@ -61,7 +62,8 @@ vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile) {
}
// load 1-best hypothesis from n-best file (useful if relying on alignment/tree information)
vector<ScoreStats> EvaluatorUtil::loadNBest(const string& nBestFile) {
vector<ScoreStats> EvaluatorUtil::loadNBest(const string& nBestFile)
{
vector<ScoreStats> entries;
Data data(g_scorer);
@ -81,8 +83,7 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap, bool nbest_i
if (nbest_input) {
entries = loadNBest(candFile);
}
else {
} else {
entries = loadCand(candFile);
}

View File

@ -77,7 +77,7 @@ int main(int argc, char** argv)
bool model_bg = false; // Use model for background corpus
bool verbose = false; // Verbose updates
bool safe_hope = false; // Model score cannot have more than BLEU_RATIO times more influence than BLEU
size_t hgPruning = 50; //prune hypergraphs to have this many edges per reference word
size_t hgPruning = 50; //prune hypergraphs to have this many edges per reference word
// Command-line processing follows pro.cpp
po::options_description desc("Allowed options");
@ -157,7 +157,7 @@ int main(int argc, char** argv)
do {
size_t equals = buffer.find_last_of("=");
UTIL_THROW_IF(equals == buffer.npos, util::Exception, "Incorrect format in dense feature file: '"
<< buffer << "'");
<< buffer << "'");
string name = buffer.substr(0,equals);
names.push_back(name);
initParams.push_back(boost::lexical_cast<ValType>(buffer.substr(equals+2)));
@ -183,7 +183,7 @@ int main(int argc, char** argv)
//Make sure that SparseVector encodes dense feature names as 0..n-1.
for (size_t i = 0; i < names.size(); ++i) {
size_t id = SparseVector::encode(names[i]);
assert(id == i);
assert(id == i);
if (verbose) cerr << names[i] << " " << initParams[i] << endl;
}
@ -246,12 +246,12 @@ int main(int argc, char** argv)
int iNumUpdates = 0;
ValType totalLoss = 0.0;
size_t sentenceIndex = 0;
for(decoder->reset();!decoder->finished(); decoder->next()) {
for(decoder->reset(); !decoder->finished(); decoder->next()) {
HopeFearData hfd;
decoder->HopeFear(bg,wv,&hfd);
// Update weights
if (!hfd.hopeFearEqual && hfd.hopeBleu > hfd.fearBleu) {
if (!hfd.hopeFearEqual && hfd.hopeBleu > hfd.fearBleu) {
// Vector difference
MiraFeatureVector diff = hfd.hopeFeatures - hfd.fearFeatures;
// Bleu difference

View File

@ -3,26 +3,27 @@
int main(int argc, char* argv[]){
int main(int argc, char* argv[])
{
const char * is_reordering = "false";
const char * is_reordering = "false";
if (!(argc == 5 || argc == 4)) {
// Tell the user how to run the program
std::cerr << "Provided " << argc << " arguments, needed 4 or 5." << std::endl;
std::cerr << "Usage: " << argv[0] << " path_to_phrasetable output_dir num_scores is_reordering" << std::endl;
std::cerr << "is_reordering should be either true or false, but it is currently a stub feature." << std::endl;
//std::cerr << "Usage: " << argv[0] << " path_to_phrasetable number_of_uniq_lines output_bin_file output_hash_table output_vocab_id" << std::endl;
return 1;
}
if (!(argc == 5 || argc == 4)) {
// Tell the user how to run the program
std::cerr << "Provided " << argc << " arguments, needed 4 or 5." << std::endl;
std::cerr << "Usage: " << argv[0] << " path_to_phrasetable output_dir num_scores is_reordering" << std::endl;
std::cerr << "is_reordering should be either true or false, but it is currently a stub feature." << std::endl;
//std::cerr << "Usage: " << argv[0] << " path_to_phrasetable number_of_uniq_lines output_bin_file output_hash_table output_vocab_id" << std::endl;
return 1;
}
if (argc == 5) {
is_reordering = argv[4];
}
if (argc == 5) {
is_reordering = argv[4];
}
createProbingPT(argv[1], argv[2], argv[3], is_reordering);
createProbingPT(argv[1], argv[2], argv[3], is_reordering);
util::PrintUsage(std::cout);
return 0;
util::PrintUsage(std::cout);
return 0;
}

View File

@ -26,36 +26,37 @@
#include <unistd.h>
#include <fcntl.h>
int main(int argc, char* argv[]) {
if (argc != 2) {
// Tell the user how to run the program
std::cerr << "Usage: " << argv[0] << " path_to_directory" << std::endl;
return 1;
int main(int argc, char* argv[])
{
if (argc != 2) {
// Tell the user how to run the program
std::cerr << "Usage: " << argv[0] << " path_to_directory" << std::endl;
return 1;
}
QueryEngine queries(argv[1]);
//Interactive search
std::cout << "Please enter a string to be searched, or exit to exit." << std::endl;
while (true) {
std::string cinstr = "";
getline(std::cin, cinstr);
if (cinstr == "exit") {
break;
} else {
//Actual lookup
std::pair<bool, std::vector<target_text> > query_result;
query_result = queries.query(StringPiece(cinstr));
if (query_result.first) {
queries.printTargetInfo(query_result.second);
} else {
std::cout << "Key not found!" << std::endl;
}
}
}
QueryEngine queries(argv[1]);
util::PrintUsage(std::cout);
//Interactive search
std::cout << "Please enter a string to be searched, or exit to exit." << std::endl;
while (true){
std::string cinstr = "";
getline(std::cin, cinstr);
if (cinstr == "exit"){
break;
}else{
//Actual lookup
std::pair<bool, std::vector<target_text> > query_result;
query_result = queries.query(StringPiece(cinstr));
if (query_result.first) {
queries.printTargetInfo(query_result.second);
} else {
std::cout << "Key not found!" << std::endl;
}
}
}
util::PrintUsage(std::cout);
return 0;
return 0;
}

View File

@ -53,13 +53,15 @@ using namespace std;
namespace po = boost::program_options;
typedef multimap<float,string> Lines;
static void usage(const po::options_description& desc, char** argv) {
cerr << "Usage: " + string(argv[0]) + " [options] input-file output-file" << endl;
cerr << desc << endl;
// Prints the command-line synopsis (program name taken from argv[0])
// followed by the option descriptions in `desc`, both to stderr.
static void usage(const po::options_description& desc, char** argv)
{
  const string synopsis = "Usage: " + string(argv[0]) + " [options] input-file output-file";
  cerr << synopsis << endl;
  cerr << desc << endl;
}
//Find top n translations of source, and send them to output
static void outputTopN(Lines lines, size_t maxPhrases, ostream& out) {
static void outputTopN(Lines lines, size_t maxPhrases, ostream& out)
{
size_t count = 0;
for (Lines::const_reverse_iterator i = lines.rbegin(); i != lines.rend(); ++i) {
out << i->second << endl;
@ -92,7 +94,7 @@ static void outputTopN(const Phrase& sourcePhrase, const multimap<float,const Ta
out << endl;
}
}*/
int main(int argc, char** argv)
int main(int argc, char** argv)
{
bool help;
string input_file;
@ -112,7 +114,7 @@ int main(int argc, char** argv)
cmdline_options.add(desc);
po::variables_map vm;
po::parsed_options parsed = po::command_line_parser(argc,argv).
options(cmdline_options).run();
options(cmdline_options).run();
po::store(parsed, vm);
po::notify(vm);
if (help) {
@ -135,7 +137,7 @@ int main(int argc, char** argv)
mosesargs.push_back("-f");
mosesargs.push_back(config_file);
boost::scoped_ptr<Parameter> params(new Parameter());
boost::scoped_ptr<Parameter> params(new Parameter());
char** mosesargv = new char*[mosesargs.size()];
for (size_t i = 0; i < mosesargs.size(); ++i) {
mosesargv[i] = new char[mosesargs[i].length() + 1];

View File

@ -201,7 +201,7 @@ int main(int argc, char* argv[])
cout << lineCount << " ||| " << p << " " << r << " " << prune << " " << scale << " ||| ";
vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
manager.OutputBestHypo(mbrBestHypo, lineCount, staticData.GetReportSegmentation(),
staticData.GetReportAllFactors(),cout);
staticData.GetReportAllFactors(),cout);
}
}

View File

@ -80,7 +80,7 @@ int main(int argc, char** argv)
#ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif
// echo command line, if verbose
IFVERBOSE(1) {
TRACE_ERR("command: ");
@ -121,7 +121,7 @@ int main(int argc, char** argv)
// set up read/writing class
IFVERBOSE(1) {
PrintUserTime("Created input-output object");
PrintUserTime("Created input-output object");
}
IOWrapper* ioWrapper = new IOWrapper();
@ -161,28 +161,26 @@ int main(int argc, char** argv)
#ifdef PT_UG
bool spe = params.isParamSpecified("spe-src");
if (spe) {
// simulated post-editing: always run single-threaded!
// simulated post-editing: always run single-threaded!
task->Run();
delete task;
string src,trg,aln;
UTIL_THROW_IF2(!getline(*ioWrapper->spe_src,src), "[" << HERE << "] "
<< "missing update data for simulated post-editing.");
UTIL_THROW_IF2(!getline(*ioWrapper->spe_trg,trg), "[" << HERE << "] "
<< "missing update data for simulated post-editing.");
<< "missing update data for simulated post-editing.");
UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] "
<< "missing update data for simulated post-editing.");
BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl())
{
Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
if (sapt) sapt->add(src,trg,aln);
VERBOSE(1,"[" << HERE << " added src] " << src << endl);
VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
}
}
else
<< "missing update data for simulated post-editing.");
BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl()) {
Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
if (sapt) sapt->add(src,trg,aln);
VERBOSE(1,"[" << HERE << " added src] " << src << endl);
VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
}
} else
#endif
pool.Submit(task);
pool.Submit(task);
#else
task->Run();
delete task;

View File

@ -80,7 +80,7 @@ int main(int argc, char** argv)
#ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif
// echo command line, if verbose
IFVERBOSE(1) {
TRACE_ERR("command: ");
@ -121,7 +121,7 @@ int main(int argc, char** argv)
// set up read/writing class
IFVERBOSE(1) {
PrintUserTime("Created input-output object");
PrintUserTime("Created input-output object");
}
IOWrapper* ioWrapper = new IOWrapper();

View File

@ -46,14 +46,13 @@ public:
* contains such an object then returns a pointer to it; otherwise a new
* one is inserted.
*/
private:
private:
const AlignmentInfo* Add(AlignmentInfo const& ainfo);
public:
public:
template<typename ALNREP>
AlignmentInfo const *
Add(ALNREP const & aln)
{
AlignmentInfo const *
Add(ALNREP const & aln) {
return this->Add(AlignmentInfo(aln));
}

View File

@ -13,11 +13,11 @@ namespace Moses
* print surface factor only for the given phrase
*/
void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
const std::vector<FactorType> &outputFactorOrder,
bool reportAllFactors) const
const std::vector<FactorType> &outputFactorOrder,
bool reportAllFactors) const
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Cannot be empty phrase");
"Cannot be empty phrase");
if (reportAllFactors == true) {
out << phrase;
} else {
@ -26,12 +26,12 @@ void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
out << *factor;
UTIL_THROW_IF2(factor == NULL,
"Empty factor 0 at position " << pos);
"Empty factor 0 at position " << pos);
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL,
"Empty factor " << i << " at position " << pos);
"Empty factor " << i << " at position " << pos);
out << "|" << *factor;
}
@ -45,7 +45,7 @@ void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
// but there are scripts and tools that expect the output of -T to look like
// that.
void BaseManager::WriteApplicationContext(std::ostream &out,
const ApplicationContext &context) const
const ApplicationContext &context) const
{
assert(!context.empty());
ApplicationContext::const_reverse_iterator p = context.rbegin();

View File

@ -17,23 +17,22 @@ protected:
const InputType &m_source; /**< source sentence to be translated */
BaseManager(const InputType &source)
:m_source(source)
{}
:m_source(source) {
}
// output
typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext;
typedef std::set< std::pair<size_t, size_t> > Alignments;
void OutputSurface(std::ostream &out,
const Phrase &phrase,
const std::vector<FactorType> &outputFactorOrder,
bool reportAllFactors) const;
const Phrase &phrase,
const std::vector<FactorType> &outputFactorOrder,
bool reportAllFactors) const;
void WriteApplicationContext(std::ostream &out,
const ApplicationContext &context) const;
const ApplicationContext &context) const;
template <class T>
void ShiftOffsets(std::vector<T> &offsets, T shift) const
{
void ShiftOffsets(std::vector<T> &offsets, T shift) const {
T currPos = shift;
for (size_t i = 0; i < offsets.size(); ++i) {
if (offsets[i] == 0) {
@ -46,8 +45,8 @@ protected:
}
public:
virtual ~BaseManager()
{}
virtual ~BaseManager() {
}
//! the input sentence being decoded
const InputType& GetSource() const {

View File

@ -162,16 +162,16 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
if (m_translations.size() > 1) {
UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(),
"Non-monotonic future score: "
<< m_translations.Get(0)->GetFutureScore() << " vs. "
<< m_translations.Get(1)->GetFutureScore());
"Non-monotonic future score: "
<< m_translations.Get(0)->GetFutureScore() << " vs. "
<< m_translations.Get(1)->GetFutureScore());
}
if (m_hypotheses.size() > 1) {
UTIL_THROW_IF2(m_hypotheses[0]->GetTotalScore() < m_hypotheses[1]->GetTotalScore(),
"Non-monotonic total score"
<< m_hypotheses[0]->GetTotalScore() << " vs. "
<< m_hypotheses[1]->GetTotalScore());
"Non-monotonic total score"
<< m_hypotheses[0]->GetTotalScore() << " vs. "
<< m_hypotheses[1]->GetTotalScore());
}
HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
@ -446,9 +446,9 @@ BitmapContainer::ProcessBestHypothesis()
if (!Empty()) {
HypothesisQueueItem *check = Dequeue(true);
UTIL_THROW_IF2(item->GetHypothesis()->GetTotalScore() < check->GetHypothesis()->GetTotalScore(),
"Non-monotonic total score: "
<< item->GetHypothesis()->GetTotalScore() << " vs. "
<< check->GetHypothesis()->GetTotalScore());
"Non-monotonic total score: "
<< item->GetHypothesis()->GetTotalScore() << " vs. "
<< check->GetHypothesis()->GetTotalScore());
}
// Logging for the criminally insane

View File

@ -85,7 +85,7 @@ void ChartCell::PruneToSize()
* \param allChartCells entire chart - needed to look up underlying hypotheses
*/
void ChartCell::Decode(const ChartTranslationOptionList &transOptList
, const ChartCellCollection &allChartCells)
, const ChartCellCollection &allChartCells)
{
const StaticData &staticData = StaticData::Instance();

View File

@ -97,7 +97,7 @@ public:
~ChartCell();
void Decode(const ChartTranslationOptionList &transOptList
,const ChartCellCollection &allChartCells);
,const ChartCellCollection &allChartCells);
//! Get all hypotheses in the cell that have the specified constituent label
const HypoList *GetSortedHypotheses(const Word &constituentLabel) const {

View File

@ -124,8 +124,7 @@ public:
const ChartCellLabel *Find(size_t idx) const {
try {
return m_map.at(idx);
}
catch (const std::out_of_range& oor) {
} catch (const std::out_of_range& oor) {
return NULL;
}
}

View File

@ -61,8 +61,7 @@ ChartHypothesis::ChartHypothesis(const ChartTranslationOptions &transOpt,
const std::vector<HypothesisDimension> &childEntries = item.GetHypothesisDimensions();
m_prevHypos.reserve(childEntries.size());
std::vector<HypothesisDimension>::const_iterator iter;
for (iter = childEntries.begin(); iter != childEntries.end(); ++iter)
{
for (iter = childEntries.begin(); iter != childEntries.end(); ++iter) {
m_prevHypos.push_back(iter->GetHypothesis());
}
}
@ -85,17 +84,14 @@ ChartHypothesis::ChartHypothesis(const ChartHypothesis &pred,
ChartHypothesis::~ChartHypothesis()
{
// delete feature function states
for (unsigned i = 0; i < m_ffStates.size(); ++i)
{
for (unsigned i = 0; i < m_ffStates.size(); ++i) {
delete m_ffStates[i];
}
// delete hypotheses that are not in the chart (recombined away)
if (m_arcList)
{
if (m_arcList) {
ChartArcList::iterator iter;
for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter)
{
for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter) {
ChartHypothesis *hypo = *iter;
Delete(hypo);
}
@ -112,25 +108,19 @@ void ChartHypothesis::GetOutputPhrase(Phrase &outPhrase) const
{
FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
for (size_t pos = 0; pos < GetCurrTargetPhrase().GetSize(); ++pos)
{
for (size_t pos = 0; pos < GetCurrTargetPhrase().GetSize(); ++pos) {
const Word &word = GetCurrTargetPhrase().GetWord(pos);
if (word.IsNonTerminal())
{
if (word.IsNonTerminal()) {
// non-term. fill out with prev hypo
size_t nonTermInd = GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap()[pos];
const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
prevHypo->GetOutputPhrase(outPhrase);
}
else
{
} else {
outPhrase.AddWord(word);
if (placeholderFactor != NOT_FOUND)
{
if (placeholderFactor != NOT_FOUND) {
std::set<size_t> sourcePosSet = GetCurrTargetPhrase().GetAlignTerm().GetAlignmentsForTarget(pos);
if (sourcePosSet.size() == 1)
{
if (sourcePosSet.size() == 1) {
const std::vector<const Word*> *ruleSourceFromInputPath = GetTranslationOption().GetSourceRuleFromInputPath();
UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
"No source rule");
@ -140,8 +130,7 @@ void ChartHypothesis::GetOutputPhrase(Phrase &outPhrase) const
UTIL_THROW_IF2(sourceWord == NULL,
"No source word");
const Factor *factor = sourceWord->GetFactor(placeholderFactor);
if (factor)
{
if (factor) {
outPhrase.Back()[0] = factor;
}
}
@ -165,33 +154,24 @@ void ChartHypothesis::GetOutputPhrase(size_t leftRightMost, size_t numWords, Phr
const TargetPhrase &tp = GetCurrTargetPhrase();
size_t targetSize = tp.GetSize();
for (size_t i = 0; i < targetSize; ++i)
{
for (size_t i = 0; i < targetSize; ++i) {
size_t pos;
if (leftRightMost == 1)
{
if (leftRightMost == 1) {
pos = i;
}
else if (leftRightMost == 2)
{
} else if (leftRightMost == 2) {
pos = targetSize - i - 1;
}
else
{
} else {
abort();
}
const Word &word = tp.GetWord(pos);
if (word.IsNonTerminal())
{
if (word.IsNonTerminal()) {
// non-term. fill out with prev hypo
size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[pos];
const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
prevHypo->GetOutputPhrase(outPhrase);
}
else
{
} else {
outPhrase.AddWord(word);
}
@ -236,20 +216,16 @@ void ChartHypothesis::EvaluateWhenApplied()
// cached in the translation option-- there is no principled distinction
const std::vector<const StatelessFeatureFunction*>& sfs =
StatelessFeatureFunction::GetStatelessFeatureFunctions();
for (unsigned i = 0; i < sfs.size(); ++i)
{
if (! staticData.IsFeatureFunctionIgnored( *sfs[i] ))
{
for (unsigned i = 0; i < sfs.size(); ++i) {
if (! staticData.IsFeatureFunctionIgnored( *sfs[i] )) {
sfs[i]->EvaluateWhenApplied(*this,&m_currScoreBreakdown);
}
}
const std::vector<const StatefulFeatureFunction*>& ffs =
StatefulFeatureFunction::GetStatefulFeatureFunctions();
for (unsigned i = 0; i < ffs.size(); ++i)
{
if (! staticData.IsFeatureFunctionIgnored( *ffs[i] ))
{
for (unsigned i = 0; i < ffs.size(); ++i) {
if (! staticData.IsFeatureFunctionIgnored( *ffs[i] )) {
m_ffStates[i] = ffs[i]->EvaluateWhenApplied(*this,i,&m_currScoreBreakdown);
}
}
@ -257,7 +233,7 @@ void ChartHypothesis::EvaluateWhenApplied()
// total score from current translation rule
m_totalScore = GetTranslationOption().GetScores().GetWeightedScore();
m_totalScore += m_currScoreBreakdown.GetWeightedScore();
// total scores from prev hypos
for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
const ChartHypothesis &prevHypo = **iter;
@ -267,31 +243,25 @@ void ChartHypothesis::EvaluateWhenApplied()
void ChartHypothesis::AddArc(ChartHypothesis *loserHypo)
{
if (!m_arcList)
{
if (loserHypo->m_arcList)
{ // we don't have an arcList, but loser does
if (!m_arcList) {
if (loserHypo->m_arcList) {
// we don't have an arcList, but loser does
this->m_arcList = loserHypo->m_arcList; // take ownership, we'll delete
loserHypo->m_arcList = 0; // prevent a double deletion
}
else
{
} else {
this->m_arcList = new ChartArcList();
}
}
else
{
if (loserHypo->m_arcList)
{ // both have an arc list: merge. delete loser
} else {
if (loserHypo->m_arcList) {
// both have an arc list: merge. delete loser
size_t my_size = m_arcList->size();
size_t add_size = loserHypo->m_arcList->size();
this->m_arcList->resize(my_size + add_size, 0);
std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(ChartHypothesis *));
delete loserHypo->m_arcList;
loserHypo->m_arcList = 0;
}
else
{ // loserHypo doesn't have any arcs
} else {
// loserHypo doesn't have any arcs
// DO NOTHING
}
}
@ -299,10 +269,8 @@ void ChartHypothesis::AddArc(ChartHypothesis *loserHypo)
}
// sorting helper
struct CompareChartHypothesisTotalScore
{
bool operator()(const ChartHypothesis* hypo1, const ChartHypothesis* hypo2) const
{
struct CompareChartHypothesisTotalScore {
bool operator()(const ChartHypothesis* hypo1, const ChartHypothesis* hypo2) const {
return hypo1->GetTotalScore() > hypo2->GetTotalScore();
}
};
@ -322,8 +290,7 @@ void ChartHypothesis::CleanupArcList()
size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphHypergraph();
if (!distinctNBest && m_arcList->size() > nBestSize)
{
if (!distinctNBest && m_arcList->size() > nBestSize) {
// prune arc list only if there too many arcs
NTH_ELEMENT4(m_arcList->begin()
, m_arcList->begin() + nBestSize - 1
@ -332,8 +299,7 @@ void ChartHypothesis::CleanupArcList()
// delete bad ones
ChartArcList::iterator iter;
for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter)
{
for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter) {
ChartHypothesis *arc = *iter;
ChartHypothesis::Delete(arc);
}
@ -343,8 +309,7 @@ void ChartHypothesis::CleanupArcList()
// set all arc's main hypo variable to this hypo
ChartArcList::iterator iter = m_arcList->begin();
for (; iter != m_arcList->end() ; ++iter)
{
for (; iter != m_arcList->end() ; ++iter) {
ChartHypothesis *arc = *iter;
arc->SetWinningHypo(this);
}
@ -367,13 +332,11 @@ std::ostream& operator<<(std::ostream& out, const ChartHypothesis& hypo)
// recombination
if (hypo.GetWinningHypothesis() != NULL &&
hypo.GetWinningHypothesis() != &hypo)
{
hypo.GetWinningHypothesis() != &hypo) {
out << "->" << hypo.GetWinningHypothesis()->GetId();
}
if (StaticData::Instance().GetIncludeLHSInSearchGraph())
{
if (StaticData::Instance().GetIncludeLHSInSearchGraph()) {
out << " " << hypo.GetTargetLHS() << "=>";
}
out << " " << hypo.GetCurrTargetPhrase()
@ -381,8 +344,7 @@ std::ostream& operator<<(std::ostream& out, const ChartHypothesis& hypo)
<< " " << hypo.GetCurrSourceRange();
HypoList::const_iterator iter;
for (iter = hypo.GetPrevHypos().begin(); iter != hypo.GetPrevHypos().end(); ++iter)
{
for (iter = hypo.GetPrevHypos().begin(); iter != hypo.GetPrevHypos().end(); ++iter) {
const ChartHypothesis &prevHypo = **iter;
out << " " << prevHypo.GetId();
}

View File

@ -58,8 +58,8 @@ protected:
WordsRange m_currSourceWordsRange;
std::vector<const FFState*> m_ffStates; /*! stateful feature function states */
/*! sum of scores of this hypothesis, and previous hypotheses. Lazily initialised. */
mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
mutable boost::scoped_ptr<ScoreComponentCollection> m_deltaScoreBreakdown;
mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
mutable boost::scoped_ptr<ScoreComponentCollection> m_deltaScoreBreakdown;
ScoreComponentCollection m_currScoreBreakdown /*! scores for this hypothesis only */
,m_lmNGram
,m_lmPrefix;
@ -82,21 +82,18 @@ protected:
public:
#ifdef USE_HYPO_POOL
void *operator new(size_t /* num_bytes */)
{
void *operator new(size_t /* num_bytes */) {
void *ptr = s_objectPool.getPtr();
return ptr;
}
//! delete \param hypo. Works with object pool too
static void Delete(ChartHypothesis *hypo)
{
static void Delete(ChartHypothesis *hypo) {
s_objectPool.freeObject(hypo);
}
#else
//! delete \param hypo. Works with object pool too
static void Delete(ChartHypothesis *hypo)
{
static void Delete(ChartHypothesis *hypo) {
delete hypo;
}
#endif
@ -109,43 +106,36 @@ public:
~ChartHypothesis();
unsigned GetId() const
{
unsigned GetId() const {
return m_id;
}
const ChartTranslationOption &GetTranslationOption() const
{
const ChartTranslationOption &GetTranslationOption() const {
return *m_transOpt;
}
//! Get the rule that created this hypothesis
const TargetPhrase &GetCurrTargetPhrase() const
{
const TargetPhrase &GetCurrTargetPhrase() const {
return m_transOpt->GetPhrase();
}
//! the source range that this hypothesis spans
const WordsRange &GetCurrSourceRange() const
{
const WordsRange &GetCurrSourceRange() const {
return m_currSourceWordsRange;
}
//! the arc list when creating n-best lists
inline const ChartArcList* GetArcList() const
{
inline const ChartArcList* GetArcList() const {
return m_arcList;
}
//! the feature function states for a particular feature \param featureID
inline const FFState* GetFFState( size_t featureID ) const
{
inline const FFState* GetFFState( size_t featureID ) const {
return m_ffStates[ featureID ];
}
//! reference back to the manager
inline const ChartManager& GetManager() const
{
inline const ChartManager& GetManager() const {
return m_manager;
}
@ -165,21 +155,17 @@ public:
void SetWinningHypo(const ChartHypothesis *hypo);
//! get the unweighted score for each feature function
const ScoreComponentCollection &GetScoreBreakdown() const
{
const ScoreComponentCollection &GetScoreBreakdown() const {
// Note: never call this method before m_currScoreBreakdown is fully computed
if (!m_scoreBreakdown.get())
{
if (!m_scoreBreakdown.get()) {
m_scoreBreakdown.reset(new ScoreComponentCollection());
// score breakdown from current translation rule
if (m_transOpt)
{
if (m_transOpt) {
m_scoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
}
m_scoreBreakdown->PlusEquals(m_currScoreBreakdown);
// score breakdowns from prev hypos
for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter)
{
for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
const ChartHypothesis &prevHypo = **iter;
m_scoreBreakdown->PlusEquals(prevHypo.GetScoreBreakdown());
}
@ -188,15 +174,12 @@ public:
}
//! get the unweighted score delta for each feature function
const ScoreComponentCollection &GetDeltaScoreBreakdown() const
{
const ScoreComponentCollection &GetDeltaScoreBreakdown() const {
// Note: never call this method before m_currScoreBreakdown is fully computed
if (!m_deltaScoreBreakdown.get())
{
if (!m_deltaScoreBreakdown.get()) {
m_deltaScoreBreakdown.reset(new ScoreComponentCollection());
// score breakdown from current translation rule
if (m_transOpt)
{
if (m_transOpt) {
m_deltaScoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
}
m_deltaScoreBreakdown->PlusEquals(m_currScoreBreakdown);
@ -206,33 +189,28 @@ public:
}
//! Get the weighted total score
float GetTotalScore() const
{
float GetTotalScore() const {
// scores from current translation rule. eg. translation models & word penalty
return m_totalScore;
}
//! vector of previous hypotheses this hypo is built on
const std::vector<const ChartHypothesis*> &GetPrevHypos() const
{
const std::vector<const ChartHypothesis*> &GetPrevHypos() const {
return m_prevHypos;
}
//! get a particular previous hypos
const ChartHypothesis* GetPrevHypo(size_t pos) const
{
const ChartHypothesis* GetPrevHypo(size_t pos) const {
return m_prevHypos[pos];
}
//! get the constituency label that covers this hypo
const Word &GetTargetLHS() const
{
const Word &GetTargetLHS() const {
return GetCurrTargetPhrase().GetTargetLHS();
}
//! get the best hypo in the arc list when doing n-best list creation. It's either this hypothesis, or the best hypo is this hypo is in the arc list
const ChartHypothesis* GetWinningHypothesis() const
{
const ChartHypothesis* GetWinningHypothesis() const {
return m_winningHypo;
}

View File

@ -125,7 +125,7 @@ Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
}
// Generate the score breakdown of the derivation d.
boost::shared_ptr<ScoreComponentCollection>
boost::shared_ptr<ScoreComponentCollection>
ChartKBestExtractor::GetOutputScoreBreakdown(const Derivation &d)
{
const ChartHypothesis &hypo = d.edge.head->hypothesis;
@ -169,8 +169,7 @@ TreePointer ChartKBestExtractor::GetOutputTree(const Derivation &d)
mytree->Combine(previous_trees);
return mytree;
}
else {
} else {
UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
}
}

View File

@ -290,12 +290,14 @@ void ChartManager::FindReachableHypotheses(
}
}
void ChartManager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const {
void ChartManager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const
{
ChartSearchGraphWriterHypergraph writer(&outputSearchGraphStream);
WriteSearchGraph(writer);
}
void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const {
void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const
{
ChartSearchGraphWriterMoses writer(&outputSearchGraphStream, m_source.GetTranslationId());
WriteSearchGraph(writer);
}
@ -304,33 +306,33 @@ void ChartManager::OutputBest(OutputCollector *collector) const
{
const ChartHypothesis *bestHypo = GetBestHypothesis();
if (collector && bestHypo) {
const size_t translationId = m_source.GetTranslationId();
const ChartHypothesis *bestHypo = GetBestHypothesis();
OutputBestHypo(collector, bestHypo, translationId);
const size_t translationId = m_source.GetTranslationId();
const ChartHypothesis *bestHypo = GetBestHypothesis();
OutputBestHypo(collector, bestHypo, translationId);
}
}
void ChartManager::OutputNBest(OutputCollector *collector) const
{
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
if (nBestSize > 0) {
const size_t translationId = m_source.GetTranslationId();
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
if (nBestSize > 0) {
const size_t translationId = m_source.GetTranslationId();
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
OutputNBestList(collector, nBestList, translationId);
IFVERBOSE(2) {
PrintUserTime("N-Best Hypotheses Generation Time:");
}
}
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
OutputNBestList(collector, nBestList, translationId);
IFVERBOSE(2) {
PrintUserTime("N-Best Hypotheses Generation Time:");
}
}
}
void ChartManager::OutputNBestList(OutputCollector *collector,
const ChartKBestExtractor::KBestVec &nBestList,
long translationId) const
const ChartKBestExtractor::KBestVec &nBestList,
long translationId) const
{
const StaticData &staticData = StaticData::Instance();
const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
@ -344,7 +346,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
}
bool includeWordAlignment =
StaticData::Instance().PrintAlignmentInfoInNbest();
StaticData::Instance().PrintAlignmentInfoInNbest();
bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
@ -357,7 +359,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
// delete <s> and </s>
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
@ -405,9 +407,9 @@ size_t ChartManager::CalcSourceSize(const Moses::ChartHypothesis *hypo) const
}
size_t ChartManager::OutputAlignmentNBest(
Alignments &retAlign,
const Moses::ChartKBestExtractor::Derivation &derivation,
size_t startTarget) const
Alignments &retAlign,
const Moses::ChartKBestExtractor::Derivation &derivation,
size_t startTarget) const
{
const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
@ -448,7 +450,7 @@ size_t ChartManager::OutputAlignmentNBest(
// Recursively look thru child hypos
size_t currStartTarget = startTarget + totalTargetSize;
size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
currStartTarget);
currStartTarget);
targetOffsets[targetPos] = targetSize;
totalTargetSize += targetSize;
@ -486,22 +488,22 @@ size_t ChartManager::OutputAlignmentNBest(
void ChartManager::OutputAlignment(OutputCollector *collector) const
{
if (collector == NULL) {
return;
return;
}
ostringstream out;
const ChartHypothesis *hypo = GetBestHypothesis();
if (hypo) {
Alignments retAlign;
OutputAlignment(retAlign, hypo, 0);
Alignments retAlign;
OutputAlignment(retAlign, hypo, 0);
// output alignments
Alignments::const_iterator iter;
for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
const pair<size_t, size_t> &alignPoint = *iter;
out << alignPoint.first << "-" << alignPoint.second << " ";
}
// output alignments
Alignments::const_iterator iter;
for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
const pair<size_t, size_t> &alignPoint = *iter;
out << alignPoint.first << "-" << alignPoint.second << " ";
}
}
out << endl;
@ -510,8 +512,8 @@ void ChartManager::OutputAlignment(OutputCollector *collector) const
}
size_t ChartManager::OutputAlignment(Alignments &retAlign,
const Moses::ChartHypothesis *hypo,
size_t startTarget) const
const Moses::ChartHypothesis *hypo,
size_t startTarget) const
{
size_t totalTargetSize = 0;
size_t startSource = hypo->GetCurrSourceRange().GetStartPos();
@ -536,7 +538,7 @@ size_t ChartManager::OutputAlignment(Alignments &retAlign,
size_t targetInd = 0;
for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
if (tp.GetWord(targetPos).IsNonTerminal()) {
UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
size_t sourceInd = targetPos2SourceInd[targetPos];
size_t sourcePos = sourceInd2pos[sourceInd];
@ -587,19 +589,19 @@ size_t ChartManager::OutputAlignment(Alignments &retAlign,
void ChartManager::OutputDetailedTranslationReport(OutputCollector *collector) const
{
if (collector) {
OutputDetailedTranslationReport(collector,
GetBestHypothesis(),
static_cast<const Sentence&>(m_source),
m_source.GetTranslationId());
}
if (collector) {
OutputDetailedTranslationReport(collector,
GetBestHypothesis(),
static_cast<const Sentence&>(m_source),
m_source.GetTranslationId());
}
}
void ChartManager::OutputDetailedTranslationReport(
OutputCollector *collector,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const
OutputCollector *collector,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const
{
if (hypo == NULL) {
return;
@ -610,24 +612,24 @@ void ChartManager::OutputDetailedTranslationReport(
OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
collector->Write(translationId, out.str());
//DIMw
const StaticData &staticData = StaticData::Instance();
//DIMw
const StaticData &staticData = StaticData::Instance();
if (staticData.IsDetailedAllTranslationReportingEnabled()) {
const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
size_t nBestSize = staticData.GetNBestSize();
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
}
if (staticData.IsDetailedAllTranslationReportingEnabled()) {
const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
size_t nBestSize = staticData.GetNBestSize();
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
}
}
void ChartManager::OutputTranslationOptions(std::ostream &out,
ApplicationContext &applicationContext,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const
ApplicationContext &applicationContext,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const
{
if (hypo != NULL) {
OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
@ -644,10 +646,10 @@ void ChartManager::OutputTranslationOptions(std::ostream &out,
}
void ChartManager::OutputTranslationOption(std::ostream &out,
ApplicationContext &applicationContext,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const
ApplicationContext &applicationContext,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const
{
ReconstructApplicationContext(*hypo, sentence, applicationContext);
out << "Trans Opt " << translationId
@ -691,16 +693,16 @@ void ChartManager::ReconstructApplicationContext(const ChartHypothesis &hypo,
void ChartManager::OutputUnknowns(OutputCollector *collector) const
{
if (collector) {
long translationId = m_source.GetTranslationId();
const std::vector<Phrase*> &oovs = GetParser().GetUnknownSources();
long translationId = m_source.GetTranslationId();
const std::vector<Phrase*> &oovs = GetParser().GetUnknownSources();
std::ostringstream out;
for (std::vector<Phrase*>::const_iterator p = oovs.begin();
p != oovs.end(); ++p) {
out << *p;
}
out << std::endl;
collector->Write(translationId, out.str());
std::ostringstream out;
for (std::vector<Phrase*>::const_iterator p = oovs.begin();
p != oovs.end(); ++p) {
out << *p;
}
out << std::endl;
collector->Write(translationId, out.str());
}
}
@ -709,7 +711,7 @@ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector
{
const ChartHypothesis *hypo = GetBestHypothesis();
if (collector == NULL || hypo == NULL) {
return;
return;
}
std::ostringstream out;
@ -723,14 +725,14 @@ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector
//Tree of full sentence
const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure();
if (treeStructure != NULL) {
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ ) {
if (sff[i] == treeStructure) {
const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
break;
}
}
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ ) {
if (sff[i] == treeStructure) {
const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
break;
}
}
}
collector->Write(translationId, out.str());
@ -738,10 +740,10 @@ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector
}
void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
ApplicationContext &applicationContext,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const
ApplicationContext &applicationContext,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const
{
if (hypo != NULL) {
@ -769,20 +771,20 @@ void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
void ChartManager::OutputSearchGraph(OutputCollector *collector) const
{
if (collector) {
long translationId = m_source.GetTranslationId();
std::ostringstream out;
OutputSearchGraphMoses( out);
collector->Write(translationId, out.str());
}
if (collector) {
long translationId = m_source.GetTranslationId();
std::ostringstream out;
OutputSearchGraphMoses( out);
collector->Write(translationId, out.str());
}
}
//DIMw
void ChartManager::OutputDetailedAllTranslationReport(
OutputCollector *collector,
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
const Sentence &sentence,
long translationId) const
OutputCollector *collector,
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
const Sentence &sentence,
long translationId) const
{
std::ostringstream out;
ApplicationContext applicationContext;
@ -813,8 +815,8 @@ void ChartManager::OutputSearchGraphHypergraph() const
{
const StaticData &staticData = StaticData::Instance();
if (staticData.GetOutputSearchGraphHypergraph()) {
HypergraphOutput<ChartManager> hypergraphOutputChart(PRECISION);
hypergraphOutputChart.Write(*this);
HypergraphOutput<ChartManager> hypergraphOutputChart(PRECISION);
hypergraphOutputChart.Write(*this);
}
}
@ -842,7 +844,7 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe
// delete 1st & last
UTIL_THROW_IF2(outPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outPhrase.RemoveWord(0);
outPhrase.RemoveWord(outPhrase.GetSize() - 1);

View File

@ -56,49 +56,49 @@ private:
ChartTranslationOptionList m_translationOptionList; /**< pre-computed list of translation options for the phrases in this sentence */
/* auxilliary functions for SearchGraphs */
void FindReachableHypotheses(
const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable , size_t* winners, size_t* losers) const;
void FindReachableHypotheses(
const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable , size_t* winners, size_t* losers) const;
void WriteSearchGraph(const ChartSearchGraphWriter& writer) const;
// output
void OutputNBestList(OutputCollector *collector,
const ChartKBestExtractor::KBestVec &nBestList,
long translationId) const;
const ChartKBestExtractor::KBestVec &nBestList,
long translationId) const;
size_t CalcSourceSize(const Moses::ChartHypothesis *hypo) const;
size_t OutputAlignmentNBest(Alignments &retAlign,
const Moses::ChartKBestExtractor::Derivation &derivation,
size_t startTarget) const;
const Moses::ChartKBestExtractor::Derivation &derivation,
size_t startTarget) const;
size_t OutputAlignment(Alignments &retAlign,
const Moses::ChartHypothesis *hypo,
size_t startTarget) const;
const Moses::ChartHypothesis *hypo,
size_t startTarget) const;
void OutputDetailedTranslationReport(
OutputCollector *collector,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const;
OutputCollector *collector,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const;
void OutputTranslationOptions(std::ostream &out,
ApplicationContext &applicationContext,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const;
ApplicationContext &applicationContext,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const;
void OutputTranslationOption(std::ostream &out,
ApplicationContext &applicationContext,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const;
ApplicationContext &applicationContext,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const;
void ReconstructApplicationContext(const ChartHypothesis &hypo,
const Sentence &sentence,
ApplicationContext &context) const;
const Sentence &sentence,
ApplicationContext &context) const;
void OutputTreeFragmentsTranslationOptions(std::ostream &out,
ApplicationContext &applicationContext,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const;
ApplicationContext &applicationContext,
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId) const;
void OutputDetailedAllTranslationReport(
OutputCollector *collector,
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
const Sentence &sentence,
long translationId) const;
OutputCollector *collector,
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
const Sentence &sentence,
long translationId) const;
void OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const;
void Backtrack(const ChartHypothesis *hypo) const;
@ -126,8 +126,8 @@ public:
return m_hypoStackColl;
}
void CalcDecoderStatistics() const
{}
void CalcDecoderStatistics() const {
}
void ResetSentenceStats(const InputType& source) {
m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
@ -138,22 +138,24 @@ public:
return m_hypothesisId++;
}
const ChartParser &GetParser() const { return m_parser; }
const ChartParser &GetParser() const {
return m_parser;
}
// outputs
void OutputBest(OutputCollector *collector) const;
void OutputNBest(OutputCollector *collector) const;
void OutputLatticeSamples(OutputCollector *collector) const
{}
void OutputLatticeSamples(OutputCollector *collector) const {
}
void OutputAlignment(OutputCollector *collector) const;
void OutputDetailedTranslationReport(OutputCollector *collector) const;
void OutputUnknowns(OutputCollector *collector) const;
void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const;
void OutputWordGraph(OutputCollector *collector) const
{}
void OutputWordGraph(OutputCollector *collector) const {
}
void OutputSearchGraph(OutputCollector *collector) const;
void OutputSearchGraphSLF() const
{}
void OutputSearchGraphSLF() const {
}
void OutputSearchGraphHypergraph() const;
};

View File

@ -65,7 +65,7 @@ public:
* \param outColl return argument
*/
virtual void GetChartRuleCollection(
const InputPath &inputPath,
const InputPath &inputPath,
size_t lastPos, // last position to consider if using lookahead
ChartParserCallback &outColl) = 0;

View File

@ -11,8 +11,8 @@ ChartTranslationOption::ChartTranslationOption(const TargetPhrase &targetPhrase)
}
void ChartTranslationOption::EvaluateWithSourceContext(const InputType &input,
const InputPath &inputPath,
const StackVec &stackVec)
const InputPath &inputPath,
const StackVec &stackVec)
{
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();

View File

@ -46,8 +46,8 @@ public:
}
void EvaluateWithSourceContext(const InputType &input,
const InputPath &inputPath,
const StackVec &stackVec);
const InputPath &inputPath,
const StackVec &stackVec);
};
}

View File

@ -71,10 +71,9 @@ void ChartTranslationOptions::EvaluateWithSourceContext(const InputType &input,
ChartTranslationOption *transOpt = m_collection[i].get();
if (transOpt->GetScores().GetWeightedScore() == - std::numeric_limits<float>::infinity()) {
++numDiscard;
}
else if (numDiscard) {
m_collection[i - numDiscard] = m_collection[i];
++numDiscard;
} else if (numDiscard) {
m_collection[i - numDiscard] = m_collection[i];
}
}
@ -135,12 +134,12 @@ void ChartTranslationOptions::CreateSourceRuleFromInputPath()
std::ostream& operator<<(std::ostream &out, const ChartTranslationOptions &obj)
{
for (size_t i = 0; i < obj.m_collection.size(); ++i) {
const ChartTranslationOption &transOpt = *obj.m_collection[i];
out << transOpt << endl;
}
for (size_t i = 0; i < obj.m_collection.size(); ++i) {
const ChartTranslationOption &transOpt = *obj.m_collection[i];
out << transOpt << endl;
}
return out;
return out;
}
}

View File

@ -13,297 +13,297 @@
namespace Moses
{
struct CNStats {
size_t created,destr,read,colls,words;
CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
~CNStats() {
print(std::cerr);
}
struct CNStats {
size_t created,destr,read,colls,words;
void createOne() {
++created;
}
void destroyOne() {
++destr;
}
void collect(const ConfusionNet& cn) {
++read;
colls+=cn.GetSize();
for(size_t i=0; i<cn.GetSize(); ++i)
words+=cn[i].size();
}
void print(std::ostream& out) const {
if(created>0) {
out<<"confusion net statistics:\n"
" created:\t"<<created<<"\n"
" destroyed:\t"<<destr<<"\n"
" succ. read:\t"<<read<<"\n"
" columns:\t"<<colls<<"\n"
" words:\t"<<words<<"\n"
" avg. word/column:\t"<<words/(1.0*colls)<<"\n"
" avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
"\n\n";
}
}
};
CNStats stats;
size_t
ConfusionNet::
GetColumnIncrement(size_t i, size_t j) const
{
(void) i;
(void) j;
return 1;
CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
~CNStats() {
print(std::cerr);
}
ConfusionNet::
ConfusionNet()
: InputType()
{
stats.createOne();
const StaticData& staticData = StaticData::Instance();
if (staticData.IsChart()) {
m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
}
UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
void createOne() {
++created;
}
void destroyOne() {
++destr;
}
ConfusionNet::
~ConfusionNet()
{
stats.destroyOne();
void collect(const ConfusionNet& cn) {
++read;
colls+=cn.GetSize();
for(size_t i=0; i<cn.GetSize(); ++i)
words+=cn[i].size();
}
ConfusionNet::
ConfusionNet(Sentence const& s)
{
data.resize(s.GetSize());
for(size_t i=0; i<s.GetSize(); ++i) {
ScorePair scorePair;
std::pair<Word, ScorePair > temp = std::make_pair(s.GetWord(i), scorePair);
data[i].push_back(temp);
void print(std::ostream& out) const {
if(created>0) {
out<<"confusion net statistics:\n"
" created:\t"<<created<<"\n"
" destroyed:\t"<<destr<<"\n"
" succ. read:\t"<<read<<"\n"
" columns:\t"<<colls<<"\n"
" words:\t"<<words<<"\n"
" avg. word/column:\t"<<words/(1.0*colls)<<"\n"
" avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
"\n\n";
}
}
};
bool
ConfusionNet::
ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
{
VERBOSE(2, "read confusion net with format "<<format<<"\n");
switch(format) {
case 0:
return ReadFormat0(in,factorOrder);
case 1:
return ReadFormat1(in,factorOrder);
default:
std::cerr << "ERROR: unknown format '"<<format
<<"' in ConfusionNet::Read";
}
return false;
}
CNStats stats;
int
ConfusionNet::
Read(std::istream& in,
const std::vector<FactorType>& factorOrder)
{
int rv=ReadF(in,factorOrder,0);
if(rv) stats.collect(*this);
return rv;
/// Returns the column increment, which is 1 regardless of the two
/// position arguments (both are intentionally unused).
size_t ConfusionNet::GetColumnIncrement(size_t /* i */, size_t /* j */) const
{
  return 1;
}
/// Default constructor.
/// Counts the creation in the file-level `stats` object and, when
/// StaticData reports chart decoding, adds the default input non-terminal
/// to m_defaultLabelSet. Raises through UTIL_THROW_IF2 when the address of
/// the InputFeature instance compares equal to NULL.
ConfusionNet::ConfusionNet()
  : InputType()
{
  stats.createOne();

  // One lookup of the StaticData instance serves both queries below.
  const StaticData &config = StaticData::Instance();
  if (config.IsChart()) {
    m_defaultLabelSet.insert(config.GetInputDefaultNonTerminal());
  }

  UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
}
/// Destructor: counts the destruction in the file-level `stats` object.
ConfusionNet::~ConfusionNet()
{
  stats.destroyOne();
}
/// Builds a confusion net from a sentence: one column per word of `s`,
/// each column holding that single word paired with a default-constructed
/// ScorePair.
ConfusionNet::ConfusionNet(Sentence const& s)
{
  const size_t numWords = s.GetSize();
  data.resize(numWords);

  for (size_t pos = 0; pos < numWords; ++pos) {
    ScorePair emptyScores;
    data[pos].push_back(std::make_pair(s.GetWord(pos), emptyScores));
  }
}
/// Reads a confusion net from `in` using the reader selected by `format`.
/// \param in          stream to read from
/// \param factorOrder factor types passed through to the format reader
/// \param format      0 selects ReadFormat0, 1 selects ReadFormat1
/// \return the selected reader's result; false (after writing an error to
///         std::cerr) for any other format value
bool
ConfusionNet::
ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
{
  VERBOSE(2, "read confusion net with format "<<format<<"\n");

  if (format == 0) {
    return ReadFormat0(in,factorOrder);
  }
  if (format == 1) {
    return ReadFormat1(in,factorOrder);
  }

  // Unknown format: report it and signal failure.
  std::cerr << "ERROR: unknown format '"<<format
            <<"' in ConfusionNet::Read";
  return false;
}
/// Reads a confusion net in format 0 via ReadF and, on success, folds the
/// freshly read net into the file-level `stats` object.
/// \return 1 when ReadF succeeded, 0 otherwise (ReadF's bool as an int)
int
ConfusionNet::
Read(std::istream& in,
     const std::vector<FactorType>& factorOrder)
{
  const bool ok = ReadF(in,factorOrder,0);
  if (ok) {
    stats.collect(*this);
  }
  return ok;
}
#if 0
// Deprecated due to code duplication;
// use Word::CreateFromString() instead
void
ConfusionNet::
String2Word(const std::string& s,Word& w,
const std::vector<FactorType>& factorOrder)
{
std::vector<std::string> factorStrVector = Tokenize(s, "|");
for(size_t i=0; i<factorOrder.size(); ++i)
w.SetFactor(factorOrder[i],
FactorCollection::Instance().AddFactor
(Input,factorOrder[i], factorStrVector[i]));
}
// Deprecated due to code duplication;
// use Word::CreateFromString() instead
void
ConfusionNet::
String2Word(const std::string& s,Word& w,
const std::vector<FactorType>& factorOrder)
{
std::vector<std::string> factorStrVector = Tokenize(s, "|");
for(size_t i=0; i<factorOrder.size(); ++i)
w.SetFactor(factorOrder[i],
FactorCollection::Instance().AddFactor
(Input,factorOrder[i], factorStrVector[i]));
}
#endif
bool
ConfusionNet::
ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
{
Clear();
bool
ConfusionNet::
ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
{
Clear();
// const StaticData &staticData = StaticData::Instance();
const InputFeature &inputFeature = InputFeature::Instance();
size_t numInputScores = inputFeature.GetNumInputScores();
size_t numRealWordCount = inputFeature.GetNumRealWordsInInput();
// const StaticData &staticData = StaticData::Instance();
const InputFeature &inputFeature = InputFeature::Instance();
size_t numInputScores = inputFeature.GetNumInputScores();
size_t numRealWordCount = inputFeature.GetNumRealWordsInInput();
size_t totalCount = numInputScores + numRealWordCount;
bool addRealWordCount = (numRealWordCount > 0);
size_t totalCount = numInputScores + numRealWordCount;
bool addRealWordCount = (numRealWordCount > 0);
std::string line;
while(getline(in,line)) {
std::istringstream is(line);
std::string word;
std::string line;
while(getline(in,line)) {
std::istringstream is(line);
std::string word;
Column col;
while(is>>word) {
Word w;
// String2Word(word,w,factorOrder);
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
std::vector<float> probs(totalCount, 0.0);
for(size_t i=0; i < numInputScores; i++) {
double prob;
if (!(is>>prob)) {
TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
return false;
}
if(prob<0.0) {
VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
prob=0.0;
} else if (prob>1.0) {
VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
prob=1.0;
}
probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
Column col;
while(is>>word) {
Word w;
// String2Word(word,w,factorOrder);
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
std::vector<float> probs(totalCount, 0.0);
for(size_t i=0; i < numInputScores; i++) {
double prob;
if (!(is>>prob)) {
TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
return false;
}
if(prob<0.0) {
VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
prob=0.0;
} else if (prob>1.0) {
VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
prob=1.0;
}
probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
}
//store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
if (addRealWordCount && word!=EPSILON && word!="")
probs.back() = -1.0;
ScorePair scorePair(probs);
col.push_back(std::make_pair(w,scorePair));
}
if(col.size()) {
data.push_back(col);
ShrinkToFit(data.back());
} else break;
}
return !data.empty();
}
//store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
if (addRealWordCount && word!=EPSILON && word!="")
probs.back() = -1.0;
bool
ConfusionNet::
ReadFormat1(std::istream& in, const std::vector<FactorType>& factorOrder)
{
Clear();
std::string line;
ScorePair scorePair(probs);
col.push_back(std::make_pair(w,scorePair));
}
if(col.size()) {
data.push_back(col);
ShrinkToFit(data.back());
} else break;
}
return !data.empty();
}
bool
ConfusionNet::
ReadFormat1(std::istream& in, const std::vector<FactorType>& factorOrder)
{
Clear();
std::string line;
if(!getline(in,line)) return 0;
size_t s;
if(getline(in,line)) s=atoi(line.c_str());
else return 0;
data.resize(s);
for(size_t i=0; i<data.size(); ++i) {
if(!getline(in,line)) return 0;
size_t s;
if(getline(in,line)) s=atoi(line.c_str());
else return 0;
data.resize(s);
for(size_t i=0; i<data.size(); ++i) {
if(!getline(in,line)) return 0;
std::istringstream is(line);
if(!(is>>s)) return 0;
std::string word;
double prob;
data[i].resize(s);
for(size_t j=0; j<s; ++j)
if(is>>word>>prob) {
//TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
data[i][j].second.denseScores = std::vector<float> (1);
data[i][j].second.denseScores.push_back((float) log(prob));
if(data[i][j].second.denseScores[0]<0) {
VERBOSE(1, "WARN: neg costs: "<<data[i][j].second.denseScores[0]<<" -> set to 0\n");
data[i][j].second.denseScores[0]=0.0;
}
// String2Word(word,data[i][j].first,factorOrder);
Word& w = data[i][j].first;
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
} else return 0;
}
return !data.empty();
std::istringstream is(line);
if(!(is>>s)) return 0;
std::string word;
double prob;
data[i].resize(s);
for(size_t j=0; j<s; ++j)
if(is>>word>>prob) {
//TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
data[i][j].second.denseScores = std::vector<float> (1);
data[i][j].second.denseScores.push_back((float) log(prob));
if(data[i][j].second.denseScores[0]<0) {
VERBOSE(1, "WARN: neg costs: "<<data[i][j].second.denseScores[0]<<" -> set to 0\n");
data[i][j].second.denseScores[0]=0.0;
}
// String2Word(word,data[i][j].first,factorOrder);
Word& w = data[i][j].first;
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
} else return 0;
}
return !data.empty();
}
void ConfusionNet::Print(std::ostream& out) const
{
out<<"conf net: "<<data.size()<<"\n";
for(size_t i=0; i<data.size(); ++i) {
out<<i<<" -- ";
for(size_t j=0; j<data[i].size(); ++j) {
out<<"("<<data[i][j].first.ToString()<<", ";
void ConfusionNet::Print(std::ostream& out) const
{
out<<"conf net: "<<data.size()<<"\n";
for(size_t i=0; i<data.size(); ++i) {
out<<i<<" -- ";
for(size_t j=0; j<data[i].size(); ++j) {
out<<"("<<data[i][j].first.ToString()<<", ";
// dense
std::vector<float>::const_iterator iterDense;
for(iterDense = data[i][j].second.denseScores.begin();
iterDense < data[i][j].second.denseScores.end();
++iterDense) {
out<<", "<<*iterDense;
}
// sparse
std::map<StringPiece, float>::const_iterator iterSparse;
for(iterSparse = data[i][j].second.sparseScores.begin();
iterSparse != data[i][j].second.sparseScores.end();
++iterSparse) {
out << ", " << iterSparse->first << "=" << iterSparse->second;
}
out<<") ";
// dense
std::vector<float>::const_iterator iterDense;
for(iterDense = data[i][j].second.denseScores.begin();
iterDense < data[i][j].second.denseScores.end();
++iterDense) {
out<<", "<<*iterDense;
}
out<<"\n";
// sparse
std::map<StringPiece, float>::const_iterator iterSparse;
for(iterSparse = data[i][j].second.sparseScores.begin();
iterSparse != data[i][j].second.sparseScores.end();
++iterSparse) {
out << ", " << iterSparse->first << "=" << iterSparse->second;
}
out<<") ";
}
out<<"\n\n";
out<<"\n";
}
out<<"\n\n";
}
#ifdef _WIN32
#pragma warning(disable:4716)
#endif
Phrase
ConfusionNet::
GetSubString(const WordsRange&) const
{
UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n");
//return Phrase(Input);
}
/// Not implemented for confusion nets: the body only reports the call
/// through UTIL_THROW2 and has no return statement.
Phrase ConfusionNet::GetSubString(const WordsRange&) const
{
  UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n");
  //return Phrase(Input);
}
std::string
ConfusionNet::
GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet
{
TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
return "";
}
std::string
ConfusionNet::
GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet
{
TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
return "";
}
#ifdef _WIN32
#pragma warning(disable:4716)
#endif
const Word& ConfusionNet::GetWord(size_t) const
{
UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n");
}
/// Not implemented for confusion nets: the body only reports the call
/// through UTIL_THROW2 and has no return statement.
const Word& ConfusionNet::GetWord(size_t) const
{
  // The message now names this function (it previously said "GetFactorArray").
  UTIL_THROW2("ERROR: call to ConfusionNet::GetWord\n");
}
#ifdef _WIN32
#pragma warning(default:4716)
#endif
std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
{
cn.Print(out);
return out;
}
/// Stream insertion for ConfusionNet: delegates to ConfusionNet::Print and
/// returns the same stream so insertions can be chained.
std::ostream& operator<<(std::ostream& out, const ConfusionNet& cn)
{
  cn.Print(out);
  return out;
}
TranslationOptionCollection*
ConfusionNet::
CreateTranslationOptionCollection() const
{
size_t maxNoTransOptPerCoverage
= StaticData::Instance().GetMaxNoTransOptPerCoverage();
float translationOptionThreshold
= StaticData::Instance().GetTranslationOptionThreshold();
TranslationOptionCollection *rv
= new TranslationOptionCollectionConfusionNet
(*this, maxNoTransOptPerCoverage, translationOptionThreshold);
assert(rv);
return rv;
}
/// Allocates a TranslationOptionCollectionConfusionNet for this net,
/// configured with StaticData's per-coverage option limit and translation
/// option threshold.
/// \return pointer to the object created with `new`
///         (NOTE(review): presumably owned by the caller — confirm)
TranslationOptionCollection*
ConfusionNet::
CreateTranslationOptionCollection() const
{
  const StaticData &config = StaticData::Instance();
  const size_t maxPerCoverage = config.GetMaxNoTransOptPerCoverage();
  const float optionThreshold = config.GetTranslationOptionThreshold();

  TranslationOptionCollection *collection
  = new TranslationOptionCollectionConfusionNet
  (*this, maxPerCoverage, optionThreshold);
  assert(collection);
  return collection;
}
}

View File

@ -49,8 +49,8 @@ public:
DecodeGraph(size_t id)
: m_id(id)
, m_maxChartSpan(NOT_FOUND)
, m_backoff(0)
{}
, m_backoff(0) {
}
// for chart decoding
DecodeGraph(size_t id, size_t maxChartSpan)

View File

@ -198,11 +198,11 @@ const InputPath &DecodeStepTranslation::GetInputPathLEGACY(
const Word *wordIP = NULL;
for (size_t i = 0; i < phraseFromIP.GetSize(); ++i) {
const Word &tempWord = phraseFromIP.GetWord(i);
if (!tempWord.IsEpsilon()) {
wordIP = &tempWord;
break;
}
const Word &tempWord = phraseFromIP.GetWord(i);
if (!tempWord.IsEpsilon()) {
wordIP = &tempWord;
break;
}
}
// const WordsRange &range = inputPath.GetWordsRange();
@ -237,7 +237,7 @@ void DecodeStepTranslation::ProcessLEGACY(const TranslationOption &inputPartialT
const size_t tableLimit = phraseDictionary->GetTableLimit();
const TargetPhraseCollectionWithSourcePhrase *phraseColl
= phraseDictionary->GetTargetPhraseCollectionLEGACY(toc->GetSource(),sourceWordsRange);
= phraseDictionary->GetTargetPhraseCollectionLEGACY(toc->GetSource(),sourceWordsRange);
if (phraseColl != NULL) {

View File

@ -502,8 +502,8 @@ void BleuScoreFeature::GetClippedNgramMatchesAndCounts(Phrase& phrase,
* phrase translated.
*/
FFState* BleuScoreFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
{
if (!m_enabled) return new BleuScoreState();

View File

@ -116,27 +116,27 @@ public:
size_t skip = 0) const;
FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
int featureID,
ScoreComponentCollection* accumulator) const;
int featureID,
ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{}
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {
}
bool Enabled() const {
return m_enabled;

View File

@ -11,8 +11,8 @@ namespace Moses
class ConstrainedDecodingState : public FFState
{
public:
ConstrainedDecodingState()
{}
ConstrainedDecodingState() {
}
ConstrainedDecodingState(const Hypothesis &hypo);
ConstrainedDecodingState(const ChartHypothesis &hypo);
@ -42,23 +42,23 @@ public:
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{}
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -20,8 +20,8 @@ class ControlRecombinationState : public FFState
{
public:
ControlRecombinationState(const ControlRecombination &ff)
:m_ff(ff)
{}
:m_ff(ff) {
}
ControlRecombinationState(const Hypothesis &hypo, const ControlRecombination &ff);
ControlRecombinationState(const ChartHypothesis &hypo, const ControlRecombination &ff);
@ -58,22 +58,22 @@ public:
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{}
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -8,18 +8,18 @@ using namespace std;
namespace Moses
{
CountNonTerms::CountNonTerms(const std::string &line)
:StatelessFeatureFunction(line)
,m_all(true)
,m_sourceSyntax(false)
,m_targetSyntax(false)
:StatelessFeatureFunction(line)
,m_all(true)
,m_sourceSyntax(false)
,m_targetSyntax(false)
{
ReadParameters();
}
void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
const StaticData &staticData = StaticData::Instance();
@ -27,33 +27,33 @@ void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
size_t indScore = 0;
if (m_all) {
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const Word &word = targetPhrase.GetWord(i);
if (word.IsNonTerminal()) {
++scores[indScore];
}
}
++indScore;
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const Word &word = targetPhrase.GetWord(i);
if (word.IsNonTerminal()) {
++scores[indScore];
}
}
++indScore;
}
if (m_targetSyntax) {
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const Word &word = targetPhrase.GetWord(i);
if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
++scores[indScore];
}
}
++indScore;
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const Word &word = targetPhrase.GetWord(i);
if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
++scores[indScore];
}
}
++indScore;
}
if (m_sourceSyntax) {
for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
const Word &word = sourcePhrase.GetWord(i);
if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
++scores[indScore];
}
}
++indScore;
for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
const Word &word = sourcePhrase.GetWord(i);
if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
++scores[indScore];
}
}
++indScore;
}
scoreBreakdown.PlusEquals(this, scores);
@ -64,9 +64,9 @@ void CountNonTerms::SetParameter(const std::string& key, const std::string& valu
if (key == "all") {
m_all = Scan<bool>(value);
} else if (key == "source-syntax") {
m_sourceSyntax = Scan<bool>(value);
m_sourceSyntax = Scan<bool>(value);
} else if (key == "target-syntax") {
m_targetSyntax = Scan<bool>(value);
m_targetSyntax = Scan<bool>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}

View File

@ -14,30 +14,30 @@ public:
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{}
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
ScoreComponentCollection* accumulator) const {
}
void EvaluateWhenApplied(
const ChartHypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
ScoreComponentCollection* accumulator) const {
}
void SetParameter(const std::string& key, const std::string& value);

View File

@ -22,44 +22,44 @@ int CoveredReferenceState::Compare(const FFState& other) const
const CoveredReferenceState &otherState = static_cast<const CoveredReferenceState&>(other);
if (m_coveredRef.size() != otherState.m_coveredRef.size()) {
return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
} else {
multiset<string>::const_iterator thisIt, otherIt;
for (thisIt = m_coveredRef.begin(), otherIt = otherState.m_coveredRef.begin();
thisIt != m_coveredRef.end();
thisIt++, otherIt++) {
thisIt != m_coveredRef.end();
thisIt++, otherIt++) {
if (*thisIt != *otherIt) return thisIt->compare(*otherIt);
}
}
return 0;
// return m_coveredRef == otherState.m_coveredRef;
// if (m_coveredRef == otherState.m_coveredRef)
// return 0;
// return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
}
void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore) const
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore) const
{
long id = input.GetTranslationId();
boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
multiset<string> wordsInPhrase = GetWordsInPhrase(targetPhrase);
multiset<string> covered;
set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
refIt->second.begin(), refIt->second.end(),
inserter(covered, covered.begin()));
refIt->second.begin(), refIt->second.end(),
inserter(covered, covered.begin()));
vector<float> scores;
scores.push_back(covered.size());
@ -67,7 +67,8 @@ void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
estimatedFutureScore->Assign(this, scores);
}
void CoveredReferenceFeature::Load() {
void CoveredReferenceFeature::Load()
{
InputFileStream refFile(m_path);
std::string line;
const StaticData &staticData = StaticData::Instance();
@ -76,7 +77,7 @@ void CoveredReferenceFeature::Load() {
vector<string> words = Tokenize(line, " ");
multiset<string> wordSet;
// TODO make Tokenize work with other containers than vector
copy(words.begin(), words.end(), inserter(wordSet, wordSet.begin()));
copy(words.begin(), words.end(), inserter(wordSet, wordSet.begin()));
m_refs.insert(make_pair(sentenceID++, wordSet));
}
}
@ -107,15 +108,15 @@ FFState* CoveredReferenceFeature::EvaluateWhenApplied(
boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
if (refIt == m_refs.end()) UTIL_THROW(util::Exception, "Sentence id out of range: " + SPrint<long>(id));
set_difference(refIt->second.begin(), refIt->second.end(),
ret->m_coveredRef.begin(), ret->m_coveredRef.end(),
inserter(remaining, remaining.begin()));
ret->m_coveredRef.begin(), ret->m_coveredRef.end(),
inserter(remaining, remaining.begin()));
// which of the remaining words are present in the current phrase
multiset<string> wordsInPhrase = GetWordsInPhrase(cur_hypo.GetCurrTargetPhrase());
multiset<string> newCovered;
set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
remaining.begin(), remaining.end(),
inserter(newCovered, newCovered.begin()));
remaining.begin(), remaining.end(),
inserter(newCovered, newCovered.begin()));
vector<float> estimateScore =
cur_hypo.GetCurrTargetPhrase().GetScoreBreakdown().GetScoresForProducer(this);

View File

@ -52,20 +52,20 @@ public:
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{}
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -63,30 +63,30 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
ScoreComponentCollection* accumulator) const {
}
void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
ScoreComponentCollection* accumulator) const {
}
void EvaluateWhenApplied(const Syntax::SHyperedge &hyperedge,
ScoreComponentCollection* accumulator) const
{}
ScoreComponentCollection* accumulator) const {
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{}
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {
}
void SetContainer(const DecodeStep *container) {
m_container = container;

View File

@ -48,22 +48,22 @@ public:
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{}
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {
}
};
}

View File

@ -93,12 +93,16 @@ public:
}
static const DynamicCacheBasedLanguageModel* Instance(const std::string& name) {
if (s_instance_map.find(name) == s_instance_map.end()){ return NULL; }
if (s_instance_map.find(name) == s_instance_map.end()) {
return NULL;
}
return s_instance_map[name];
}
static DynamicCacheBasedLanguageModel* InstanceNonConst(const std::string& name) {
if (s_instance_map.find(name) == s_instance_map.end()){ return NULL; }
if (s_instance_map.find(name) == s_instance_map.end()) {
return NULL;
}
return s_instance_map[name];
}
@ -126,29 +130,29 @@ public:
void Clear();
virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
// Source-context scoring hook. Empty body: arguments are ignored and no
// score is recorded.
void EvaluateWithSourceContext(const InputType &input
                               , const InputPath &inputPath
                               , const TargetPhrase &targetPhrase
                               , const StackVec *stackVec
                               , ScoreComponentCollection &scoreBreakdown
                               , ScoreComponentCollection *estimatedFutureScore = NULL) const {
  // no-op
}
// Translation-option-list hook. Empty body: nothing is scored or changed.
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
    , const TranslationOptionList &translationOptionList) const {
  // no-op
}
// Hypothesis-application hook (Hypothesis overload). Empty body: the
// accumulator is left untouched.
void EvaluateWhenApplied(const Hypothesis& hypo,
                         ScoreComponentCollection* accumulator) const {
  // no-op
}
// Hypothesis-application hook (ChartHypothesis overload). Empty body: the
// accumulator is left untouched.
void EvaluateWhenApplied(const ChartHypothesis &hypo,
                         ScoreComponentCollection* accumulator) const {
  // no-op
}
void SetQueryType(size_t type);
void SetScoreType(size_t type);

View File

@ -18,8 +18,8 @@ protected:
public:
// Stores the requested state size and starts with no data attached
// (m_data is set to NULL).
ExternalFeatureState(int stateSize)
  : m_stateSize(stateSize)
  , m_data(NULL) {
}
ExternalFeatureState(int stateSize, void *data);
~ExternalFeatureState() {
@ -52,22 +52,22 @@ public:
void SetParameter(const std::string& key, const std::string& value);
// Context-free scoring hook. Empty body: no score is contributed here.
void EvaluateInIsolation(const Phrase &source
                         , const TargetPhrase &targetPhrase
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const {
  // no-op
}
// Source-context scoring hook. Empty body: arguments are ignored and no
// score is recorded.
void EvaluateWithSourceContext(const InputType &input
                               , const InputPath &inputPath
                               , const TargetPhrase &targetPhrase
                               , const StackVec *stackVec
                               , ScoreComponentCollection &scoreBreakdown
                               , ScoreComponentCollection *estimatedFutureScore = NULL) const {
  // no-op
}
// Translation-option-list hook. Empty body: nothing is scored or changed.
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
    , const TranslationOptionList &translationOptionList) const {
  // no-op
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -242,7 +242,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(SkeletonChangeInput);
MOSES_FNAME(SkeletonTranslationOptionListFeature);
MOSES_FNAME(SkeletonPT);
#ifdef HAVE_VW
MOSES_FNAME(VW);
MOSES_FNAME(VWFeatureSourceBagOfWords);
@ -322,22 +322,22 @@ void FeatureRegistry::Construct(const std::string &name, const std::string &line
void FeatureRegistry::PrintFF() const
{
vector<string> ffs;
std::cerr << "Available feature functions:" << std::endl;
Map::const_iterator iter;
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
const string &ffName = iter->first;
ffs.push_back(ffName);
}
vector<string> ffs;
std::cerr << "Available feature functions:" << std::endl;
Map::const_iterator iter;
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
const string &ffName = iter->first;
ffs.push_back(ffName);
}
vector<string>::const_iterator iterVec;
std::sort(ffs.begin(), ffs.end());
for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
const string &ffName = *iterVec;
std::cerr << ffName << " ";
}
vector<string>::const_iterator iterVec;
std::sort(ffs.begin(), ffs.end());
for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
const string &ffName = *iterVec;
std::cerr << ffName << " ";
}
std::cerr << std::endl;
std::cerr << std::endl;
}
} // namespace Moses

View File

@ -38,8 +38,8 @@ void FeatureFunction::Destroy()
// Invokes ChangeSource(input) on every entry of s_staticColl, in index order.
// `input` is a reference to a pointer, so a callee may repoint it.
void FeatureFunction::CallChangeSource(InputType *&input)
{
  for (size_t idx = 0; idx < s_staticColl.size(); ++idx) {
    const FeatureFunction *feature = s_staticColl[idx];
    feature->ChangeSource(input);
  }
}

View File

@ -111,13 +111,13 @@ public:
// may have more factors than actually need, but not guaranteed.
// For SCFG decoding, the source contains non-terminals, NOT the raw source from the input sentence
virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const = 0;
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const = 0;
// override this method if you want to change the input before decoding
virtual void ChangeSource(InputType *&input) const {
  // default implementation: leave `input` exactly as it was passed in
}
// This method is called once all the translation options are retrieved from the phrase table, and
// just before search.
@ -127,12 +127,12 @@ public:
// For pb models, stackvec is NULL.
// No FF should set estimatedFutureScore in both overloads!
virtual void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const = 0;
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const = 0;
// This method is called once all the translation options are retrieved from the phrase table, and
// just before search.
// 'inputPath' is guaranteed to be the raw substring from the input. No factors were added or taken away
@ -141,7 +141,7 @@ public:
// For pb models, stackvec is NULL.
// No FF should set estimatedFutureScore in both overloads!
virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const = 0;
, const TranslationOptionList &translationOptionList) const = 0;
virtual void SetParameter(const std::string& key, const std::string& value);
virtual void ReadParameters();

View File

@ -165,11 +165,11 @@ float GlobalLexicalModel::GetFromCacheOrScorePhrase( const TargetPhrase& targetP
}
void GlobalLexicalModel::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
scoreBreakdown.PlusEquals( this, GetFromCacheOrScorePhrase(targetPhrase) );
scoreBreakdown.PlusEquals( this, GetFromCacheOrScorePhrase(targetPhrase) );
}
bool GlobalLexicalModel::IsUseable(const FactorMask &mask) const

View File

@ -71,29 +71,29 @@ public:
bool IsUseable(const FactorMask &mask) const;
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
// Hypothesis-application hook (Hypothesis overload). Empty body: the
// accumulator is left untouched.
void EvaluateWhenApplied(const Hypothesis& hypo,
                         ScoreComponentCollection* accumulator) const {
  // no-op
}
// Hypothesis-application hook (ChartHypothesis overload). Empty body: the
// accumulator is left untouched.
void EvaluateWhenApplied(const ChartHypothesis &hypo,
                         ScoreComponentCollection* accumulator) const {
  // no-op
}
// Source-context scoring hook. Empty body: arguments are ignored and no
// score is recorded.
void EvaluateWithSourceContext(const InputType &input
                               , const InputPath &inputPath
                               , const TargetPhrase &targetPhrase
                               , const StackVec *stackVec
                               , ScoreComponentCollection &scoreBreakdown
                               , ScoreComponentCollection *estimatedFutureScore = NULL) const {
  // no-op
}
// Translation-option-list hook. Empty body: nothing is scored or changed.
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
    , const TranslationOptionList &translationOptionList) const {
  // no-op
}
};
}

View File

@ -27,8 +27,8 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
// read optional punctuation and bias specifications
if (spec.size() > 0) {
if (spec.size() != 2 && spec.size() != 3 && spec.size() != 4 && spec.size() != 6) {
std::cerr << "Format of glm feature is <factor-src>-<factor-tgt> [ignore-punct] [use-bias] "
<< "[context-type] [filename-src filename-tgt]";
std::cerr << "Format of glm feature is <factor-src>-<factor-tgt> [ignore-punct] [use-bias] "
<< "[context-type] [filename-src filename-tgt]";
//return false;
}
@ -48,7 +48,7 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
factors = Tokenize(modelSpec[i],"-");
if ( factors.size() != 2 ) {
std::cerr << "Wrong factor definition for global lexical model unlimited: " << modelSpec[i];
std::cerr << "Wrong factor definition for global lexical model unlimited: " << modelSpec[i];
//return false;
}
@ -60,10 +60,10 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
if (restricted) {
cerr << "loading word translation word lists from " << filenameSource << " and " << filenameTarget << endl;
if (!glmu->Load(filenameSource, filenameTarget)) {
std::cerr << "Unable to load word lists for word translation feature from files "
<< filenameSource
<< " and "
<< filenameTarget;
std::cerr << "Unable to load word lists for word translation feature from files "
<< filenameSource
<< " and "
<< filenameTarget;
//return false;
}
}

View File

@ -82,31 +82,31 @@ public:
//TODO: This implements the old interface, but cannot be updated because
//it appears to be stateful
void EvaluateWhenApplied(const Hypothesis& cur_hypo,
ScoreComponentCollection* accumulator) const;
ScoreComponentCollection* accumulator) const;
// Chart-decoder overload: not implemented for this feature. Every call throws
// std::logic_error, so all three (unnamed) parameters are unused.
void EvaluateWhenApplied(const ChartHypothesis& /* cur_hypo */,
                         int /* featureID */,
                         ScoreComponentCollection* ) const {
  throw std::logic_error("GlobalLexicalModelUnlimited not supported in chart decoder, yet");
}
// Source-context scoring hook. Empty body: arguments are ignored and no
// score is recorded.
void EvaluateWithSourceContext(const InputType &input
                               , const InputPath &inputPath
                               , const TargetPhrase &targetPhrase
                               , const StackVec *stackVec
                               , ScoreComponentCollection &scoreBreakdown
                               , ScoreComponentCollection *estimatedFutureScore = NULL) const {
  // no-op
}
// Translation-option-list hook. Empty body: nothing is scored or changed.
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
    , const TranslationOptionList &translationOptionList) const {
  // no-op
}
// Context-free scoring hook. Empty body: no score is contributed here.
void EvaluateInIsolation(const Phrase &source
                         , const TargetPhrase &targetPhrase
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const {
  // no-op
}
void AddFeature(ScoreComponentCollection* accumulator,
StringPiece sourceTrigger, StringPiece sourceWord, StringPiece targetTrigger,

View File

@ -19,33 +19,33 @@ public:
}
// Context-free scoring hook with an empty default body; being virtual, it
// can be replaced by a derived class.
virtual void EvaluateInIsolation(const Phrase &source
                                 , const TargetPhrase &targetPhrase
                                 , ScoreComponentCollection &scoreBreakdown
                                 , ScoreComponentCollection &estimatedFutureScore) const {
  // no-op
}
// Source-context scoring hook with an empty default body; being virtual, it
// can be replaced by a derived class.
virtual void EvaluateWithSourceContext(const InputType &input
                                       , const InputPath &inputPath
                                       , const TargetPhrase &targetPhrase
                                       , const StackVec *stackVec
                                       , ScoreComponentCollection &scoreBreakdown
                                       , ScoreComponentCollection *estimatedFutureScore = NULL) const {
  // no-op
}
// Translation-option-list hook with an empty default body.
virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
    , const TranslationOptionList &translationOptionList) const {
  // no-op
}
// Hypothesis-application hook (Hypothesis overload) with an empty default
// body: the accumulator is left untouched.
virtual void EvaluateWhenApplied(const Hypothesis& hypo,
                                 ScoreComponentCollection* accumulator) const {
  // no-op
}
/**
* Same for chart-based features.
**/
// ChartHypothesis overload with an empty default body: the accumulator is
// left untouched.
virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
                                 ScoreComponentCollection* accumulator) const {
  // no-op
}
};

View File

@ -45,11 +45,11 @@ void InputFeature::SetParameter(const std::string& key, const std::string& value
}
void InputFeature::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore) const
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore) const
{
if (m_legacy) {
//binary phrase-table does input feature itself

View File

@ -42,28 +42,28 @@ public:
}
// Context-free scoring hook. Empty body: no score is contributed here.
void EvaluateInIsolation(const Phrase &source
                         , const TargetPhrase &targetPhrase
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const {
  // no-op
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
// Translation-option-list hook. Empty body: nothing is scored or changed.
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
    , const TranslationOptionList &translationOptionList) const {
  // no-op
}
// Hypothesis-application hook (Hypothesis overload). Empty body: the
// accumulator is left untouched.
void EvaluateWhenApplied(const Hypothesis& hypo,
                         ScoreComponentCollection* accumulator) const {
  // no-op
}
// Hypothesis-application hook (ChartHypothesis overload). Empty body: the
// accumulator is left untouched.
void EvaluateWhenApplied(const ChartHypothesis &hypo,
                         ScoreComponentCollection* accumulator) const {
  // no-op
}
};

View File

@ -4,236 +4,241 @@ namespace Moses
{
// Builds a single node whose label is the substring line[start, start+len).
// With len == 0 the label stays empty. The numeric label starts at 0 and the
// terminal flag is taken from `terminal`.
InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool terminal):
  m_value_nt(0),
  m_isTerminal(terminal)
{
  if (len == 0) {
    return;
  }
  m_value.assign(line, start, len);
}
// Builds a tree from `line`. If the text contains none of '[', ']' or ' ' it
// is taken verbatim as this node's label; otherwise it is parsed as a
// bracketed tree via AddSubTree() starting at offset 0.
InternalTree::InternalTree(const std::string & line, const bool terminal):
  m_value_nt(0),
  m_isTerminal(terminal)
{
  const bool hasStructure = line.find_first_of("[] ") != std::string::npos;

  if (hasStructure) {
    AddSubTree(line, 0);
  } else {
    m_value = line;
  }
}
size_t InternalTree::AddSubTree(const std::string & line, size_t pos) {
size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
{
char token = 0;
size_t len = 0;
char token = 0;
size_t len = 0;
while (token != ']' && pos != std::string::npos)
{
size_t oldpos = pos;
pos = line.find_first_of("[] ", pos);
if (pos == std::string::npos) break;
token = line[pos];
len = pos-oldpos;
while (token != ']' && pos != std::string::npos) {
size_t oldpos = pos;
pos = line.find_first_of("[] ", pos);
if (pos == std::string::npos) break;
token = line[pos];
len = pos-oldpos;
if (token == '[') {
if (!m_value.empty()) {
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
pos = m_children.back()->AddSubTree(line, pos+1);
}
else {
if (len > 0) {
m_value.assign(line, oldpos, len);
}
pos = AddSubTree(line, pos+1);
}
}
else if (token == ' ' || token == ']') {
if (len > 0 && m_value.empty()) {
m_value.assign(line, oldpos, len);
}
else if (len > 0) {
m_isTerminal = false;
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
}
if (token == ' ') {
pos++;
}
}
if (!m_children.empty()) {
m_isTerminal = false;
if (token == '[') {
if (!m_value.empty()) {
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
pos = m_children.back()->AddSubTree(line, pos+1);
} else {
if (len > 0) {
m_value.assign(line, oldpos, len);
}
pos = AddSubTree(line, pos+1);
}
} else if (token == ' ' || token == ']') {
if (len > 0 && m_value.empty()) {
m_value.assign(line, oldpos, len);
} else if (len > 0) {
m_isTerminal = false;
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
}
if (token == ' ') {
pos++;
}
}
if (pos == std::string::npos) {
return line.size();
if (!m_children.empty()) {
m_isTerminal = false;
}
return std::min(line.size(),pos+1);
}
if (pos == std::string::npos) {
return line.size();
}
return std::min(line.size(),pos+1);
}
std::string InternalTree::GetString(bool start) const {
std::string InternalTree::GetString(bool start) const
{
std::string ret = "";
if (!start) {
ret += " ";
}
std::string ret = "";
if (!start) {
ret += " ";
}
if (!m_isTerminal) {
ret += "[";
}
if (!m_isTerminal) {
ret += "[";
}
ret += m_value;
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it)
{
ret += (*it)->GetString(false);
}
ret += m_value;
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
ret += (*it)->GetString(false);
}
if (!m_isTerminal) {
ret += "]";
}
return ret;
if (!m_isTerminal) {
ret += "]";
}
return ret;
}
void InternalTree::Combine(const std::vector<TreePointer> &previous) {
void InternalTree::Combine(const std::vector<TreePointer> &previous)
{
std::vector<TreePointer>::iterator it;
bool found = false;
leafNT next_leafNT(this);
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
found = next_leafNT(it);
if (found) {
*it = *it_prev;
}
else {
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
}
std::vector<TreePointer>::iterator it;
bool found = false;
leafNT next_leafNT(this);
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
found = next_leafNT(it);
if (found) {
*it = *it_prev;
} else {
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
}
}
}
// Looks among the direct children only for the first one whose string label
// equals `label`. On success returns true with `it` at that child; otherwise
// returns false with `it` at m_children.end().
bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
{
  it = m_children.begin();
  while (it != m_children.end()) {
    if ((*it)->GetLabel() == label) {
      return true;
    }
    ++it;
  }
  return false;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
return true;
}
return false;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
parent = this;
return true;
}
return false;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
// Looks among the direct children only for the first one whose numeric label
// (GetNTLabel) equals `label`. On success returns true with `it` at that
// child; otherwise returns false with `it` at m_children.end().
bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
{
  it = m_children.begin();
  while (it != m_children.end()) {
    if ((*it)->GetNTLabel() == label) {
      return true;
    }
    ++it;
  }
  return false;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
return false;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
parent = this;
return true;
}
return false;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
// Looks among the direct children only for the first one whose numeric label
// occurs in `labels`. Membership is tested with std::binary_search, so
// `labels` must already be sorted in ascending order. On success returns true
// with `it` at that child; otherwise false with `it` at m_children.end().
bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
{
  it = m_children.begin();
  while (it != m_children.end()) {
    if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
      return true;
    }
    ++it;
  }
  return false;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2)) {
it = it2;
return true;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
return false;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2, parent)) {
it = it2;
return true;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
parent = this;
return true;
}
return false;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
}

View File

@ -19,79 +19,79 @@ typedef int NTLabel;
class InternalTree
{
std::string m_value;
NTLabel m_value_nt;
std::vector<TreePointer> m_children;
bool m_isTerminal;
std::string m_value;
NTLabel m_value_nt;
std::vector<TreePointer> m_children;
bool m_isTerminal;
public:
InternalTree(const std::string & line, size_t start, size_t len, const bool terminal);
InternalTree(const std::string & line, const bool terminal = false);
InternalTree(const InternalTree & tree):
m_value(tree.m_value),
m_isTerminal(tree.m_isTerminal) {
const std::vector<TreePointer> & children = tree.m_children;
for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
m_children.push_back(boost::make_shared<InternalTree>(**it));
}
}
size_t AddSubTree(const std::string & line, size_t start);
std::string GetString(bool start = true) const;
void Combine(const std::vector<TreePointer> &previous);
const std::string & GetLabel() const {
return m_value;
InternalTree(const std::string & line, size_t start, size_t len, const bool terminal);
InternalTree(const std::string & line, const bool terminal = false);
InternalTree(const InternalTree & tree):
m_value(tree.m_value),
m_isTerminal(tree.m_isTerminal) {
const std::vector<TreePointer> & children = tree.m_children;
for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
m_children.push_back(boost::make_shared<InternalTree>(**it));
}
}
size_t AddSubTree(const std::string & line, size_t start);
// optionally identify label by int instead of string;
// allows abstraction if multiple nonterminal strings should map to same label.
const NTLabel & GetNTLabel() const {
return m_value_nt;
}
std::string GetString(bool start = true) const;
void Combine(const std::vector<TreePointer> &previous);
const std::string & GetLabel() const {
return m_value;
}
void SetNTLabel(NTLabel value) {
m_value_nt = value;
}
// optionally identify label by int instead of string;
// allows abstraction if multiple nonterminal strings should map to same label.
const NTLabel & GetNTLabel() const {
return m_value_nt;
}
size_t GetLength() const {
return m_children.size();
}
std::vector<TreePointer> & GetChildren() {
return m_children;
}
void SetNTLabel(NTLabel value) {
m_value_nt = value;
}
bool IsTerminal() const {
return m_isTerminal;
}
size_t GetLength() const {
return m_children.size();
}
std::vector<TreePointer> & GetChildren() {
return m_children;
}
bool IsLeafNT() const {
return (!m_isTerminal && m_children.size() == 0);
}
bool IsTerminal() const {
return m_isTerminal;
}
// different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
// can be used for formulating syntax constraints.
bool IsLeafNT() const {
return (!m_isTerminal && m_children.size() == 0);
}
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
// different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
// can be used for formulating syntax constraints.
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
// use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
// pass vector of possible labels to search
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// pass vector of possible labels to search
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
};
@ -101,77 +101,79 @@ class TreeState : public FFState
TreePointer m_tree;
public:
TreeState(TreePointer tree)
:m_tree(tree)
{}
TreePointer GetTree() const {
return m_tree;
:m_tree(tree) {
}
int Compare(const FFState& other) const {return 0;};
TreePointer GetTree() const {
return m_tree;
}
int Compare(const FFState& other) const {
return 0;
};
};
// Python-like generator that yields next nonterminal leaf on every call
$generator(leafNT) {
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNT(InternalTree* root = 0): tree(root) {}
$emit(std::vector<TreePointer>::iterator)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(it);
}
else if ((*it)->GetLength() > 0) {
if ((*it).get()) { // normal pointer to same object that TreePointer points to
$restart(tree = (*it).get());
}
}
$generator(leafNT)
{
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNT(InternalTree* root = 0): tree(root) {}
$emit(std::vector<TreePointer>::iterator)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(it);
} else if ((*it)->GetLength() > 0) {
if ((*it).get()) { // normal pointer to same object that TreePointer points to
$restart(tree = (*it).get());
}
}
$stop;
}
$stop;
};
// Python-like generator that yields the parent of the next nonterminal leaf on every call
$generator(leafNTParent) {
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNTParent(InternalTree* root = 0): tree(root) {}
$emit(InternalTree*)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(tree);
}
else if ((*it)->GetLength() > 0) {
if ((*it).get()) {
$restart(tree = (*it).get());
}
}
$generator(leafNTParent)
{
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNTParent(InternalTree* root = 0): tree(root) {}
$emit(InternalTree*)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(tree);
} else if ((*it)->GetLength() > 0) {
if ((*it).get()) {
$restart(tree = (*it).get());
}
}
$stop;
}
$stop;
};
// Python-like generator that yields the next nonterminal leaf on every call, and also stores the path from the root of the tree to the nonterminal
$generator(leafNTPath) {
std::vector<TreePointer>::iterator it;
InternalTree* tree;
std::vector<InternalTree*> * path;
leafNTPath(InternalTree* root = NULL, std::vector<InternalTree*> * orig = NULL): tree(root), path(orig) {}
$emit(std::vector<TreePointer>::iterator)
path->push_back(tree);
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
path->push_back((*it).get());
$yield(it);
path->pop_back();
}
else if ((*it)->GetLength() > 0) {
if ((*it).get()) {
$restart(tree = (*it).get());
}
}
$generator(leafNTPath)
{
std::vector<TreePointer>::iterator it;
InternalTree* tree;
std::vector<InternalTree*> * path;
leafNTPath(InternalTree* root = NULL, std::vector<InternalTree*> * orig = NULL): tree(root), path(orig) {}
$emit(std::vector<TreePointer>::iterator)
path->push_back(tree);
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
path->push_back((*it).get());
$yield(it);
path->pop_back();
} else if ((*it)->GetLength() > 0) {
if ((*it).get()) {
$restart(tree = (*it).get());
}
}
path->pop_back();
$stop;
}
path->pop_back();
$stop;
};

View File

@ -15,7 +15,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
std::cerr << "Initializing LexicalReordering.." << std::endl;
map<string,string> sparseArgs;
m_haveDefaultScores = false;
m_haveDefaultScores = false;
for (size_t i = 0; i < m_args.size(); ++i) {
const vector<string> &args = m_args[i];
@ -36,7 +36,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
for(size_t i=0; i<tokens.size(); i++) {
m_defaultScores.push_back( TransformScore( Scan<float>(tokens[i]) ) );
}
m_haveDefaultScores = true;
m_haveDefaultScores = true;
} else {
UTIL_THROW(util::Exception,"Unknown argument " + args[0]);
}
@ -84,8 +84,8 @@ Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
}
FFState* LexicalReordering::EvaluateWhenApplied(const Hypothesis& hypo,
const FFState* prev_state,
ScoreComponentCollection* out) const
const FFState* prev_state,
ScoreComponentCollection* out) const
{
VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START" << std::endl);
Scores score(GetNumScoreComponents(), 0);

View File

@ -46,33 +46,37 @@ public:
Scores GetProb(const Phrase& f, const Phrase& e) const;
virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
virtual FFState* EvaluateWhenApplied(const ChartHypothesis&,
int /* featureID */,
ScoreComponentCollection*) const {
int /* featureID */,
ScoreComponentCollection*) const {
UTIL_THROW(util::Exception, "LexicalReordering is not valid for chart decoder");
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{}
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
bool GetHaveDefaultScores() { return m_haveDefaultScores; }
float GetDefaultScore( size_t i ) { return m_defaultScores[i]; }
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {
}
bool GetHaveDefaultScores() {
return m_haveDefaultScores;
}
float GetDefaultScore( size_t i ) {
return m_defaultScores[i];
}
private:
bool DecodeCondition(std::string s);

View File

@ -39,7 +39,7 @@ size_t LexicalReorderingConfiguration::GetNumScoreComponents() const
}
void LexicalReorderingConfiguration::ConfigureSparse
(const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer)
(const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer)
{
if (sparseArgs.size()) {
m_sparse.reset(new SparseReordering(sparseArgs, producer));
@ -95,7 +95,7 @@ LexicalReorderingConfiguration::LexicalReorderingConfiguration(const std::string
}
if (m_modelType == None) {
std::cerr << "You need to specify the type of the reordering model (msd, monotonicity,...)" << std::endl;
std::cerr << "You need to specify the type of the reordering model (msd, monotonicity,...)" << std::endl;
exit(1);
}
}
@ -134,7 +134,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
{
// don't call this on a bidirectional object
UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward,
"Unknown direction: " << m_direction);
"Unknown direction: " << m_direction);
const TranslationOption* relevantOpt = &topt;
if (m_direction != LexicalReorderingConfiguration::Backward) relevantOpt = m_prevOption;
const Scores *cachedScores = relevantOpt->GetLexReorderingScores(m_configuration.GetScoreProducer());
@ -146,8 +146,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
const Scores &scoreSet = *cachedScores;
if(m_configuration.CollapseScores()) {
scores[m_offset] = scoreSet[m_offset + reoType];
}
else {
} else {
std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
scores[m_offset + reoType] = scoreSet[m_offset + reoType];
}
@ -158,8 +157,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0);
if(m_configuration.CollapseScores()) {
scores[m_offset] = m_configuration.GetScoreProducer()->GetDefaultScore(m_offset + reoType);
}
else {
} else {
scores[m_offset + reoType] = m_configuration.GetScoreProducer()->GetDefaultScore(m_offset + reoType);
}
accum->PlusEquals(m_configuration.GetScoreProducer(), scores);

View File

@ -124,7 +124,7 @@ protected:
int ComparePrevScores(const TranslationOption *other) const;
//constants for the different type of reorderings (corresponding to indexes in the table file)
public:
public:
static const ReorderingType M = 0; // monotonic
static const ReorderingType NM = 1; // non-monotonic
static const ReorderingType S = 1; // swap

View File

@ -16,10 +16,11 @@
using namespace std;
namespace Moses
namespace Moses
{
const std::string& SparseReorderingFeatureKey::Name (const string& wordListId) {
const std::string& SparseReorderingFeatureKey::Name (const string& wordListId)
{
static string kSep = "-";
static string name;
ostringstream buf;
@ -55,7 +56,7 @@ const std::string& SparseReorderingFeatureKey::Name (const string& wordListId) {
}
SparseReordering::SparseReordering(const map<string,string>& config, const LexicalReordering* producer)
: m_producer(producer)
: m_producer(producer)
{
static const string kSource= "source";
static const string kTarget = "target";
@ -93,22 +94,24 @@ SparseReordering::SparseReordering(const map<string,string>& config, const Lexic
}
void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster) {
void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster)
{
for (size_t type = SparseReorderingFeatureKey::Stack;
type <= SparseReorderingFeatureKey::Between; ++type) {
type <= SparseReorderingFeatureKey::Between; ++type) {
for (size_t position = SparseReorderingFeatureKey::First;
position <= SparseReorderingFeatureKey::Last; ++position) {
position <= SparseReorderingFeatureKey::Last; ++position) {
for (int reoType = 0; reoType <= LexicalReorderingState::MAX; ++reoType) {
SparseReorderingFeatureKey key(
index, static_cast<SparseReorderingFeatureKey::Type>(type), factor, isCluster,
static_cast<SparseReorderingFeatureKey::Position>(position), side, reoType);
static_cast<SparseReorderingFeatureKey::Position>(position), side, reoType);
m_featureMap.insert(pair<SparseReorderingFeatureKey, FName>(key,m_producer->GetFeatureName(key.Name(id))));
}
}
}
}
void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<WordList>* pWordLists) {
void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<WordList>* pWordLists)
{
ifstream fh(filename.c_str());
UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename);
string line;
@ -118,12 +121,13 @@ void SparseReordering::ReadWordList(const string& filename, const string& id, Sp
//TODO: StringPiece
const Factor* factor = FactorCollection::Instance().AddFactor(line);
pWordLists->back().second.insert(factor);
PreCalculateFeatureNames(pWordLists->size()-1, id, side, factor, false);
PreCalculateFeatureNames(pWordLists->size()-1, id, side, factor, false);
}
}
void SparseReordering::ReadClusterMap(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<ClusterMap>* pClusterMaps) {
void SparseReordering::ReadClusterMap(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<ClusterMap>* pClusterMaps)
{
pClusterMaps->push_back(ClusterMap());
pClusterMaps->back().first = id;
util::FilePiece file(filename.c_str());
@ -141,15 +145,16 @@ void SparseReordering::ReadClusterMap(const string& filename, const string& id,
if (!lineIter) UTIL_THROW(util::Exception, "Malformed cluster line (missing cluster id): '" << line << "'");
const Factor* idFactor = FactorCollection::Instance().AddFactor(*lineIter);
pClusterMaps->back().second[wordFactor] = idFactor;
PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true);
PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true);
}
}
void SparseReordering::AddFeatures(
SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
const Word& word, SparseReorderingFeatureKey::Position position,
LexicalReorderingState::ReorderingType reoType,
ScoreComponentCollection* scores) const {
SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
const Word& word, SparseReorderingFeatureKey::Position position,
LexicalReorderingState::ReorderingType reoType,
ScoreComponentCollection* scores) const
{
const Factor* wordFactor = word.GetFactor(0);
@ -186,18 +191,18 @@ void SparseReordering::AddFeatures(
}
void SparseReordering::CopyScores(
const TranslationOption& currentOpt,
const TranslationOption* previousOpt,
const InputType& input,
LexicalReorderingState::ReorderingType reoType,
LexicalReorderingConfiguration::Direction direction,
ScoreComponentCollection* scores) const
const TranslationOption& currentOpt,
const TranslationOption* previousOpt,
const InputType& input,
LexicalReorderingState::ReorderingType reoType,
LexicalReorderingConfiguration::Direction direction,
ScoreComponentCollection* scores) const
{
if (m_useBetween && direction == LexicalReorderingConfiguration::Backward &&
(reoType == LexicalReorderingState::D || reoType == LexicalReorderingState::DL ||
reoType == LexicalReorderingState::DR)) {
reoType == LexicalReorderingState::DR)) {
size_t gapStart, gapEnd;
//NB: Using a static cast for speed, but could be nasty if
//NB: Using a static cast for speed, but could be nasty if
//using non-sentence input
const Sentence& sentence = static_cast<const Sentence&>(input);
const WordsRange& currentRange = currentOpt.GetSourceWordsRange();
@ -217,9 +222,9 @@ void SparseReordering::CopyScores(
}
assert(gapStart < gapEnd);
for (size_t i = gapStart; i < gapEnd; ++i) {
AddFeatures(SparseReorderingFeatureKey::Between,
SparseReorderingFeatureKey::Source, sentence.GetWord(i),
SparseReorderingFeatureKey::First, reoType, scores);
AddFeatures(SparseReorderingFeatureKey::Between,
SparseReorderingFeatureKey::Source, sentence.GetWord(i),
SparseReorderingFeatureKey::First, reoType, scores);
}
}
//std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
@ -240,11 +245,11 @@ void SparseReordering::CopyScores(
}
const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase();
AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0),
SparseReorderingFeatureKey::First, reoType, scores);
SparseReorderingFeatureKey::First, reoType, scores);
AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);
const Phrase& targetPhrase = currentOpt.GetTargetPhrase();
const Phrase& targetPhrase = currentOpt.GetTargetPhrase();
AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0),
SparseReorderingFeatureKey::First, reoType, scores);
SparseReorderingFeatureKey::First, reoType, scores);
AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);

View File

@ -23,7 +23,7 @@
/**
Configuration of sparse reordering:
The sparse reordering feature is configured using sparse-* configs in the lexical reordering line.
sparse-words-(source|target)-<id>=<filename> -- Features which fire for the words in the list
sparse-clusters-(source|target)-<id>=<filename> -- Features which fire for clusters in the list. Format
@ -38,7 +38,7 @@
namespace Moses
{
/**
/**
* Used to store pre-calculated feature names.
**/
struct SparseReorderingFeatureKey {
@ -51,17 +51,17 @@ struct SparseReorderingFeatureKey {
LexicalReorderingState::ReorderingType reoType;
SparseReorderingFeatureKey(size_t id_, Type type_, const Factor* word_, bool isCluster_,
Position position_, Side side_, LexicalReorderingState::ReorderingType reoType_)
Position position_, Side side_, LexicalReorderingState::ReorderingType reoType_)
: id(id_), type(type_), word(word_), isCluster(isCluster_),
position(position_), side(side_), reoType(reoType_)
{}
position(position_), side(side_), reoType(reoType_) {
}
const std::string& Name(const std::string& wordListId) ;
const std::string& Name(const std::string& wordListId) ;
};
struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorderingFeatureKey, std::size_t> {
std::size_t operator()(const SparseReorderingFeatureKey& key) const {
//TODO: can we just hash the memory?
//TODO: can we just hash the memory?
//not sure, there could be random padding
std::size_t seed = 0;
seed = util::MurmurHashNative(&key.id, sizeof(key.id), seed);
@ -76,7 +76,7 @@ struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorder
};
struct EqualsSparseReorderingFeatureKey :
public std::binary_function<SparseReorderingFeatureKey, SparseReorderingFeatureKey, bool> {
public std::binary_function<SparseReorderingFeatureKey, SparseReorderingFeatureKey, bool> {
bool operator()(const SparseReorderingFeatureKey& left, const SparseReorderingFeatureKey& right) const {
//TODO: Can we just compare the memory?
return left.id == right.id && left.type == right.type && left.word == right.word &&
@ -89,14 +89,14 @@ class SparseReordering
{
public:
SparseReordering(const std::map<std::string,std::string>& config, const LexicalReordering* producer);
//If direction is backward the options will be different, for forward they will be the same
void CopyScores(const TranslationOption& currentOpt,
const TranslationOption* previousOpt,
const InputType& input,
LexicalReorderingState::ReorderingType reoType,
LexicalReorderingConfiguration::Direction direction,
ScoreComponentCollection* scores) const ;
LexicalReorderingState::ReorderingType reoType,
LexicalReorderingConfiguration::Direction direction,
ScoreComponentCollection* scores) const ;
private:
const LexicalReordering* m_producer;
@ -113,14 +113,14 @@ private:
FeatureMap m_featureMap;
void ReadWordList(const std::string& filename, const std::string& id,
SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
void ReadClusterMap(const std::string& filename, const std::string& id, SparseReorderingFeatureKey::Side side, std::vector<ClusterMap>* pClusterMaps);
void PreCalculateFeatureNames(size_t index, const std::string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster);
void AddFeatures(
SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
const Word& word, SparseReorderingFeatureKey::Position position,
LexicalReorderingState::ReorderingType reoType,
const Word& word, SparseReorderingFeatureKey::Position position,
LexicalReorderingState::ReorderingType reoType,
ScoreComponentCollection* scores) const;
};

View File

@ -14,10 +14,10 @@ using namespace std;
namespace Moses
{
MaxSpanFreeNonTermSource::MaxSpanFreeNonTermSource(const std::string &line)
:StatelessFeatureFunction(1, line)
,m_maxSpan(2)
,m_glueTargetLHSStr("S")
,m_glueTargetLHS(true)
:StatelessFeatureFunction(1, line)
,m_maxSpan(2)
,m_glueTargetLHSStr("S")
,m_glueTargetLHS(true)
{
m_tuneable = false;
ReadParameters();
@ -28,25 +28,25 @@ MaxSpanFreeNonTermSource::MaxSpanFreeNonTermSource(const std::string &line)
}
void MaxSpanFreeNonTermSource::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
targetPhrase.SetRuleSource(source);
}
void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore) const
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore) const
{
const Word &targetLHS = targetPhrase.GetTargetLHS();
if (targetLHS == m_glueTargetLHS) {
// don't delete glue rules
return;
// don't delete glue rules
return;
}
const Phrase *source = targetPhrase.GetRuleSource();
@ -54,17 +54,17 @@ void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
float score = 0;
if (source->Front().IsNonTerminal()) {
const ChartCellLabel &cell = *stackVec->front();
if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
score = - std::numeric_limits<float>::infinity();
}
const ChartCellLabel &cell = *stackVec->front();
if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
score = - std::numeric_limits<float>::infinity();
}
}
if (source->Back().IsNonTerminal()) {
const ChartCellLabel &cell = *stackVec->back();
if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
score = - std::numeric_limits<float>::infinity();
}
const ChartCellLabel &cell = *stackVec->back();
if (cell.GetCoverage().GetNumWordsCovered() > m_maxSpan) {
score = - std::numeric_limits<float>::infinity();
}
}
@ -76,7 +76,7 @@ void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
void MaxSpanFreeNonTermSource::SetParameter(const std::string& key, const std::string& value)
{
if (key == "max-span") {
m_maxSpan = Scan<int>(value);
m_maxSpan = Scan<int>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
@ -84,8 +84,8 @@ void MaxSpanFreeNonTermSource::SetParameter(const std::string& key, const std::s
std::vector<float> MaxSpanFreeNonTermSource::DefaultWeights() const
{
std::vector<float> ret(1, 1);
return ret;
std::vector<float> ret(1, 1);
return ret;
}
}

Some files were not shown because too many files have changed in this diff Show More