mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-05 15:58:03 +03:00
daily automatic beautifier
This commit is contained in:
parent
93dd29639a
commit
f895e09292
@ -36,7 +36,8 @@ VW::VW(const std::string &line)
|
||||
: StatefulFeatureFunction(1, line)
|
||||
, TLSTargetSentence(this)
|
||||
, m_train(false)
|
||||
, m_sentenceStartWord(Word()) {
|
||||
, m_sentenceStartWord(Word())
|
||||
{
|
||||
ReadParameters();
|
||||
Discriminative::ClassifierFactory *classifierFactory = m_train
|
||||
? new Discriminative::ClassifierFactory(m_modelPath)
|
||||
@ -65,7 +66,8 @@ VW::VW(const std::string &line)
|
||||
m_sentenceStartWord.SetFactor(i, bosFactor);
|
||||
}
|
||||
|
||||
VW::~VW() {
|
||||
VW::~VW()
|
||||
{
|
||||
delete m_tlsClassifier;
|
||||
delete m_normalizer;
|
||||
// TODO delete more stuff
|
||||
@ -75,7 +77,7 @@ FFState* VW::EvaluateWhenApplied(
|
||||
const Hypothesis& curHypo,
|
||||
const FFState* prevState,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
{
|
||||
VERBOSE(3, "VW :: Evaluating translation options\n");
|
||||
|
||||
const VWState& prevVWState = *static_cast<const VWState *>(prevState);
|
||||
@ -96,12 +98,12 @@ FFState* VW::EvaluateWhenApplied(
|
||||
// compute our current key
|
||||
size_t cacheKey = MakeCacheKey(prevState, spanStart, spanEnd);
|
||||
|
||||
boost::unordered_map<size_t, FloatHashMap> &computedStateExtensions
|
||||
= *m_tlsComputedStateExtensions->GetStored();
|
||||
boost::unordered_map<size_t, FloatHashMap> &computedStateExtensions
|
||||
= *m_tlsComputedStateExtensions->GetStored();
|
||||
|
||||
if (computedStateExtensions.find(cacheKey) == computedStateExtensions.end()) {
|
||||
// we have not computed this set of translation options yet
|
||||
const TranslationOptionList *topts =
|
||||
const TranslationOptionList *topts =
|
||||
curHypo.GetManager().getSntTranslationOptions()->GetTranslationOptionList(spanStart, spanEnd);
|
||||
|
||||
const InputType& input = curHypo.GetManager().GetSource();
|
||||
@ -178,17 +180,19 @@ FFState* VW::EvaluateWhenApplied(
|
||||
return new VWState(prevVWState, curHypo);
|
||||
}
|
||||
|
||||
const FFState* VW::EmptyHypothesisState(const InputType &input) const {
|
||||
const FFState* VW::EmptyHypothesisState(const InputType &input) const
|
||||
{
|
||||
size_t maxContextSize = VWFeatureBase::GetMaximumContextSize(GetScoreProducerDescription());
|
||||
Phrase initialPhrase;
|
||||
for (size_t i = 0; i < maxContextSize; i++)
|
||||
initialPhrase.AddWord(m_sentenceStartWord);
|
||||
|
||||
|
||||
return new VWState(initialPhrase);
|
||||
}
|
||||
|
||||
void VW::EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const {
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{
|
||||
Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();
|
||||
|
||||
if (translationOptionList.size() == 0)
|
||||
@ -329,7 +333,7 @@ void VW::EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
// for future use at decoding time
|
||||
size_t toptHash = hash_value(*topt);
|
||||
m_tlsTranslationOptionFeatures->GetStored()->insert(
|
||||
std::make_pair(toptHash, outFeaturesTargetNamespace));
|
||||
std::make_pair(toptHash, outFeaturesTargetNamespace));
|
||||
|
||||
// get classifier score
|
||||
losses[toptIdx] = classifier.Predict(MakeTargetLabel(targetPhrase));
|
||||
@ -355,7 +359,7 @@ void VW::EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
// We have target context features => this is just a partial score,
|
||||
// do not add it to the score component collection.
|
||||
size_t toptHash = hash_value(*topt);
|
||||
|
||||
|
||||
// Subtract the score contribution of target-only features, otherwise it would
|
||||
// be included twice.
|
||||
Discriminative::FeatureVector emptySource;
|
||||
@ -372,7 +376,8 @@ void VW::EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
}
|
||||
}
|
||||
|
||||
void VW::SetParameter(const std::string& key, const std::string& value) {
|
||||
void VW::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "train") {
|
||||
m_train = Scan<bool>(value);
|
||||
} else if (key == "path") {
|
||||
@ -405,7 +410,8 @@ void VW::SetParameter(const std::string& key, const std::string& value) {
|
||||
}
|
||||
}
|
||||
|
||||
void VW::InitializeForInput(ttasksptr const& ttask) {
|
||||
void VW::InitializeForInput(ttasksptr const& ttask)
|
||||
{
|
||||
// do not keep future cost estimates across sentences!
|
||||
m_tlsFutureScores->GetStored()->clear();
|
||||
|
||||
@ -457,7 +463,8 @@ void VW::InitializeForInput(ttasksptr const& ttask) {
|
||||
* private methods
|
||||
************************************************************************************/
|
||||
|
||||
const AlignmentInfo *VW::TransformAlignmentInfo(const Hypothesis &curHypo, size_t contextSize) const {
|
||||
const AlignmentInfo *VW::TransformAlignmentInfo(const Hypothesis &curHypo, size_t contextSize) const
|
||||
{
|
||||
std::set<std::pair<size_t, size_t> > alignmentPoints;
|
||||
const Hypothesis *contextHypo = curHypo.GetPrevHypo();
|
||||
int idxInContext = contextSize - 1;
|
||||
@ -482,7 +489,8 @@ const AlignmentInfo *VW::TransformAlignmentInfo(const Hypothesis &curHypo, size_
|
||||
return AlignmentInfoCollection::Instance().Add(alignmentPoints);
|
||||
}
|
||||
|
||||
AlignmentInfo VW::TransformAlignmentInfo(const AlignmentInfo &alignInfo, size_t contextSize, int currentStart) const {
|
||||
AlignmentInfo VW::TransformAlignmentInfo(const AlignmentInfo &alignInfo, size_t contextSize, int currentStart) const
|
||||
{
|
||||
std::set<std::pair<size_t, size_t> > alignmentPoints;
|
||||
for (int i = std::max(0, currentStart - (int)contextSize); i < currentStart; i++) {
|
||||
std::set<size_t> alignedToTgt = alignInfo.GetAlignmentsForTarget(i);
|
||||
@ -493,7 +501,8 @@ AlignmentInfo VW::TransformAlignmentInfo(const AlignmentInfo &alignInfo, size_t
|
||||
return AlignmentInfo(alignmentPoints);
|
||||
}
|
||||
|
||||
std::pair<bool, int> VW::IsCorrectTranslationOption(const TranslationOption &topt) const {
|
||||
std::pair<bool, int> VW::IsCorrectTranslationOption(const TranslationOption &topt) const
|
||||
{
|
||||
|
||||
//std::cerr << topt.GetSourceWordsRange() << std::endl;
|
||||
|
||||
@ -565,7 +574,8 @@ std::pair<bool, int> VW::IsCorrectTranslationOption(const TranslationOption &top
|
||||
return std::make_pair(false, -1);
|
||||
}
|
||||
|
||||
std::vector<bool> VW::LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const {
|
||||
std::vector<bool> VW::LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const
|
||||
{
|
||||
UTIL_THROW_IF2(m_leaveOneOut.size() == 0 || ! m_train, "LeaveOneOut called in wrong setting!");
|
||||
|
||||
float sourceRawCount = 0.0;
|
||||
|
@ -104,13 +104,13 @@ public:
|
||||
// are written to a file, no classifier predictions take place. Target-side
|
||||
// context is constant at training time (we know the true target sentence),
|
||||
// so target-context features are extracted here as well.
|
||||
virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const;
|
||||
|
||||
// Evaluate VW during decoding. This is only used at prediction time (not in training).
|
||||
// When no target-context features are defined, VW predictions were already fully calculated
|
||||
// in EvaluateTranslationOptionListWithSourceContext() and the scores were added to the model.
|
||||
// If there are target-context features, we compute the context-dependent part of the
|
||||
// If there are target-context features, we compute the context-dependent part of the
|
||||
// classifier score and combine it with the source-context only partial score which was computed
|
||||
// in EvaluateTranslationOptionListWithSourceContext(). Various caches are used to make this
|
||||
// method more efficient.
|
||||
@ -122,16 +122,16 @@ public:
|
||||
virtual FFState* EvaluateWhenApplied(
|
||||
const ChartHypothesis&,
|
||||
int,
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
throw new std::logic_error("hiearchical/syntax not supported");
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
throw new std::logic_error("hiearchical/syntax not supported");
|
||||
}
|
||||
|
||||
// Initial VW state; contains unaligned BOS symbols.
|
||||
const FFState* EmptyHypothesisState(const InputType &input) const;
|
||||
const FFState* EmptyHypothesisState(const InputType &input) const;
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
// At prediction time, this clears our caches. At training time, we load the next sentence, its
|
||||
// At prediction time, this clears our caches. At training time, we load the next sentence, its
|
||||
// translation and word alignment.
|
||||
virtual void InitializeForInput(ttasksptr const& ttask);
|
||||
|
||||
@ -181,7 +181,7 @@ private:
|
||||
|
||||
// normalizer, typically this means softmax
|
||||
Discriminative::Normalizer *m_normalizer = NULL;
|
||||
|
||||
|
||||
// thread-specific classifier instance
|
||||
TLSClassifier *m_tlsClassifier;
|
||||
|
||||
|
@ -14,7 +14,8 @@ std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_targetFeatu
|
||||
std::map<std::string, size_t> VWFeatureBase::s_targetContextLength;
|
||||
|
||||
|
||||
void VWFeatureBase::UpdateContextSize(const std::string &usedBy) {
|
||||
void VWFeatureBase::UpdateContextSize(const std::string &usedBy)
|
||||
{
|
||||
// using the standard map behavior here: if the entry does not
|
||||
// exist, it will be added and initialized to zero
|
||||
size_t currentSize = s_targetContextLength[usedBy];
|
||||
|
@ -12,7 +12,7 @@
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
enum VWFeatureType {
|
||||
enum VWFeatureType {
|
||||
vwft_source,
|
||||
vwft_target,
|
||||
vwft_targetContext
|
||||
|
@ -43,7 +43,7 @@ public:
|
||||
if (key == "size") {
|
||||
m_contextSize = Scan<size_t>(value);
|
||||
} else if (key == "factor-positions") {
|
||||
// factor positions: assuming a factor such as positional morphological tag, use this
|
||||
// factor positions: assuming a factor such as positional morphological tag, use this
|
||||
// option to select only certain positions; this assumes that only a single
|
||||
// target-side factor is defined
|
||||
Tokenize<size_t>(m_factorPositions, value, ",");
|
||||
@ -92,9 +92,9 @@ protected:
|
||||
|
||||
// get source words aligned to a particular context word
|
||||
std::vector<std::string> GetAlignedSourceWords(const Phrase &contextPhrase
|
||||
, const InputType &input
|
||||
, const AlignmentInfo &alignInfo
|
||||
, size_t posFromEnd) const {
|
||||
, const InputType &input
|
||||
, const AlignmentInfo &alignInfo
|
||||
, size_t posFromEnd) const {
|
||||
size_t idx = contextPhrase.GetSize() - posFromEnd - 1;
|
||||
std::set<size_t> alignedToTarget = alignInfo.GetAlignmentsForTarget(idx);
|
||||
std::vector<std::string> out;
|
||||
@ -108,7 +108,7 @@ protected:
|
||||
// required context size
|
||||
size_t m_contextSize;
|
||||
|
||||
// factor positions: assuming a factor such as positional morphological tag, use this
|
||||
// factor positions: assuming a factor such as positional morphological tag, use this
|
||||
// option to select only certain positions
|
||||
std::vector<size_t> m_factorPositions;
|
||||
};
|
||||
|
@ -25,8 +25,8 @@ public:
|
||||
, Discriminative::Classifier &classifier
|
||||
, Discriminative::FeatureVector &outFeatures) const {
|
||||
for (size_t i = 1; i < m_contextSize; i++)
|
||||
outFeatures.push_back(classifier.AddLabelIndependentFeature("tcbigram^-" + SPrint(i + 1)
|
||||
+ "^" + GetWord(contextPhrase, i - 1) + "^" + GetWord(contextPhrase, i)));
|
||||
outFeatures.push_back(classifier.AddLabelIndependentFeature("tcbigram^-" + SPrint(i + 1)
|
||||
+ "^" + GetWord(contextPhrase, i - 1) + "^" + GetWord(contextPhrase, i)));
|
||||
}
|
||||
|
||||
virtual void SetParameter(const std::string& key, const std::string& value) {
|
||||
|
@ -9,24 +9,28 @@
|
||||
#include "moses/TranslationOption.h"
|
||||
#include <boost/functional/hash.hpp>
|
||||
|
||||
namespace Moses {
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
VWState::VWState() : m_spanStart(0), m_spanEnd(0) {
|
||||
VWState::VWState() : m_spanStart(0), m_spanEnd(0)
|
||||
{
|
||||
ComputeHash();
|
||||
}
|
||||
|
||||
VWState::VWState(const Phrase &phrase)
|
||||
: m_phrase(phrase), m_spanStart(0), m_spanEnd(0) {
|
||||
VWState::VWState(const Phrase &phrase)
|
||||
: m_phrase(phrase), m_spanStart(0), m_spanEnd(0)
|
||||
{
|
||||
ComputeHash();
|
||||
}
|
||||
|
||||
VWState::VWState(const VWState &prevState, const Hypothesis &curHypo) {
|
||||
VWState::VWState(const VWState &prevState, const Hypothesis &curHypo)
|
||||
{
|
||||
VERBOSE(3, "VW :: updating state\n>> previous state: " << prevState << "\n");
|
||||
|
||||
// copy phrase from previous state
|
||||
Phrase phrase = prevState.GetPhrase();
|
||||
size_t contextSize = phrase.GetSize(); // identical to VWFeatureBase::GetMaximumContextSize()
|
||||
|
||||
|
||||
// add words from current hypothesis
|
||||
phrase.Append(curHypo.GetCurrTargetPhrase());
|
||||
|
||||
@ -46,15 +50,17 @@ VWState::VWState(const VWState &prevState, const Hypothesis &curHypo) {
|
||||
VERBOSE(3, ">> updated state: " << *this << "\n");
|
||||
}
|
||||
|
||||
bool VWState::operator==(const FFState& o) const {
|
||||
bool VWState::operator==(const FFState& o) const
|
||||
{
|
||||
const VWState &other = static_cast<const VWState &>(o);
|
||||
|
||||
return m_phrase == other.GetPhrase()
|
||||
&& m_spanStart == other.GetSpanStart()
|
||||
&& m_spanEnd == other.GetSpanEnd();
|
||||
&& m_spanStart == other.GetSpanStart()
|
||||
&& m_spanEnd == other.GetSpanEnd();
|
||||
}
|
||||
|
||||
void VWState::ComputeHash() {
|
||||
void VWState::ComputeHash()
|
||||
{
|
||||
m_hash = 0;
|
||||
|
||||
boost::hash_combine(m_hash, m_phrase);
|
||||
@ -62,7 +68,8 @@ void VWState::ComputeHash() {
|
||||
boost::hash_combine(m_hash, m_spanEnd);
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &out, const VWState &state) {
|
||||
std::ostream &operator<<(std::ostream &out, const VWState &state)
|
||||
{
|
||||
out << state.GetPhrase() << "::" << state.GetSpanStart() << "-" << state.GetSpanEnd();
|
||||
return out;
|
||||
}
|
||||
|
@ -6,15 +6,17 @@
|
||||
#include "moses/Phrase.h"
|
||||
#include "moses/Hypothesis.h"
|
||||
|
||||
namespace Moses {
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
/**
|
||||
* VW state, used in decoding (when target context is enabled).
|
||||
*/
|
||||
class VWState : public FFState {
|
||||
class VWState : public FFState
|
||||
{
|
||||
public:
|
||||
// empty state, used only when VWState is ignored
|
||||
VWState();
|
||||
VWState();
|
||||
|
||||
// used for construction of the initial VW state
|
||||
VWState(const Phrase &phrase);
|
||||
|
@ -13,7 +13,8 @@ namespace Moses
|
||||
/**
|
||||
* VW thread-specific data about target sentence.
|
||||
*/
|
||||
class VWTargetSentence {
|
||||
class VWTargetSentence
|
||||
{
|
||||
public:
|
||||
VWTargetSentence() : m_sentence(NULL), m_alignment(NULL) {}
|
||||
|
||||
|
@ -246,12 +246,12 @@ std::ostream& operator<<(std::ostream& out, const ReorderingConstraint &obj)
|
||||
|
||||
out << "Walls:";
|
||||
for (size_t i = 0; i < obj.m_size; ++i) {
|
||||
out << obj.m_wall[i];
|
||||
out << obj.m_wall[i];
|
||||
}
|
||||
|
||||
out << " Local walls:";
|
||||
for (size_t i = 0; i < obj.m_size; ++i) {
|
||||
out << obj.m_localWall[i] << " ";
|
||||
out << obj.m_localWall[i] << " ";
|
||||
}
|
||||
|
||||
return out;
|
||||
|
Loading…
Reference in New Issue
Block a user