This commit is contained in:
Nicola Bertoldi 2014-05-19 15:35:08 +02:00
parent 2f3cd5e2fe
commit 20b3e8929e
17 changed files with 240 additions and 234 deletions

View File

@ -221,7 +221,7 @@ OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase
phrase.AddWord(word);
if (retSourceTarget == 1) {
out = word;
out = word;
}
}
@ -232,7 +232,7 @@ OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase
phrase.AddWord(word);
if (retSourceTarget == 2) {
out = word;
out = word;
}
}

View File

@ -34,7 +34,8 @@ namespace MosesTuning
#define CHILD_STDOUT_WRITE pipefds_output[1]
MeteorScorer::MeteorScorer(const string& config)
: StatisticsBasedScorer("METEOR",config) {
: StatisticsBasedScorer("METEOR",config)
{
meteor_jar = getConfig("jar", "");
meteor_lang = getConfig("lang", "en");
meteor_task = getConfig("task", "tune");
@ -88,7 +89,8 @@ MeteorScorer::MeteorScorer(const string& config)
m_from_meteor = new ifdstream(CHILD_STDOUT_READ);
}
MeteorScorer::~MeteorScorer() {
MeteorScorer::~MeteorScorer()
{
// Cleanup IO
delete m_to_meteor;
delete m_from_meteor;
@ -171,7 +173,8 @@ float MeteorScorer::calculateScore(const vector<int>& comps) const
// Meteor unsupported, throw error if used
/**
 * Stub constructor for the configuration in which Meteor is unsupported.
 *
 * Forwards the scorer name "METEOR" and the config string to the
 * StatisticsBasedScorer base, then unconditionally throws, so an object of
 * this type can never be obtained through this constructor.
 *
 * @param config  scorer configuration string, passed through to the base class
 * @throws runtime_error always ("Meteor unsupported, requires GLIBCXX")
 */
MeteorScorer::MeteorScorer(const string& config)
  : StatisticsBasedScorer("METEOR",config)
{
  throw runtime_error("Meteor unsupported, requires GLIBCXX");
}

View File

@ -20,7 +20,7 @@ class ifdstream;
class ScoreStats;
/**
* Meteor scoring
* Meteor scoring
*
* https://github.com/mjdenkowski/meteor
* http://statmt.org/wmt11/pdf/WMT07.pdf

View File

@ -35,7 +35,7 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand)
m_fromFilter(NULL)
{
#if defined __MINGW32__
//TODO(jie): replace this function with boost implementation
//TODO(jie): replace this function with boost implementation
#else
// Child error signal install
// sigaction is the replacement for the traditional signal() method

View File

@ -132,7 +132,7 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
m_alignmentInfoStream = new std::ofstream(staticData.GetAlignmentOutputFile().c_str());
m_alignmentInfoCollector = new Moses::OutputCollector(m_alignmentInfoStream);
UTIL_THROW_IF2(!m_alignmentInfoStream->good(),
"File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile());
"File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile());
}
if (!staticData.GetOutputUnknownsFile().empty()) {
@ -140,7 +140,7 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
m_unknownsCollector = new Moses::OutputCollector(m_unknownsStream);
UTIL_THROW_IF2(!m_unknownsStream->good(),
"File for unknowns words could not be opened: " <<
staticData.GetOutputUnknownsFile());
staticData.GetOutputUnknownsFile());
}
}
@ -188,7 +188,7 @@ InputType*IOWrapper::GetInput(InputType* inputType)
void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Cannot be empty phrase");
"Cannot be empty phrase");
if (reportAllFactors == true) {
out << phrase;
} else {
@ -197,12 +197,12 @@ void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<Fa
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
out << *factor;
UTIL_THROW_IF2(factor == NULL,
"Empty factor 0 at position " << pos);
"Empty factor 0 at position " << pos);
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL,
"Empty factor " << i << " at position " << pos);
"Empty factor " << i << " at position " << pos);
out << "|" << *factor;
}
@ -246,7 +246,7 @@ void IOWrapper::OutputBestHypo(const std::vector<const Factor*>& mbrBestHypo, l
for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
const Factor *factor = mbrBestHypo[i];
UTIL_THROW_IF(factor == NULL, util::Exception,
"No factor at position " << i );
"No factor at position " << i );
cout << *factor << " ";
}
@ -403,7 +403,7 @@ void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &
// recursive
const search::Applied *child = applied->Children();
for (size_t i = 0; i < applied->GetArity(); i++) {
OutputTranslationOptions(out, applicationContext, child++, sentence, translationId);
OutputTranslationOptions(out, applicationContext, child++, sentence, translationId);
}
}
@ -459,7 +459,7 @@ void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, Applica
// recursive
const search::Applied *child = applied->Children();
for (size_t i = 0; i < applied->GetArity(); i++) {
OutputTreeFragmentsTranslationOptions(out, applicationContext, child++, sentence, translationId);
OutputTreeFragmentsTranslationOptions(out, applicationContext, child++, sentence, translationId);
}
}
@ -476,7 +476,7 @@ void IOWrapper::OutputDetailedTranslationReport(
OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
UTIL_THROW_IF2(m_detailOutputCollector == NULL,
"No ouput file for detailed reports specified");
"No ouput file for detailed reports specified");
m_detailOutputCollector->Write(translationId, out.str());
}
@ -493,7 +493,7 @@ void IOWrapper::OutputDetailedTranslationReport(
OutputTranslationOptions(out, applicationContext, applied, sentence, translationId);
UTIL_THROW_IF2(m_detailOutputCollector == NULL,
"No ouput file for detailed reports specified");
"No ouput file for detailed reports specified");
m_detailOutputCollector->Write(translationId, out.str());
}
@ -510,18 +510,18 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId);
UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL,
"No output file for tree fragments specified");
"No output file for tree fragments specified");
//Tree of full sentence
const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure();
if (treeStructure != NULL) {
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ ) {
if (sff[i] == treeStructure) {
if (sff[i] == treeStructure) {
const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
break;
}
}
}
}
@ -542,7 +542,7 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
OutputTreeFragmentsTranslationOptions(out, applicationContext, applied, sentence, translationId);
UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL,
"No output file for tree fragments specified");
"No output file for tree fragments specified");
//Tree of full sentence
//TODO: incremental search doesn't support stateful features
@ -581,7 +581,7 @@ void IOWrapper::OutputDetailedAllTranslationReport(
}
}
UTIL_THROW_IF2(m_detailAllOutputCollector == NULL,
"No output file for details specified");
"No output file for details specified");
m_detailAllOutputCollector->Write(translationId, out.str());
}
@ -609,7 +609,7 @@ void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId)
// delete 1st & last
UTIL_THROW_IF2(outPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outPhrase.RemoveWord(0);
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
@ -641,7 +641,7 @@ void IOWrapper::OutputBestHypo(search::Applied applied, long translationId)
Incremental::ToPhrase(applied, outPhrase);
// delete 1st & last
UTIL_THROW_IF2(outPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outPhrase.RemoveWord(0);
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
out << outPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
@ -730,7 +730,7 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, long tran
// delete 1st & last
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
@ -805,7 +805,7 @@ void IOWrapper::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList,
}
bool includeWordAlignment =
StaticData::Instance().PrintAlignmentInfoInNbest();
StaticData::Instance().PrintAlignmentInfoInNbest();
for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
p != nBestList.end(); ++p) {
@ -816,7 +816,7 @@ void IOWrapper::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList,
// delete <s> and </s>
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
@ -858,7 +858,7 @@ void IOWrapper::OutputNBestList(const std::vector<search::Applied> &nbest, long
Incremental::PhraseAndFeatures(*i, outputPhrase, features);
// <s> and </s>
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
@ -980,9 +980,9 @@ size_t IOWrapper::OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartT
}
size_t IOWrapper::OutputAlignmentNBest(
Alignments &retAlign,
const Moses::ChartKBestExtractor::Derivation &derivation,
size_t startTarget)
Alignments &retAlign,
const Moses::ChartKBestExtractor::Derivation &derivation,
size_t startTarget)
{
const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
@ -1023,7 +1023,7 @@ size_t IOWrapper::OutputAlignmentNBest(
// Recursively look thru child hypos
size_t currStartTarget = startTarget + totalTargetSize;
size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
currStartTarget);
currStartTarget);
targetOffsets[targetPos] = targetSize;
totalTargetSize += targetSize;
@ -1114,7 +1114,7 @@ size_t IOWrapper::OutputAlignment(Alignments &retAlign, const Moses::ChartHypoth
size_t targetInd = 0;
for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
if (tp.GetWord(targetPos).IsNonTerminal()) {
UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
size_t sourceInd = targetPos2SourceInd[targetPos];
size_t sourcePos = sourceInd2pos[sourceInd];

View File

@ -234,8 +234,7 @@ static void ShowWeights()
const StatefulFeatureFunction *ff = sff[i];
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
}
else {
} else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
@ -243,8 +242,7 @@ static void ShowWeights()
const StatelessFeatureFunction *ff = slf[i];
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
}
else {
} else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}

View File

@ -253,17 +253,17 @@ public:
if ( appendSuffix ) {
fileName << "." << compression;
}
boost::iostreams::filtering_ostream *file
= new boost::iostreams::filtering_ostream;
boost::iostreams::filtering_ostream *file
= new boost::iostreams::filtering_ostream;
if ( compression == "gz" ) {
file->push( boost::iostreams::gzip_compressor() );
} else if ( compression == "bz2" ) {
file->push( boost::iostreams::bzip2_compressor() );
} else if ( compression != "txt" ) {
TRACE_ERR("Unrecognized hypergraph compression format ("
<< compression
<< ") - using uncompressed plain txt" << std::endl);
TRACE_ERR("Unrecognized hypergraph compression format ("
<< compression
<< ") - using uncompressed plain txt" << std::endl);
compression = "txt";
}
@ -274,10 +274,10 @@ public:
manager.OutputSearchGraphAsHypergraph(m_lineNumber, *file);
file -> flush();
} else {
TRACE_ERR("Cannot output hypergraph for line " << m_lineNumber
<< " because the output file " << fileName.str()
<< " is not open or not ready for writing"
<< std::endl);
TRACE_ERR("Cannot output hypergraph for line " << m_lineNumber
<< " because the output file " << fileName.str()
<< " is not open or not ready for writing"
<< std::endl);
}
file -> pop();
delete file;
@ -504,8 +504,7 @@ static void ShowWeights()
const StatefulFeatureFunction *ff = sff[i];
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
}
else {
} else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
@ -513,8 +512,7 @@ static void ShowWeights()
const StatelessFeatureFunction *ff = slf[i];
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
}
else {
} else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
@ -585,7 +583,7 @@ int main(int argc, char** argv)
#ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif
// echo command line, if verbose
IFVERBOSE(1) {
TRACE_ERR("command: ");
@ -604,15 +602,15 @@ int main(int argc, char** argv)
exit(1);
}
std::cerr <<"Before StaticData::LoadDataStatic" << std::endl;
std::cerr <<"Before StaticData::LoadDataStatic" << std::endl;
// initialize all "global" variables, which are stored in StaticData
// note: this also loads models such as the language model, etc.
if (!StaticData::LoadDataStatic(&params, argv[0])) {
exit(1);
}
std::cerr <<"After StaticData::LoadDataStatic" << std::endl;
std::cerr <<"After StaticData::LoadDataStatic" << std::endl;
std::cerr <<"Before ShowWeights" << std::endl;
std::cerr <<"Before ShowWeights" << std::endl;
// setting "-show-weights" -> just dump out weights and exit
if (params.isParamSpecified("show-weights")) {
ShowWeights();

View File

@ -55,9 +55,9 @@ DomainFeature::DomainFeature(const string& domainFile) : m_propertyKey("domain")
m_domain.load(domainFile);
}
void DomainFeature::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
float count,
int sentenceId) const
void DomainFeature::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
float count,
int sentenceId) const
{
std::string value = m_domain.getDomainOfSentence(sentenceId);
phrasePair.AddProperty(m_propertyKey, value, count);
@ -69,13 +69,13 @@ void DomainFeature::add(const ScoreFeatureContext& context,
{
const map<string,float> *domainCount = context.phrasePair.GetProperty(m_propertyKey);
assert( domainCount != NULL );
add(*domainCount,
context.phrasePair.GetCount(),
context.maybeLog,
add(*domainCount,
context.phrasePair.GetCount(),
context.maybeLog,
denseValues, sparseValues);
}
void SubsetDomainFeature::add(const map<string,float>& domainCount,
void SubsetDomainFeature::add(const map<string,float>& domainCount,
float count,
const MaybeLog& maybeLog,
std::vector<float>& denseValues,

View File

@ -35,8 +35,8 @@ public:
DomainFeature(const std::string& domainFile);
void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
float count,
void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
float count,
int sentenceId) const;
void add(const ScoreFeatureContext& context,

View File

@ -29,7 +29,8 @@
using namespace std;
namespace MosesTraining {
namespace MosesTraining
{
extern Vocabulary vcbT;
@ -38,23 +39,23 @@ extern Vocabulary vcbS;
extern bool hierarchicalFlag;
ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource,
const PHRASE *phraseTarget,
ALIGNMENT *targetToSourceAlignment,
float count, float pcfgSum ) :
m_phraseSource(phraseSource),
m_phraseTarget(phraseTarget),
m_count(count),
m_pcfgSum(pcfgSum)
ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource,
const PHRASE *phraseTarget,
ALIGNMENT *targetToSourceAlignment,
float count, float pcfgSum ) :
m_phraseSource(phraseSource),
m_phraseTarget(phraseTarget),
m_count(count),
m_pcfgSum(pcfgSum)
{
assert(phraseSource->empty());
assert(phraseTarget->empty());
m_count = count;
m_pcfgSum = pcfgSum;
std::pair< std::map<ALIGNMENT*,float>::iterator, bool > insertedAlignment =
m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
m_lastTargetToSourceAlignment = insertedAlignment.first;
m_lastCount = m_count;
@ -64,29 +65,30 @@ ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource,
}
// Destructor: all cleanup is delegated to Clear().
ExtractionPhrasePair::~ExtractionPhrasePair( )
{
  Clear();
}
// return value: true if the given alignment was seen for the first time and thus will be stored,
// false if it was present already (the pointer may thus be deleted)
bool ExtractionPhrasePair::Add( ALIGNMENT *targetToSourceAlignment,
float count, float pcfgSum )
bool ExtractionPhrasePair::Add( ALIGNMENT *targetToSourceAlignment,
float count, float pcfgSum )
{
m_count += count;
m_pcfgSum += pcfgSum;
m_lastCount = count;
m_lastPcfgSum = pcfgSum;
std::map<ALIGNMENT*,float>::iterator iter = m_lastTargetToSourceAlignment;
if ( *(iter->first) == *targetToSourceAlignment ) {
iter->second += count;
return false;
} else {
std::pair< std::map<ALIGNMENT*,float>::iterator, bool > insertedAlignment =
m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
if ( !insertedAlignment.second ) {
// the alignment already exists: increment count
insertedAlignment.first->second += count;
@ -105,7 +107,7 @@ void ExtractionPhrasePair::IncrementPrevious( float count, float pcfgSum )
m_pcfgSum += pcfgSum;
m_lastTargetToSourceAlignment->second += count;
// properties
for ( std::map<std::string, std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter=m_properties.begin();
for ( std::map<std::string, std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter=m_properties.begin();
iter !=m_properties.end(); ++iter ) {
LAST_PROPERTY_VALUE *lastPropertyValue = (iter->second).second;
(*lastPropertyValue)->second += count;
@ -116,7 +118,7 @@ void ExtractionPhrasePair::IncrementPrevious( float count, float pcfgSum )
}
// Check for lexical match
// Check for lexical match
// and in case of SCFG rules for equal non-terminal alignment.
bool ExtractionPhrasePair::Matches( const PHRASE *otherPhraseSource,
const PHRASE *otherPhraseTarget,
@ -132,9 +134,9 @@ bool ExtractionPhrasePair::Matches( const PHRASE *otherPhraseSource,
return MatchesAlignment( otherTargetToSourceAlignment );
}
// Check for lexical match
// Check for lexical match
// and in case of SCFG rules for equal non-terminal alignment.
// Set boolean indicators.
// Set boolean indicators.
// (Note that we check in the order: target - source - alignment
// and do not touch the subsequent boolean indicators once a previous one has been set to false.)
bool ExtractionPhrasePair::Matches( const PHRASE *otherPhraseSource,
@ -194,7 +196,7 @@ bool ExtractionPhrasePair::MatchesAlignment( ALIGNMENT *otherTargetToSourceAlign
return true;
}
void ExtractionPhrasePair::Clear()
void ExtractionPhrasePair::Clear()
{
delete m_phraseSource;
delete m_phraseTarget;
@ -218,7 +220,7 @@ void ExtractionPhrasePair::Clear()
m_lastCount = 0.0f;
m_lastPcfgSum = 0.0f;
m_lastTargetToSourceAlignment = m_targetToSourceAlignments.begin();
m_isValid = false;
}
@ -252,7 +254,7 @@ const ALIGNMENT *ExtractionPhrasePair::FindBestAlignmentTargetToSource() const
std::map<ALIGNMENT*,float>::const_iterator bestAlignment = m_targetToSourceAlignments.end();
for (std::map<ALIGNMENT*,float>::const_iterator iter=m_targetToSourceAlignments.begin();
for (std::map<ALIGNMENT*,float>::const_iterator iter=m_targetToSourceAlignments.begin();
iter!=m_targetToSourceAlignments.end(); ++iter) {
if ( (iter->second > bestAlignmentCount) ||
( (iter->second == bestAlignmentCount) &&
@ -281,7 +283,7 @@ const std::string *ExtractionPhrasePair::FindBestPropertyValue(const std::string
PROPERTY_VALUES::const_iterator bestPropertyValue = allPropertyValues->end();
for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
iter!=allPropertyValues->end(); ++iter) {
if ( (iter->second > bestPropertyCount) ||
( (iter->second == bestPropertyCount) &&
@ -308,7 +310,7 @@ std::string ExtractionPhrasePair::CollectAllPropertyValues(const std::string &ke
}
std::ostringstream oss;
for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
iter!=allPropertyValues->end(); ++iter) {
if (iter!=allPropertyValues->begin()) {
oss << " ";

View File

@ -24,20 +24,22 @@
#include <set>
#include <map>
namespace MosesTraining {
namespace MosesTraining
{
typedef std::vector< std::set<size_t> > ALIGNMENT;
class ExtractionPhrasePair {
class ExtractionPhrasePair
{
protected:
typedef std::map<std::string,float> PROPERTY_VALUES;
typedef std::map<std::string,float>::iterator LAST_PROPERTY_VALUE;
bool m_isValid;
const PHRASE *m_phraseSource;
@ -47,8 +49,8 @@ protected:
float m_pcfgSum;
std::map<ALIGNMENT*,float> m_targetToSourceAlignments;
std::map<std::string,
std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > > m_properties;
std::map<std::string,
std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > > m_properties;
float m_lastCount;
float m_lastPcfgSum;
@ -56,14 +58,14 @@ protected:
public:
ExtractionPhrasePair( const PHRASE *phraseSource,
const PHRASE *phraseTarget,
ALIGNMENT *targetToSourceAlignment,
ExtractionPhrasePair( const PHRASE *phraseSource,
const PHRASE *phraseTarget,
ALIGNMENT *targetToSourceAlignment,
float count, float pcfgSum );
~ExtractionPhrasePair();
bool Add( ALIGNMENT *targetToSourceAlignment,
bool Add( ALIGNMENT *targetToSourceAlignment,
float count, float pcfgSum );
void IncrementPrevious( float count, float pcfgSum );
@ -91,7 +93,7 @@ public:
const PHRASE *GetSource() const {
return m_phraseSource;
}
const PHRASE *GetTarget() const {
return m_phraseTarget;
}
@ -126,10 +128,9 @@ public:
void AddProperties( const std::string &str, float count );
void AddProperty( const std::string &key, const std::string &value, float count )
{
void AddProperty( const std::string &key, const std::string &value, float count ) {
std::map<std::string,
std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
if ( iter == m_properties.end() ) {
// key not found: insert property key and value
PROPERTY_VALUES *propertyValues = new PROPERTY_VALUES();

View File

@ -8,7 +8,8 @@ namespace MosesTraining
void InternalStructFeature::add(const ScoreFeatureContext& context,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const {
std::map<std::string,float>& sparseValues) const
{
const std::map<std::string,float> *allTrees = context.phrasePair.GetProperty("Tree"); // our would we rather want to take the most frequent one only?
for ( std::map<std::string,float>::const_iterator iter=allTrees->begin();
iter!=allTrees->end(); ++iter ) {
@ -19,24 +20,26 @@ void InternalStructFeature::add(const ScoreFeatureContext& context,
/**
 * Dense variant: appends a single dense score derived from the "NP"
 * substrings found in the tree fragment.
 *
 * The fragment is scanned left to right for non-overlapping occurrences of
 * "NP"; each occurrence adds `count` to a running total, and exp(total) is
 * pushed onto denseValues.
 *
 * @param treeFragment  string to scan; dereferenced unconditionally, so it
 *                      must not be NULL
 * @param count         amount added to the total for every "NP" occurrence
 * @param denseValues   receives exp(total) as a new trailing element
 * @param sparseValues  not used by this variant
 */
void InternalStructFeatureDense::add(const std::string *treeFragment,
                                     float count,
                                     std::vector<float>& denseValues,
                                     std::map<std::string,float>& sparseValues) const
{
  //cout<<"Dense: "<<*internalStruct<<endl;
  size_t start=0;
  // NOTE(review): countNP is an int while count is a float, so every
  // "+= count" converts the sum back to int and drops the fractional part.
  // Kept as-is to preserve the emitted feature values; confirm whether
  // fractional counts are expected here.
  int countNP=0;
  while((start = treeFragment->find("NP", start)) != std::string::npos) {
    countNP += count;
    start+=2; //length of "NP"
  }
  //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln?
  //should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 )
  denseValues.push_back(exp(countNP));
}
void InternalStructFeatureSparse::add(const std::string *treeFragment,
float count,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const {
std::map<std::string,float>& sparseValues) const
{
//cout<<"Sparse: "<<*internalStruct<<endl;
if(treeFragment->find("VBZ")!=std::string::npos)
sparseValues["NTVBZ"] += count;

View File

@ -21,20 +21,20 @@ namespace MosesTraining
/**
 * Abstract ScoreFeature that scores a phrase pair from tree-fragment strings.
 *
 * The public add() is the ScoreFeature entry point; the protected add() is
 * the per-fragment hook that concrete subclasses implement.
 */
class InternalStructFeature : public ScoreFeature
{
public:
  /** Initialises the type tag to 0; subclasses assign their own value. */
  InternalStructFeature() : m_type(0) {}

  /** Add the values for this feature function. */
  void add(const ScoreFeatureContext& context,
           std::vector<float>& denseValues,
           std::map<std::string,float>& sparseValues) const;

protected:
  /**
   * Overridden in subclass.
   *
   * @param treeFragment  tree fragment string to score
   * @param count         count associated with this fragment
   * @param denseValues   dense scores to append to
   * @param sparseValues  sparse scores to update
   */
  virtual void add(const std::string *treeFragment,
                   float count,
                   std::vector<float>& denseValues,
                   std::map<std::string,float>& sparseValues) const = 0;

  /** Type tag: 0 here; the dense subclass sets 1 and the sparse subclass sets 2. */
  int m_type;
};
class InternalStructFeatureDense : public InternalStructFeature
@ -45,10 +45,10 @@ public:
m_type=1;
} //std::cout<<"InternalStructFeatureDense: Construct "<<m_type<<"\n";}
protected:
virtual void add(const std::string *treeFragment,
float count,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const;
virtual void add(const std::string *treeFragment,
float count,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const;
};
class InternalStructFeatureSparse : public InternalStructFeature
@ -59,10 +59,10 @@ public:
m_type=2;
}// std::cout<<"InternalStructFeatureSparse: Construct "<<m_type<<"\n";}
protected:
virtual void add(const std::string *treeFragment,
float count,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const;
virtual void add(const std::string *treeFragment,
float count,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const;
};
}

View File

@ -77,12 +77,12 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args)
}
sparseDomainAdded = true;
m_includeSentenceId = true;
} else if(args[i] == "--TreeFeatureSparse"){
//MARIA
m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
} else if(args[i] == "--TreeFeatureDense"){
//MARIA
m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense()));
} else if(args[i] == "--TreeFeatureSparse") {
//MARIA
m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
} else if(args[i] == "--TreeFeatureDense") {
//MARIA
m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense()));
} else {
UTIL_THROW(ScoreFeatureArgumentException,"Unknown score argument " << args[i]);
}
@ -91,9 +91,9 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args)
}
void ScoreFeatureManager::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
float count,
int sentenceId) const
void ScoreFeatureManager::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
float count,
int sentenceId) const
{
for (size_t i = 0; i < m_features.size(); ++i) {
m_features[i]->addPropertiesToPhrasePair(phrasePair, count, sentenceId);

View File

@ -84,10 +84,10 @@ class ScoreFeature
public:
/** Some features might need to store properties in ExtractionPhrasePair,
* e.g. to pass along external information loaded by a feature
* e.g. to pass along external information loaded by a feature
* which may distinguish several phrase occurrences based on sentence ID */
virtual void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
float count,
virtual void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
float count,
int sentenceId) const {};
/** Add the values for this feature function. */
@ -113,10 +113,10 @@ public:
void configure(const std::vector<std::string> args);
/** Some features might need to store properties in ExtractionPhrasePair,
* e.g. to pass along external information loaded by a feature
* e.g. to pass along external information loaded by a feature
* which may distinguish several phrase occurrences based on sentence ID */
void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
float count,
void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
float count,
int sentenceId) const;
/** Add all the features */

View File

@ -92,9 +92,9 @@ class ExtractTask
public:
/**
 * Initialises m_sentence, m_options and m_extractFileOrientation from the
 * corresponding arguments. The constructor body is empty and `id` is not
 * used by any initialiser.
 */
ExtractTask(size_t id, SentenceAlignment &sentence,PhraseExtractionOptions &initoptions, Moses::OutputFileStream &extractFileOrientation)
  :m_sentence(sentence),
   m_options(initoptions),
   m_extractFileOrientation(extractFileOrientation)
{}
void Run();
private:
void extract(SentenceAlignment &);
@ -151,11 +151,11 @@ int main(int argc, char* argv[])
}
options.initInstanceWeightsFile(argv[++i]);
} else if (strcmp(argv[i], "--Debug") == 0) {
options.debug = true;
options.debug = true;
} else if (strcmp(argv[i], "--MinPhraseLength") == 0) {
options.minPhraseLength = atoi(argv[++i]);
options.minPhraseLength = atoi(argv[++i]);
} else if (strcmp(argv[i], "--Separator") == 0) {
options.separator = argv[++i];
options.separator = argv[++i];
} else if(strcmp(argv[i],"--model") == 0) {
if (i+1 >= argc) {
cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
@ -605,16 +605,14 @@ string getOrientString(REO_POS orient, REO_MODEL_TYPE modelType)
/**
 * Classify a string by the position of the first occurrence of "swap".
 *
 * @param str  string to inspect
 * @return 0 if "swap" does not occur in str,
 *         1 if str starts with "swap",
 *         2 if "swap" first occurs at a later position
 */
int getClass(const std::string &str)
{
  const std::string::size_type pos = str.find("swap");
  if (pos == std::string::npos) {
    return 0;
  } else if (pos == 0) {
    return 1;
  } else {
    return 2;
  }
}
void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE, int startF, int endF , string &orientationInfo)
@ -635,19 +633,19 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
// start
m_extractFileOrientation << "<s> ";
for(int fi=0; fi<startF; fi++) {
m_extractFileOrientation << sentence.source[fi] << " ";
m_extractFileOrientation << sentence.source[fi] << " ";
}
m_extractFileOrientation << sep << " ";
// middle
for(int fi=startF; fi<=endF; fi++) {
m_extractFileOrientation << sentence.source[fi] << " ";
m_extractFileOrientation << sentence.source[fi] << " ";
}
m_extractFileOrientation << sep << " ";
// end
for(int fi=endF+1; fi<sentence.source.size(); fi++) {
m_extractFileOrientation << sentence.source[fi] << " ";
m_extractFileOrientation << sentence.source[fi] << " ";
}
m_extractFileOrientation << "</s> ";
@ -655,7 +653,7 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
// target
/*
for(int ei=startE; ei<=endE; ei++) {
m_extractFileOrientation << sentence.target[ei] << " ";
m_extractFileOrientation << sentence.target[ei] << " ";
}
*/
m_extractFileOrientation << endl;

View File

@ -68,7 +68,7 @@ std::map<std::string,float> sourceLHSCounts;
std::map<std::string, std::map<std::string,float>* > targetLHSAndSourceLHSJointCounts;
std::set<std::string> sourceLabelSet;
std::map<std::string,size_t> sourceLabels;
std::map<std::string,size_t> sourceLabels;
std::vector<std::string> sourceLabelsByIndex;
Vocabulary vcbT;
@ -79,12 +79,12 @@ Vocabulary vcbS;
std::vector<std::string> tokenize( const char [] );
void processLine( std::string line,
int lineID, bool includeSentenceIdFlag, int &sentenceId,
int lineID, bool includeSentenceIdFlag, int &sentenceId,
PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment,
std::string &additionalPropertiesString,
float &count, float &pcfgSum );
void writeCountOfCounts( const std::string &fileNameCountOfCounts );
void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb );
void outputPhrasePair(const ExtractionPhrasePair &phrasePair, float, int, ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLog );
double computeLexicalTranslation( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource );
@ -100,7 +100,7 @@ void invertAlignment( const PHRASE *phraseSource, const PHRASE *phraseTarget, co
int main(int argc, char* argv[])
{
std::cerr << "Score v2.1 -- "
std::cerr << "Score v2.1 -- "
<< "scoring methods for extracted rules" << std::endl;
ScoreFeatureManager featureManager;
@ -155,7 +155,7 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--UnalignedFunctionWordPenalty") == 0) {
unalignedFWFlag = true;
if (i+1==argc) {
std::cerr << "ERROR: specify function words file for unaligned function word penalty!" << std::endl;
std::cerr << "ERROR: specify function words file for unaligned function word penalty!" << std::endl;
exit(1);
}
fileNameFunctionWords = argv[++i];
@ -224,8 +224,8 @@ int main(int argc, char* argv[])
Moses::OutputFileStream *outputFile = new Moses::OutputFileStream();
bool success = outputFile->Open(fileNamePhraseTable);
if (!success) {
std::cerr << "ERROR: could not open file phrase table file "
<< fileNamePhraseTable << std::endl;
std::cerr << "ERROR: could not open file phrase table file "
<< fileNamePhraseTable << std::endl;
exit(1);
}
phraseTableFile = outputFile;
@ -251,12 +251,12 @@ int main(int argc, char* argv[])
tmpPhraseSource = new PHRASE();
tmpPhraseTarget = new PHRASE();
tmpTargetToSourceAlignment = new ALIGNMENT();
processLine( std::string(line),
processLine( std::string(line),
i, featureManager.includeSentenceId(), tmpSentenceId,
tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
tmpAdditionalPropertiesString,
tmpCount, tmpPcfgSum);
phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
tmpTargetToSourceAlignment,
tmpCount, tmpPcfgSum );
phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount );
@ -288,14 +288,16 @@ int main(int argc, char* argv[])
tmpPhraseTarget = new PHRASE();
tmpTargetToSourceAlignment = new ALIGNMENT();
tmpAdditionalPropertiesString.clear();
processLine( std::string(line),
processLine( std::string(line),
i, featureManager.includeSentenceId(), tmpSentenceId,
tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
tmpAdditionalPropertiesString,
tmpCount, tmpPcfgSum);
tmpCount, tmpPcfgSum);
bool matchesPrevious = false;
bool sourceMatch = true; bool targetMatch = true; bool alignmentMatch = true; // be careful with these,
bool sourceMatch = true;
bool targetMatch = true;
bool alignmentMatch = true; // be careful with these,
// ExtractionPhrasePair::Matches() checks them in order and does not continue with the others
// once the first of them has been found to have to be set to false
@ -330,7 +332,7 @@ int main(int argc, char* argv[])
if ( !phrasePairsWithSameSource.empty() &&
!sourceMatch ) {
processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb );
for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
delete *iter;
}
@ -347,8 +349,8 @@ int main(int argc, char* argv[])
}
}
phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
tmpTargetToSourceAlignment,
phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
tmpTargetToSourceAlignment,
tmpCount, tmpPcfgSum );
phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount );
featureManager.addPropertiesToPhrasePair( *phrasePair, tmpCount, tmpSentenceId );
@ -364,7 +366,7 @@ int main(int argc, char* argv[])
}
processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb );
for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
delete *iter;
}
@ -384,7 +386,7 @@ int main(int argc, char* argv[])
void processLine( std::string line,
int lineID, bool includeSentenceIdFlag, int &sentenceId,
int lineID, bool includeSentenceIdFlag, int &sentenceId,
PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment,
std::string &additionalPropertiesString,
float &count, float &pcfgSum )
@ -474,7 +476,7 @@ void writeCountOfCounts( const string &fileNameCountOfCounts )
}
void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb )
{
if (phrasePairsWithSameSource.size() == 0) {
@ -486,23 +488,23 @@ void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSa
//std::cerr << "phrasePairs.size() = " << phrasePairs.size() << std::endl;
// loop through phrase pairs
for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
// add to total count
totalSource += (*iter)->GetCount();
}
// output the distinct phrase pairs, one at a time
for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
// add to total count
outputPhrasePair( **iter, totalSource, phrasePairsWithSameSource.size(), phraseTableFile, featureManager, maybeLogProb );
}
}
void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
float totalCount, int distinctCount,
ostream &phraseTableFile,
void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
float totalCount, int distinctCount,
ostream &phraseTableFile,
const ScoreFeatureManager& featureManager,
const MaybeLog& maybeLogProb )
{
@ -557,45 +559,45 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
// alignment
if ( hierarchicalFlag ) {
// always output alignment if hiero style
assert(phraseTarget->size() == bestAlignmentT2S->size()+1);
std::vector<std::string> alignment;
for ( size_t j = 0; j < phraseTarget->size() - 1; ++j ) {
if ( isNonTerminal(vcbT.getWord( phraseTarget->at(j) ))) {
if ( bestAlignmentT2S->at(j).size() != 1 ) {
std::cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << std::endl;
phraseTableFile.flush();
assert(bestAlignmentT2S->at(j).size() == 1);
}
size_t sourcePos = *(bestAlignmentT2S->at(j).begin());
//phraseTableFile << sourcePos << "-" << j << " ";
std::stringstream point;
point << sourcePos << "-" << j;
alignment.push_back(point.str());
} else {
for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
size_t sourcePos = *setIter;
std::stringstream point;
point << sourcePos << "-" << j;
alignment.push_back(point.str());
}
// always output alignment if hiero style
assert(phraseTarget->size() == bestAlignmentT2S->size()+1);
std::vector<std::string> alignment;
for ( size_t j = 0; j < phraseTarget->size() - 1; ++j ) {
if ( isNonTerminal(vcbT.getWord( phraseTarget->at(j) ))) {
if ( bestAlignmentT2S->at(j).size() != 1 ) {
std::cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << std::endl;
phraseTableFile.flush();
assert(bestAlignmentT2S->at(j).size() == 1);
}
}
// now print all alignments, sorted by source index
sort(alignment.begin(), alignment.end());
for (size_t i = 0; i < alignment.size(); ++i) {
phraseTableFile << alignment[i] << " ";
}
} else if ( !inverseFlag && wordAlignmentFlag) {
// alignment info in pb model
for (size_t j = 0; j < bestAlignmentT2S->size(); ++j) {
size_t sourcePos = *(bestAlignmentT2S->at(j).begin());
//phraseTableFile << sourcePos << "-" << j << " ";
std::stringstream point;
point << sourcePos << "-" << j;
alignment.push_back(point.str());
} else {
for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
size_t sourcePos = *setIter;
phraseTableFile << sourcePos << "-" << j << " ";
std::stringstream point;
point << sourcePos << "-" << j;
alignment.push_back(point.str());
}
}
}
// now print all alignments, sorted by source index
sort(alignment.begin(), alignment.end());
for (size_t i = 0; i < alignment.size(); ++i) {
phraseTableFile << alignment[i] << " ";
}
} else if ( !inverseFlag && wordAlignmentFlag) {
// alignment info in pb model
for (size_t j = 0; j < bestAlignmentT2S->size(); ++j) {
for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
size_t sourcePos = *setIter;
phraseTableFile << sourcePos << "-" << j << " ";
}
}
}
phraseTableFile << " ||| ";
@ -646,7 +648,7 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
if (kneserNeyFlag)
phraseTableFile << " " << distinctCount;
if ((treeFragmentsFlag) &&
if ((treeFragmentsFlag) &&
!inverseFlag) {
phraseTableFile << " |||";
}
@ -671,7 +673,7 @@ bool calcCrossedNonTerm( size_t targetPos, size_t sourcePos, const ALIGNMENT *al
// skip
} else {
const std::set<size_t> &sourceSet = alignmentTargetToSource->at(currTarget);
for (std::set<size_t>::const_iterator iter = sourceSet.begin();
for (std::set<size_t>::const_iterator iter = sourceSet.begin();
iter != sourceSet.end(); ++iter) {
size_t currSource = *iter;
@ -808,9 +810,9 @@ void LexicalTable::load( const string &fileName )
std::vector<string> token = tokenize( line );
if (token.size() != 3) {
std::cerr << "line " << i << " in " << fileName
<< " has wrong number of tokens, skipping:" << std::endl
<< token.size() << " " << token[0] << " " << line << std::endl;
std::cerr << "line " << i << " in " << fileName
<< " has wrong number of tokens, skipping:" << std::endl
<< token.size() << " " << token[0] << " " << line << std::endl;
continue;
}
@ -889,15 +891,16 @@ void printTargetPhrase(const PHRASE *phraseSource, const PHRASE *phraseTarget,
void invertAlignment(const PHRASE *phraseSource, const PHRASE *phraseTarget,
const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment) {
// typedef std::vector< std::set<size_t> > ALIGNMENT;
const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment)
{
// typedef std::vector< std::set<size_t> > ALIGNMENT;
outSourceToTargetAlignment->clear();
size_t numberOfSourceSymbols = (hierarchicalFlag ? phraseSource->size()-1 : phraseSource->size());
outSourceToTargetAlignment->resize(numberOfSourceSymbols);
// add alignment point
for (size_t targetPosition = 0; targetPosition < inTargetToSourceAlignment->size(); ++targetPosition) {
for ( std::set<size_t>::iterator setIter = (inTargetToSourceAlignment->at(targetPosition)).begin();
for ( std::set<size_t>::iterator setIter = (inTargetToSourceAlignment->at(targetPosition)).begin();
setIter != (inTargetToSourceAlignment->at(targetPosition)).end(); ++setIter ) {
size_t sourcePosition = *setIter;
outSourceToTargetAlignment->at(sourcePosition).insert(targetPosition);