single word heuristic for phrase extraction,

and minor modification of SentenceAlignmentWithSyntax constructor
This commit is contained in:
Matthias Huck 2016-02-03 21:35:26 +00:00
parent 16a49d0d8d
commit 5de88ec1a4
6 changed files with 30 additions and 19 deletions

View File

@ -51,6 +51,7 @@ private:
bool gzOutput;
std::string instanceWeightsFile; //weights for each sentence
bool flexScoreFlag;
bool singleWordHeuristicFlag;
public:
std::vector<std::string> placeholders;
@ -73,6 +74,7 @@ public:
onlyOutputSpanInfo(false),
gzOutput(false),
flexScoreFlag(false),
singleWordHeuristicFlag(false),
debug(false) {
}
@ -119,6 +121,9 @@ public:
// Setter: stores the supplied value in flexScoreFlag
// (the constructor initialises that member to false).
void initFlexScoreFlag(const bool initflexScoreFlag) {
flexScoreFlag=initflexScoreFlag;
}
// Setter: stores the supplied value in singleWordHeuristicFlag
// (the constructor initialises that member to false).
void initSingleWordHeuristicFlag(const bool initSingleWordHeuristicFlag) {
singleWordHeuristicFlag = initSingleWordHeuristicFlag;
}
// functions for getting values
bool isAllModelsOutputFlag() const {
@ -163,6 +168,9 @@ public:
// Getter: returns the current value of flexScoreFlag; does not modify the object.
bool isFlexScoreFlag() const {
return flexScoreFlag;
}
// Getter: returns the current value of singleWordHeuristicFlag; does not modify the object.
bool isSingleWordHeuristicFlag() const {
return singleWordHeuristicFlag;
}
};
}

View File

@ -18,8 +18,6 @@
***********************************************************************/
#pragma once
#ifndef RULEEXTRACTIONOPTIONS_H_INCLUDED_
#define RULEEXTRACTIONOPTIONS_H_INCLUDED_
namespace MosesTraining
{
@ -95,4 +93,3 @@ public:
}
#endif

View File

@ -35,7 +35,7 @@ namespace MosesTraining
bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetString, int sentenceID, bool boundaryRules)
{
if (!m_options.targetSyntax) {
if (!m_targetSyntax) {
return SentenceAlignment::processTargetSentence(targetString, sentenceID, boundaryRules);
}
@ -56,7 +56,7 @@ bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetStrin
bool SentenceAlignmentWithSyntax::processSourceSentence(const char * sourceString, int sentenceID, bool boundaryRules)
{
if (!m_options.sourceSyntax) {
if (!m_sourceSyntax) {
return SentenceAlignment::processSourceSentence(sourceString, sentenceID, boundaryRules);
}

View File

@ -18,8 +18,6 @@
***********************************************************************/
#pragma once
#ifndef SENTENCEALIGNMENTWITHSYNTAX_H_INCLUDED_
#define SENTENCEALIGNMENTWITHSYNTAX_H_INCLUDED_
#include <map>
#include <set>
@ -42,18 +40,20 @@ public:
std::set<std::string> & m_sourceLabelCollection;
std::map<std::string, int> & m_targetTopLabelCollection;
std::map<std::string, int> & m_sourceTopLabelCollection;
const RuleExtractionOptions & m_options;
const bool m_targetSyntax, m_sourceSyntax;
SentenceAlignmentWithSyntax(std::set<std::string> & tgtLabelColl,
std::set<std::string> & srcLabelColl,
std::map<std::string,int> & tgtTopLabelColl,
std::map<std::string,int> & srcTopLabelColl,
const RuleExtractionOptions & options)
bool targetSyntax,
bool sourceSyntax)
: m_targetLabelCollection(tgtLabelColl)
, m_sourceLabelCollection(srcLabelColl)
, m_targetTopLabelCollection(tgtTopLabelColl)
, m_sourceTopLabelCollection(srcTopLabelColl)
, m_options(options) {
, m_targetSyntax(targetSyntax)
, m_sourceSyntax(sourceSyntax) {
}
virtual ~SentenceAlignmentWithSyntax() {}
@ -67,4 +67,3 @@ public:
}
#endif

View File

@ -155,6 +155,8 @@ int main(int argc, char* argv[])
options.initOrientationFlag(true);
} else if (strcmp(argv[i],"--FlexibilityScore") == 0) {
options.initFlexScoreFlag(true);
} else if (strcmp(argv[i],"--SingleWordHeuristic") == 0) {
options.initSingleWordHeuristicFlag(true);
} else if (strcmp(argv[i],"--NoTTable") == 0) {
options.initTranslationFlag(false);
} else if (strcmp(argv[i], "--IncludeSentenceId") == 0) {
@ -413,18 +415,22 @@ void ExtractTask::extract(SentenceAlignment &sentence)
}
// cout << "doing if for ( " << minF << "-" << maxF << ", " << startE << "," << endE << ")\n";
if (!out_of_bounds) {
if (!out_of_bounds ||
( m_options.isSingleWordHeuristicFlag() && (endE==startE) && (minF==maxF) )) // extraction of single word phrases even if inconsistent wrt. word alignment
{
// start point of source phrase may retreat over unaligned
for(int startF=minF;
(startF>=0 &&
(relaxLimit || startF>maxF-m_options.maxPhraseLength) && // within length limit
(startF==minF || sentence.alignedCountS[startF]==0)); // unaligned
((startF>=0 &&
(relaxLimit || startF>maxF-m_options.maxPhraseLength) && // within length limit
(startF==minF || sentence.alignedCountS[startF]==0)) && // unaligned
(!out_of_bounds || (startF==minF))); // if out of bounds, but single word heuristic: don't retreat over unaligned
startF--)
// end point of source phrase may advance over unaligned
for(int endF=maxF;
(endF<countF &&
(relaxLimit || endF<startF+m_options.maxPhraseLength) && // within length limit
(endF==maxF || sentence.alignedCountS[endF]==0)); // unaligned
((endF<countF &&
(relaxLimit || endF<startF+m_options.maxPhraseLength) && // within length limit
(endF==maxF || sentence.alignedCountS[endF]==0)) && // unaligned
(!out_of_bounds || (endF==maxF))); // if out of bounds, but single word heuristic: don't advance over unaligned
endF++) { // at this point we have extracted a phrase
if(buildExtraStructure) { // phrase || hier
if(endE-startE < m_options.maxPhraseLength && endF-startF < m_options.maxPhraseLength) { // within limit

View File

@ -347,7 +347,8 @@ int main(int argc, char* argv[])
SentenceAlignmentWithSyntax sentence
(targetLabelCollection, sourceLabelCollection,
targetTopLabelCollection, sourceTopLabelCollection, options);
targetTopLabelCollection, sourceTopLabelCollection,
options.targetSyntax, options.sourceSyntax);
//az: output src, tgt, and alignment line
if (options.onlyOutputSpanInfo) {
cout << "LOG: SRC: " << sourceString << endl;