mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-05 15:58:03 +03:00
single word heuristic for phrase extraction,
and minor modification of SentenceAlignmentWithSyntax constructor
This commit is contained in:
parent
16a49d0d8d
commit
5de88ec1a4
@ -51,6 +51,7 @@ private:
|
||||
bool gzOutput;
|
||||
std::string instanceWeightsFile; //weights for each sentence
|
||||
bool flexScoreFlag;
|
||||
bool singleWordHeuristicFlag;
|
||||
|
||||
public:
|
||||
std::vector<std::string> placeholders;
|
||||
@ -73,6 +74,7 @@ public:
|
||||
onlyOutputSpanInfo(false),
|
||||
gzOutput(false),
|
||||
flexScoreFlag(false),
|
||||
singleWordHeuristicFlag(false),
|
||||
debug(false) {
|
||||
}
|
||||
|
||||
@ -119,6 +121,9 @@ public:
|
||||
// Setter: stores the given value in the flexScoreFlag option.
void initFlexScoreFlag(const bool initflexScoreFlag) {
|
||||
flexScoreFlag=initflexScoreFlag;
|
||||
}
|
||||
// Setter: stores the given value in the singleWordHeuristicFlag option.
void initSingleWordHeuristicFlag(const bool initSingleWordHeuristicFlag) {
|
||||
singleWordHeuristicFlag = initSingleWordHeuristicFlag;
|
||||
}
|
||||
|
||||
// functions for getting values
|
||||
bool isAllModelsOutputFlag() const {
|
||||
@ -163,6 +168,9 @@ public:
|
||||
// Getter: returns the current value of the flexScoreFlag option.
bool isFlexScoreFlag() const {
|
||||
return flexScoreFlag;
|
||||
}
|
||||
// Getter: returns the current value of the singleWordHeuristicFlag option.
bool isSingleWordHeuristicFlag() const {
|
||||
return singleWordHeuristicFlag;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -18,8 +18,6 @@
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
#ifndef RULEEXTRACTIONOPTIONS_H_INCLUDED_
|
||||
#define RULEEXTRACTIONOPTIONS_H_INCLUDED_
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
@ -95,4 +93,3 @@ public:
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -35,7 +35,7 @@ namespace MosesTraining
|
||||
|
||||
bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetString, int sentenceID, bool boundaryRules)
|
||||
{
|
||||
if (!m_options.targetSyntax) {
|
||||
if (!m_targetSyntax) {
|
||||
return SentenceAlignment::processTargetSentence(targetString, sentenceID, boundaryRules);
|
||||
}
|
||||
|
||||
@ -56,7 +56,7 @@ bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetStrin
|
||||
|
||||
bool SentenceAlignmentWithSyntax::processSourceSentence(const char * sourceString, int sentenceID, bool boundaryRules)
|
||||
{
|
||||
if (!m_options.sourceSyntax) {
|
||||
if (!m_sourceSyntax) {
|
||||
return SentenceAlignment::processSourceSentence(sourceString, sentenceID, boundaryRules);
|
||||
}
|
||||
|
||||
|
@ -18,8 +18,6 @@
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
#ifndef SENTENCEALIGNMENTWITHSYNTAX_H_INCLUDED_
|
||||
#define SENTENCEALIGNMENTWITHSYNTAX_H_INCLUDED_
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
@ -42,18 +40,20 @@ public:
|
||||
std::set<std::string> & m_sourceLabelCollection;
|
||||
std::map<std::string, int> & m_targetTopLabelCollection;
|
||||
std::map<std::string, int> & m_sourceTopLabelCollection;
|
||||
const RuleExtractionOptions & m_options;
|
||||
const bool m_targetSyntax, m_sourceSyntax;
|
||||
|
||||
SentenceAlignmentWithSyntax(std::set<std::string> & tgtLabelColl,
|
||||
std::set<std::string> & srcLabelColl,
|
||||
std::map<std::string,int> & tgtTopLabelColl,
|
||||
std::map<std::string,int> & srcTopLabelColl,
|
||||
const RuleExtractionOptions & options)
|
||||
bool targetSyntax,
|
||||
bool sourceSyntax)
|
||||
: m_targetLabelCollection(tgtLabelColl)
|
||||
, m_sourceLabelCollection(srcLabelColl)
|
||||
, m_targetTopLabelCollection(tgtTopLabelColl)
|
||||
, m_sourceTopLabelCollection(srcTopLabelColl)
|
||||
, m_options(options) {
|
||||
, m_targetSyntax(targetSyntax)
|
||||
, m_sourceSyntax(sourceSyntax) {
|
||||
}
|
||||
|
||||
virtual ~SentenceAlignmentWithSyntax() {}
|
||||
@ -67,4 +67,3 @@ public:
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -155,6 +155,8 @@ int main(int argc, char* argv[])
|
||||
options.initOrientationFlag(true);
|
||||
} else if (strcmp(argv[i],"--FlexibilityScore") == 0) {
|
||||
options.initFlexScoreFlag(true);
|
||||
} else if (strcmp(argv[i],"--SingleWordHeuristic") == 0) {
|
||||
options.initSingleWordHeuristicFlag(true);
|
||||
} else if (strcmp(argv[i],"--NoTTable") == 0) {
|
||||
options.initTranslationFlag(false);
|
||||
} else if (strcmp(argv[i], "--IncludeSentenceId") == 0) {
|
||||
@ -413,18 +415,22 @@ void ExtractTask::extract(SentenceAlignment &sentence)
|
||||
}
|
||||
|
||||
// cout << "doing if for ( " << minF << "-" << maxF << ", " << startE << "," << endE << ")\n";
|
||||
if (!out_of_bounds) {
|
||||
if (!out_of_bounds ||
|
||||
( m_options.isSingleWordHeuristicFlag() && (endE==startE) && (minF==maxF) )) // extraction of single word phrases even if inconsistent wrt. word alignment
|
||||
{
|
||||
// start point of source phrase may retreat over unaligned
|
||||
for(int startF=minF;
|
||||
(startF>=0 &&
|
||||
(relaxLimit || startF>maxF-m_options.maxPhraseLength) && // within length limit
|
||||
(startF==minF || sentence.alignedCountS[startF]==0)); // unaligned
|
||||
((startF>=0 &&
|
||||
(relaxLimit || startF>maxF-m_options.maxPhraseLength) && // within length limit
|
||||
(startF==minF || sentence.alignedCountS[startF]==0)) && // unaligned
|
||||
(!out_of_bounds || (startF==minF))); // if out of bounds, but single word heuristic: don't retreat over unaligned
|
||||
startF--)
|
||||
// end point of source phrase may advance over unaligned
|
||||
for(int endF=maxF;
|
||||
(endF<countF &&
|
||||
(relaxLimit || endF<startF+m_options.maxPhraseLength) && // within length limit
|
||||
(endF==maxF || sentence.alignedCountS[endF]==0)); // unaligned
|
||||
((endF<countF &&
|
||||
(relaxLimit || endF<startF+m_options.maxPhraseLength) && // within length limit
|
||||
(endF==maxF || sentence.alignedCountS[endF]==0)) && // unaligned
|
||||
(!out_of_bounds || (endF==maxF))); // if out of bounds, but single word heuristic: don't advance over unaligned
|
||||
endF++) { // at this point we have extracted a phrase
|
||||
if(buildExtraStructure) { // phrase || hier
|
||||
if(endE-startE < m_options.maxPhraseLength && endF-startF < m_options.maxPhraseLength) { // within limit
|
||||
|
@ -347,7 +347,8 @@ int main(int argc, char* argv[])
|
||||
|
||||
SentenceAlignmentWithSyntax sentence
|
||||
(targetLabelCollection, sourceLabelCollection,
|
||||
targetTopLabelCollection, sourceTopLabelCollection, options);
|
||||
targetTopLabelCollection, sourceTopLabelCollection,
|
||||
options.targetSyntax, options.sourceSyntax);
|
||||
//az: output src, tgt, and alignment line
|
||||
if (options.onlyOutputSpanInfo) {
|
||||
cout << "LOG: SRC: " << sourceString << endl;
|
||||
|
Loading…
Reference in New Issue
Block a user