mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
placeholders work in extract
This commit is contained in:
parent
996740de73
commit
98bb4fa1c7
@ -122,5 +122,18 @@ bool SentenceAlignment::create( char targetString[], char sourceString[], char a
|
||||
return true;
|
||||
}
|
||||
|
||||
void SentenceAlignment::invertAlignment()
|
||||
{
|
||||
alignedToS.resize(source.size());
|
||||
for (size_t targetPos = 0; targetPos < alignedToT.size(); ++targetPos) {
|
||||
const std::vector<int> &vec = alignedToT[targetPos];
|
||||
for (size_t i = 0; i < vec.size(); ++i) {
|
||||
int sourcePos = vec[i];
|
||||
alignedToS[sourcePos].push_back(targetPos);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -33,7 +33,7 @@ public:
|
||||
std::vector<std::string> target;
|
||||
std::vector<std::string> source;
|
||||
std::vector<int> alignedCountS;
|
||||
std::vector<std::vector<int> > alignedToT;
|
||||
std::vector<std::vector<int> > alignedToT, alignedToS;
|
||||
int sentenceID;
|
||||
std::string weightString;
|
||||
|
||||
@ -46,6 +46,8 @@ public:
|
||||
bool create(char targetString[], char sourceString[],
|
||||
char alignmentString[], char weightString[], int sentenceID, bool boundaryRules);
|
||||
|
||||
// Fills alignedToS from alignedToT: for every target position, each source
// position listed for it gets that target position appended to its own list.
void invertAlignment();
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -299,6 +299,9 @@ int main(int argc, char* argv[])
|
||||
cout << "LOG: PHRASES_BEGIN:" << endl;
|
||||
}
|
||||
if (sentence.create( englishString, foreignString, alignmentString, weightString, i, false)) {
|
||||
if (options.placeholders.size()) {
|
||||
sentence.invertAlignment();
|
||||
}
|
||||
ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFile , extractFileInv, extractFileOrientation, extractFileContext, extractFileContextInv);
|
||||
task->Run();
|
||||
delete task;
|
||||
@ -712,7 +715,7 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
|
||||
return;
|
||||
}
|
||||
|
||||
if (!checkPlaceholders(sentence, startE, endE, startF, endF)) {
|
||||
if (m_options.placeholders.size() && !checkPlaceholders(sentence, startE, endE, startF, endF)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -911,26 +914,18 @@ void ExtractTask::extractBase( SentenceAlignment &sentence )
|
||||
|
||||
}
|
||||
|
||||
|
||||
bool ExtractTask::checkPlaceholders (const SentenceAlignment &sentence, int startE, int endE, int startF, int endF)
|
||||
{
|
||||
for (size_t pos = startF; pos <= endF; ++pos) {
|
||||
const string &word = sentence.source[pos];
|
||||
if (isPlaceholder(word)) {
|
||||
if (sentence.alignedCountS[pos] != 1) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t pos = startE; pos <= endE; ++pos) {
|
||||
const string &word = sentence.target[pos];
|
||||
if (isPlaceholder(word)) {
|
||||
if (sentence.alignedToT[pos].size() != 1) {
|
||||
return false;
|
||||
const string &sourceWord = sentence.source[pos];
|
||||
if (isPlaceholder(sourceWord)) {
|
||||
if (sentence.alignedToS.at(pos).size() != 1) {
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
// check it actually lines up to another placeholder
|
||||
int targetPos = sentence.alignedToT[pos][0];
|
||||
int targetPos = sentence.alignedToS.at(pos).at(0);
|
||||
const string &otherWord = sentence.target[targetPos];
|
||||
if (!isPlaceholder(otherWord)) {
|
||||
return false;
|
||||
@ -939,6 +934,22 @@ bool ExtractTask::checkPlaceholders (const SentenceAlignment &sentence, int star
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t pos = startE; pos <= endE; ++pos) {
|
||||
const string &targetWord = sentence.target[pos];
|
||||
if (isPlaceholder(targetWord)) {
|
||||
if (sentence.alignedToT.at(pos).size() != 1) {
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
// check it actually lines up to another placeholder
|
||||
int sourcePos = sentence.alignedToT.at(pos).at(0);
|
||||
const string &otherWord = sentence.source[sourcePos];
|
||||
if (!isPlaceholder(otherWord)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user