placeholders work in extract

This commit is contained in:
Hieu Hoang 2013-09-19 12:24:57 +02:00
parent 996740de73
commit 98bb4fa1c7
3 changed files with 42 additions and 16 deletions

View File

@ -122,5 +122,18 @@ bool SentenceAlignment::create( char targetString[], char sourceString[], char a
return true;
}
/**
 * Build the source-side view of the word alignment.
 *
 * alignedToT[t] lists the source positions linked to target position t.
 * This fills alignedToS so that alignedToS[s] lists the target positions
 * linked to source position s, in increasing target order.
 *
 * alignedToS is rebuilt from scratch on every call, so calling this more
 * than once (or on a reused object) does not accumulate duplicate or
 * stale links.
 *
 * NOTE(review): every value stored in alignedToT is assumed to be a valid
 * index into source (0 <= s < source.size()); the indexing below is
 * unchecked -- confirm that create() rejects out-of-range alignment points.
 */
void SentenceAlignment::invertAlignment()
{
  // One empty link list per source word; assign() also drops old contents.
  alignedToS.assign(source.size(), std::vector<int>());

  for (size_t targetPos = 0; targetPos < alignedToT.size(); ++targetPos) {
    const std::vector<int> &sourceLinks = alignedToT[targetPos];
    for (size_t i = 0; i < sourceLinks.size(); ++i) {
      const int sourcePos = sourceLinks[i];
      // Positions are stored as int throughout; make the narrowing explicit.
      alignedToS[sourcePos].push_back(static_cast<int>(targetPos));
    }
  }
}
}

View File

@ -33,7 +33,7 @@ public:
std::vector<std::string> target;
std::vector<std::string> source;
std::vector<int> alignedCountS;
std::vector<std::vector<int> > alignedToT;
std::vector<std::vector<int> > alignedToT, alignedToS;
int sentenceID;
std::string weightString;
@ -46,6 +46,8 @@ public:
bool create(char targetString[], char sourceString[],
char alignmentString[], char weightString[], int sentenceID, bool boundaryRules);
void invertAlignment();
};
}

View File

@ -299,6 +299,9 @@ int main(int argc, char* argv[])
cout << "LOG: PHRASES_BEGIN:" << endl;
}
if (sentence.create( englishString, foreignString, alignmentString, weightString, i, false)) {
if (options.placeholders.size()) {
sentence.invertAlignment();
}
ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFile , extractFileInv, extractFileOrientation, extractFileContext, extractFileContextInv);
task->Run();
delete task;
@ -712,7 +715,7 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
return;
}
if (!checkPlaceholders(sentence, startE, endE, startF, endF)) {
if (m_options.placeholders.size() && !checkPlaceholders(sentence, startE, endE, startF, endF)) {
return;
}
@ -911,26 +914,18 @@ void ExtractTask::extractBase( SentenceAlignment &sentence )
}
bool ExtractTask::checkPlaceholders (const SentenceAlignment &sentence, int startE, int endE, int startF, int endF)
{
for (size_t pos = startF; pos <= endF; ++pos) {
const string &word = sentence.source[pos];
if (isPlaceholder(word)) {
if (sentence.alignedCountS[pos] != 1) {
return false;
}
}
}
for (size_t pos = startE; pos <= endE; ++pos) {
const string &word = sentence.target[pos];
if (isPlaceholder(word)) {
if (sentence.alignedToT[pos].size() != 1) {
return false;
const string &sourceWord = sentence.source[pos];
if (isPlaceholder(sourceWord)) {
if (sentence.alignedToS.at(pos).size() != 1) {
return false;
}
else {
// check it actually lines up to another placeholder
int targetPos = sentence.alignedToT[pos][0];
int targetPos = sentence.alignedToS.at(pos).at(0);
const string &otherWord = sentence.target[targetPos];
if (!isPlaceholder(otherWord)) {
return false;
@ -939,6 +934,22 @@ bool ExtractTask::checkPlaceholders (const SentenceAlignment &sentence, int star
}
}
for (size_t pos = startE; pos <= endE; ++pos) {
const string &targetWord = sentence.target[pos];
if (isPlaceholder(targetWord)) {
if (sentence.alignedToT.at(pos).size() != 1) {
return false;
}
else {
// check it actually lines up to another placeholder
int sourcePos = sentence.alignedToT.at(pos).at(0);
const string &otherWord = sentence.source[sourcePos];
if (!isPlaceholder(otherWord)) {
return false;
}
}
}
}
return true;
}