Merge Phrase::CreateFromString() and Phrase::CreateFromStringNewFormat(): CreateFromString() now takes a FactorDirection and an optional Word *lhs

This commit is contained in:
Hieu Hoang 2013-03-08 19:10:28 +00:00
parent d015ef2e62
commit 9298402dcf
13 changed files with 32 additions and 26 deletions

View File

@ -87,9 +87,9 @@ int main(int argc, char** argv)
c_mask.push_back(0);
}
Phrase e( 0),f(0),c(0);
e.CreateFromString(e_mask, query_e, "|");
f.CreateFromString(f_mask, query_f, "|");
c.CreateFromString(c_mask, query_c,"|");
e.CreateFromString(Output, e_mask, query_e, "|");
f.CreateFromString(Input, f_mask, query_f, "|");
c.CreateFromString(Input, c_mask, query_c,"|");
LexicalReorderingTable* table;
if(FileExists(inFilePath+".binlexr.idx")) {
std::cerr << "Loading binary table...\n";

View File

@ -66,7 +66,7 @@ int main(int argc, char **argv)
std::string line;
while(getline(std::cin, line)) {
Phrase sourcePhrase;
sourcePhrase.CreateFromString(input, line, "||dummy_string||");
sourcePhrase.CreateFromString(Input, input, line, "||dummy_string||");
TargetPhraseVectorPtr decodedPhraseColl
= pdc.GetTargetPhraseCollectionRaw(sourcePhrase);

View File

@ -63,7 +63,7 @@ MockHypothesisGuard::MockHypothesisGuard(
Hypothesis* prevHypo = m_hypothesis;
WordsRange wordsRange(ai->first,ai->second);
m_targetPhrases.push_back(TargetPhrase());
m_targetPhrases.back().CreateFromString(factors,*ti,"|");
m_targetPhrases.back().CreateFromString(Input, factors, *ti, "|");
m_toptions.push_back(new TranslationOption
(wordsRange,m_targetPhrases.back(),m_sentence));
m_hypothesis = Hypothesis::Create(*prevHypo,*m_toptions.back(),NULL);

View File

@ -152,7 +152,11 @@ void Phrase::PrependWord(const Word &newWord)
m_words[0] = newWord;
}
void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder, const StringPiece &phraseString, const StringPiece &factorDelimiter)
void Phrase::CreateFromString(FactorDirection direction
,const std::vector<FactorType> &factorOrder
,const StringPiece &phraseString
,const StringPiece &factorDelimiter
,Word *lhs)
{
FactorCollection &factorCollection = FactorCollection::Instance();

View File

@ -70,9 +70,11 @@ public:
* \param phraseString formatted input string to parse
* \param factorDelimiter delimiter between factors.
*/
void CreateFromString(const std::vector<FactorType> &factorOrder
void CreateFromString(FactorDirection direction
, const std::vector<FactorType> &factorOrder
, const StringPiece &phraseString
, const StringPiece &factorDelimiter);
, const StringPiece &factorDelimiter
, Word *lhs = NULL);
void CreateFromStringNewFormat(FactorDirection direction
, const std::vector<FactorType> &factorOrder

View File

@ -124,7 +124,7 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
throw runtime_error(msg);
}
}
Phrase::CreateFromString(factorOrder, line, factorDelimiter);
Phrase::CreateFromString(Input, factorOrder, line, factorDelimiter);
if (staticData.IsChart()) {
InitStartEndWord();
@ -230,7 +230,7 @@ void Sentence::CreateFromString(const std::vector<FactorType> &factorOrder
, const std::string &phraseString
, const std::string &factorDelimiter)
{
Phrase::CreateFromString(factorOrder, phraseString, factorDelimiter);
Phrase::CreateFromString(Input, factorOrder, phraseString, factorDelimiter);
}

View File

@ -502,12 +502,12 @@ bool StaticData::LoadData(Parameter *parameter)
if (vecStr.size() == 1) {
sentenceID++;
Phrase phrase(0);
phrase.CreateFromString(GetOutputFactorOrder(), vecStr[0], GetFactorDelimiter());
phrase.CreateFromString(Output, GetOutputFactorOrder(), vecStr[0], GetFactorDelimiter());
m_constraints.insert(make_pair(sentenceID,phrase));
} else if (vecStr.size() == 2) {
sentenceID = Scan<long>(vecStr[0]);
Phrase phrase(0);
phrase.CreateFromString(GetOutputFactorOrder(), vecStr[1], GetFactorDelimiter());
phrase.CreateFromString(Output, GetOutputFactorOrder(), vecStr[1], GetFactorDelimiter());
m_constraints.insert(make_pair(sentenceID,phrase));
} else {
CHECK(false);

View File

@ -48,7 +48,7 @@ TargetPhrase::TargetPhrase( std::string out_string)
//ACAT
const StaticData &staticData = StaticData::Instance();
CreateFromString(staticData.GetInputFactorOrder(), out_string, staticData.GetFactorDelimiter());
CreateFromString(Output, staticData.GetInputFactorOrder(), out_string, staticData.GetFactorDelimiter());
}

View File

@ -46,9 +46,9 @@ bool BilingualDynSuffixArray::Load(
InputFileStream sourceStrme(source);
InputFileStream targetStrme(target);
cerr << "Loading source corpus...\n";
LoadCorpus(sourceStrme, m_inputFactors, *m_srcCorpus, m_srcSntBreaks, m_srcVocab);
LoadCorpus(Input, sourceStrme, m_inputFactors, *m_srcCorpus, m_srcSntBreaks, m_srcVocab);
cerr << "Loading target corpus...\n";
LoadCorpus(targetStrme, m_outputFactors,*m_trgCorpus, m_trgSntBreaks, m_trgVocab);
LoadCorpus(Output, targetStrme, m_outputFactors,*m_trgCorpus, m_trgSntBreaks, m_trgVocab);
CHECK(m_srcSntBreaks.size() == m_trgSntBreaks.size());
// build suffix arrays and auxiliary arrays
@ -83,10 +83,10 @@ bool BilingualDynSuffixArray::LoadTM(
InputFileStream targetStrme(target);
cerr << "Loading target corpus...\n";
LoadCorpus(targetStrme, m_outputFactors,*m_trgCorpus, m_trgSntBreaks, m_trgVocab);
LoadCorpus(Output, targetStrme, m_outputFactors,*m_trgCorpus, m_trgSntBreaks, m_trgVocab);
cerr << "Loading source corpus...\n";
LoadCorpus(sourceStrme, m_inputFactors, *m_srcCorpus, m_srcSntBreaks, m_srcVocab);
LoadCorpus(Input, sourceStrme, m_inputFactors, *m_srcCorpus, m_srcSntBreaks, m_srcVocab);
CHECK(m_srcSntBreaks.size() == m_trgSntBreaks.size());
@ -214,7 +214,7 @@ bool BilingualDynSuffixArray::ExtractPhrases(const int& sntIndex, const int& wor
return curSnt.Extract(m_maxPhraseLength, phrasePairs, leftIdx, rightIdx); // extract all phrase Alignments in sentence
}
int BilingualDynSuffixArray::LoadCorpus(InputFileStream& corpus, const FactorList& factors,
int BilingualDynSuffixArray::LoadCorpus(FactorDirection direction, InputFileStream& corpus, const FactorList& factors,
std::vector<wordID_t>& cArray, std::vector<wordID_t>& sntArray,
Vocab* vocab)
{
@ -226,7 +226,7 @@ int BilingualDynSuffixArray::LoadCorpus(InputFileStream& corpus, const FactorLis
sntArray.push_back(sntIdx);
Phrase phrase(ARRAY_SIZE_INCR);
// parse phrase
phrase.CreateFromString( factors, line, factorDelimiter);
phrase.CreateFromString(direction, factors, line, factorDelimiter);
// store words in vocabulary and corpus
for( size_t i = 0; i < phrase.GetSize(); ++i) {
cArray.push_back( vocab->GetWordID(phrase.GetWord(i)) );
@ -499,7 +499,7 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string&
const unsigned oldSrcCrpSize = m_srcCorpus->size(), oldTrgCrpSize = m_trgCorpus->size();
cerr << "old source corpus size = " << oldSrcCrpSize << "\told target size = " << oldTrgCrpSize << endl;
Phrase sphrase(ARRAY_SIZE_INCR);
sphrase.CreateFromString(m_inputFactors, source, factorDelimiter);
sphrase.CreateFromString(Input, m_inputFactors, source, factorDelimiter);
m_srcVocab->MakeOpen();
std::vector<wordID_t> sIDs(sphrase.GetSize());
// store words in vocabulary and corpus
@ -514,7 +514,7 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string&
m_srcSntBreaks.push_back(oldSrcCrpSize); // former end of corpus is index of new sentence
m_srcVocab->MakeClosed();
Phrase tphrase(ARRAY_SIZE_INCR);
tphrase.CreateFromString(m_outputFactors, target, factorDelimiter);
tphrase.CreateFromString(Output, m_outputFactors, target, factorDelimiter);
m_trgVocab->MakeOpen();
std::vector<wordID_t> tIDs(tphrase.GetSize());
for(int i = tphrase.GetSize()-1; i >= 0; --i) {

View File

@ -119,7 +119,7 @@ private:
mutable std::set<wordID_t> m_freqWordsCached;
const size_t m_maxPhraseLength, m_maxSampleSize;
int LoadCorpus(InputFileStream&, const std::vector<FactorType>& factors,
int LoadCorpus(FactorDirection direction, InputFileStream&, const std::vector<FactorType>& factors,
std::vector<wordID_t>&, std::vector<wordID_t>&,
Vocab*);
int LoadAlignments(InputFileStream& aligs);

View File

@ -107,7 +107,7 @@ bool PhraseDictionaryMemory::InitDictionary()
//target
std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase());
targetPhrase->CreateFromString(m_output, targetPhraseString, factorDelimiter);
targetPhrase->CreateFromString(Output, m_output, targetPhraseString, factorDelimiter);
scv.clear();
for (util::TokenIter<util::AnyCharacter, true> token(scoreString, util::AnyCharacter(" \t")); token; ++token) {
@ -166,7 +166,7 @@ bool PhraseDictionaryMemory::InitDictionary()
//TODO: Would be better to reuse source phrases, but ownership has to be
//consistent across phrase table implementations
sourcePhrase.Clear();
sourcePhrase.CreateFromString(m_input, sourcePhraseString, factorDelimiter);
sourcePhrase.CreateFromString(Input, m_input, sourcePhraseString, factorDelimiter);
//Now that the source phrase is ready, we give the target phrase a copy
targetPhrase->SetSourcePhrase(sourcePhrase);
if (preSourceString == sourcePhraseString && preSourceNode) {

View File

@ -169,7 +169,7 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
for (size_t i=0; i<altTexts.size(); ++i) {
// set target phrase
TargetPhrase targetPhrase;
targetPhrase.CreateFromString(outputFactorOrder,altTexts[i],factorDelimiter);
targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter);
// set constituent label
string targetLHSstr;

View File

@ -340,7 +340,7 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
WordsRange range(startPos,endPos-1); // span covered by phrase
TargetPhrase targetPhrase;
targetPhrase.CreateFromString(outputFactorOrder,altTexts[i],factorDelimiter);
targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter);
targetPhrase.SetScore(scoreValue);
// TODO: targetPhrase.SetSourcePhrase() ?