sparse features in on-disk-pt

This commit is contained in:
Hieu Hoang 2014-06-06 20:00:15 +01:00
parent bdf4fb2d53
commit 0114766b54
2 changed files with 38 additions and 18 deletions

View File

@ -162,13 +162,14 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
// allocate mem // allocate mem
size_t numScores = onDiskWrapper.GetNumScores() size_t numScores = onDiskWrapper.GetNumScores()
,numAlign = GetAlign().size(); ,numAlign = GetAlign().size();
size_t sparseFeatureSize = m_sparseFeatures.size();
size_t propSize = m_property.size(); size_t propSize = m_property.size();
size_t memNeeded = sizeof(UINT64) // file pos (phrase id) size_t memNeeded = sizeof(UINT64) // file pos (phrase id)
+ sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align + sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
+ sizeof(float) * numScores // scores + sizeof(float) * numScores // scores
+ sizeof(UINT64) // size of property string + sizeof(UINT64) + sparseFeatureSize // sparse features string
+ propSize; // actual property string + sizeof(UINT64) + propSize; // property string
char *mem = (char*) malloc(memNeeded); char *mem = (char*) malloc(memNeeded);
//memset(mem, 0, memNeeded); //memset(mem, 0, memNeeded);
@ -186,21 +187,33 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
// scores // scores
memUsed += WriteScoresToMemory(mem + memUsed); memUsed += WriteScoresToMemory(mem + memUsed);
// property string // sparse features
char *currPtr = (char*)mem + memUsed; memUsed += WriteStringToMemory(mem + memUsed, m_sparseFeatures);
UINT64 *memTmp = (UINT64*) currPtr;
memTmp[0] = propSize;
memUsed += sizeof(UINT64);
const char *propChar = m_property.c_str(); // property string
memcpy(mem + memUsed, propChar, propSize); memUsed += WriteStringToMemory(mem + memUsed, m_property);
memUsed += propSize;
//DebugMem(mem, memNeeded); //DebugMem(mem, memNeeded);
assert(memNeeded == memUsed); assert(memNeeded == memUsed);
return mem; return mem;
} }
size_t TargetPhrase::WriteStringToMemory(char *mem, const std::string &str) const
{
size_t memUsed = 0;
UINT64 *memTmp = (UINT64*) mem;
size_t strSize = str.size();
memTmp[0] = strSize;
memUsed += sizeof(UINT64);
const char *charStr = str.c_str();
memcpy(mem + memUsed, charStr, strSize);
memUsed += strSize;
return memUsed;
}
size_t TargetPhrase::WriteAlignToMemory(char *mem) const size_t TargetPhrase::WriteAlignToMemory(char *mem) const
{ {
size_t memUsed = 0; size_t memUsed = 0;
@ -294,6 +307,9 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
ret->GetScoreBreakdown().Assign(&phraseDict, m_scores); ret->GetScoreBreakdown().Assign(&phraseDict, m_scores);
ret->Evaluate(mosesSP, phraseDict.GetFeaturesToApply()); ret->Evaluate(mosesSP, phraseDict.GetFeaturesToApply());
// sparse features
ret->???(m_sparseFeatures);
// property // property
ret->SetProperties(m_property); ret->SetProperties(m_property);
@ -315,6 +331,9 @@ UINT64 TargetPhrase::ReadOtherInfoFromFile(UINT64 filePos, std::fstream &fileTPC
memUsed += ReadScoresFromFile(fileTPColl); memUsed += ReadScoresFromFile(fileTPColl);
assert((memUsed + filePos) == (UINT64)fileTPColl.tellg()); assert((memUsed + filePos) == (UINT64)fileTPColl.tellg());
// sparse features
memUsed += ReadStringFromFile(fileTPColl, m_sparseFeatures);
// properties // properties
memUsed += ReadStringFromFile(fileTPColl, m_property); memUsed += ReadStringFromFile(fileTPColl, m_property);
@ -325,19 +344,19 @@ UINT64 TargetPhrase::ReadStringFromFile(std::fstream &fileTPColl, std::string &o
{ {
UINT64 bytesRead = 0; UINT64 bytesRead = 0;
UINT64 propSize; UINT64 strSize;
fileTPColl.read((char*) &propSize, sizeof(UINT64)); fileTPColl.read((char*) &strSize, sizeof(UINT64));
bytesRead += sizeof(UINT64); bytesRead += sizeof(UINT64);
if (propSize) { if (strSize) {
char *mem = (char*) malloc(propSize + 1); char *mem = (char*) malloc(strSize + 1);
mem[propSize] = '\0'; mem[strSize] = '\0';
fileTPColl.read(mem, propSize); fileTPColl.read(mem, strSize);
outStr = string(mem); outStr = string(mem);
free(mem); free(mem);
cerr << "outStr=" << outStr << endl; cerr << "outStr=" << outStr << endl;
bytesRead += propSize; bytesRead += strSize;
} }
return bytesRead; return bytesRead;

View File

@ -50,13 +50,14 @@ class TargetPhrase: public Phrase
protected: protected:
AlignType m_align; AlignType m_align;
PhrasePtr m_sourcePhrase; PhrasePtr m_sourcePhrase;
std::string m_property, m_sparseFeatures; std::string m_sparseFeatures, m_property;
std::vector<float> m_scores; std::vector<float> m_scores;
UINT64 m_filePos; UINT64 m_filePos;
size_t WriteAlignToMemory(char *mem) const; size_t WriteAlignToMemory(char *mem) const;
size_t WriteScoresToMemory(char *mem) const; size_t WriteScoresToMemory(char *mem) const;
size_t WriteStringToMemory(char *mem, const std::string &str) const;
UINT64 ReadAlignFromFile(std::fstream &fileTPColl); UINT64 ReadAlignFromFile(std::fstream &fileTPColl);
UINT64 ReadScoresFromFile(std::fstream &fileTPColl); UINT64 ReadScoresFromFile(std::fstream &fileTPColl);