mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 14:05:29 +03:00
sparse features in on-disk-pt
This commit is contained in:
parent
bdf4fb2d53
commit
0114766b54
@ -162,13 +162,14 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
|
|||||||
// allocate mem
|
// allocate mem
|
||||||
size_t numScores = onDiskWrapper.GetNumScores()
|
size_t numScores = onDiskWrapper.GetNumScores()
|
||||||
,numAlign = GetAlign().size();
|
,numAlign = GetAlign().size();
|
||||||
|
size_t sparseFeatureSize = m_sparseFeatures.size();
|
||||||
size_t propSize = m_property.size();
|
size_t propSize = m_property.size();
|
||||||
|
|
||||||
size_t memNeeded = sizeof(UINT64) // file pos (phrase id)
|
size_t memNeeded = sizeof(UINT64) // file pos (phrase id)
|
||||||
+ sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
|
+ sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
|
||||||
+ sizeof(float) * numScores // scores
|
+ sizeof(float) * numScores // scores
|
||||||
+ sizeof(UINT64) // size of property string
|
+ sizeof(UINT64) + sparseFeatureSize // sparse features string
|
||||||
+ propSize; // actual property string
|
+ sizeof(UINT64) + propSize; // property string
|
||||||
|
|
||||||
char *mem = (char*) malloc(memNeeded);
|
char *mem = (char*) malloc(memNeeded);
|
||||||
//memset(mem, 0, memNeeded);
|
//memset(mem, 0, memNeeded);
|
||||||
@ -186,21 +187,33 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
|
|||||||
// scores
|
// scores
|
||||||
memUsed += WriteScoresToMemory(mem + memUsed);
|
memUsed += WriteScoresToMemory(mem + memUsed);
|
||||||
|
|
||||||
// property string
|
// sparse features
|
||||||
char *currPtr = (char*)mem + memUsed;
|
memUsed += WriteStringToMemory(mem + memUsed, m_sparseFeatures);
|
||||||
UINT64 *memTmp = (UINT64*) currPtr;
|
|
||||||
memTmp[0] = propSize;
|
|
||||||
memUsed += sizeof(UINT64);
|
|
||||||
|
|
||||||
const char *propChar = m_property.c_str();
|
// property string
|
||||||
memcpy(mem + memUsed, propChar, propSize);
|
memUsed += WriteStringToMemory(mem + memUsed, m_property);
|
||||||
memUsed += propSize;
|
|
||||||
|
|
||||||
//DebugMem(mem, memNeeded);
|
//DebugMem(mem, memNeeded);
|
||||||
assert(memNeeded == memUsed);
|
assert(memNeeded == memUsed);
|
||||||
return mem;
|
return mem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t TargetPhrase::WriteStringToMemory(char *mem, const std::string &str) const
|
||||||
|
{
|
||||||
|
size_t memUsed = 0;
|
||||||
|
UINT64 *memTmp = (UINT64*) mem;
|
||||||
|
|
||||||
|
size_t strSize = str.size();
|
||||||
|
memTmp[0] = strSize;
|
||||||
|
memUsed += sizeof(UINT64);
|
||||||
|
|
||||||
|
const char *charStr = str.c_str();
|
||||||
|
memcpy(mem + memUsed, charStr, strSize);
|
||||||
|
memUsed += strSize;
|
||||||
|
|
||||||
|
return memUsed;
|
||||||
|
}
|
||||||
|
|
||||||
size_t TargetPhrase::WriteAlignToMemory(char *mem) const
|
size_t TargetPhrase::WriteAlignToMemory(char *mem) const
|
||||||
{
|
{
|
||||||
size_t memUsed = 0;
|
size_t memUsed = 0;
|
||||||
@ -294,6 +307,9 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
|
|||||||
ret->GetScoreBreakdown().Assign(&phraseDict, m_scores);
|
ret->GetScoreBreakdown().Assign(&phraseDict, m_scores);
|
||||||
ret->Evaluate(mosesSP, phraseDict.GetFeaturesToApply());
|
ret->Evaluate(mosesSP, phraseDict.GetFeaturesToApply());
|
||||||
|
|
||||||
|
// sparse features
|
||||||
|
ret->???(m_sparseFeatures);
|
||||||
|
|
||||||
// property
|
// property
|
||||||
ret->SetProperties(m_property);
|
ret->SetProperties(m_property);
|
||||||
|
|
||||||
@ -315,6 +331,9 @@ UINT64 TargetPhrase::ReadOtherInfoFromFile(UINT64 filePos, std::fstream &fileTPC
|
|||||||
memUsed += ReadScoresFromFile(fileTPColl);
|
memUsed += ReadScoresFromFile(fileTPColl);
|
||||||
assert((memUsed + filePos) == (UINT64)fileTPColl.tellg());
|
assert((memUsed + filePos) == (UINT64)fileTPColl.tellg());
|
||||||
|
|
||||||
|
// sparse features
|
||||||
|
memUsed += ReadStringFromFile(fileTPColl, m_sparseFeatures);
|
||||||
|
|
||||||
// properties
|
// properties
|
||||||
memUsed += ReadStringFromFile(fileTPColl, m_property);
|
memUsed += ReadStringFromFile(fileTPColl, m_property);
|
||||||
|
|
||||||
@ -325,19 +344,19 @@ UINT64 TargetPhrase::ReadStringFromFile(std::fstream &fileTPColl, std::string &o
|
|||||||
{
|
{
|
||||||
UINT64 bytesRead = 0;
|
UINT64 bytesRead = 0;
|
||||||
|
|
||||||
UINT64 propSize;
|
UINT64 strSize;
|
||||||
fileTPColl.read((char*) &propSize, sizeof(UINT64));
|
fileTPColl.read((char*) &strSize, sizeof(UINT64));
|
||||||
bytesRead += sizeof(UINT64);
|
bytesRead += sizeof(UINT64);
|
||||||
|
|
||||||
if (propSize) {
|
if (strSize) {
|
||||||
char *mem = (char*) malloc(propSize + 1);
|
char *mem = (char*) malloc(strSize + 1);
|
||||||
mem[propSize] = '\0';
|
mem[strSize] = '\0';
|
||||||
fileTPColl.read(mem, propSize);
|
fileTPColl.read(mem, strSize);
|
||||||
outStr = string(mem);
|
outStr = string(mem);
|
||||||
free(mem);
|
free(mem);
|
||||||
cerr << "outStr=" << outStr << endl;
|
cerr << "outStr=" << outStr << endl;
|
||||||
|
|
||||||
bytesRead += propSize;
|
bytesRead += strSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
return bytesRead;
|
return bytesRead;
|
||||||
|
@ -50,13 +50,14 @@ class TargetPhrase: public Phrase
|
|||||||
protected:
|
protected:
|
||||||
AlignType m_align;
|
AlignType m_align;
|
||||||
PhrasePtr m_sourcePhrase;
|
PhrasePtr m_sourcePhrase;
|
||||||
std::string m_property, m_sparseFeatures;
|
std::string m_sparseFeatures, m_property;
|
||||||
|
|
||||||
std::vector<float> m_scores;
|
std::vector<float> m_scores;
|
||||||
UINT64 m_filePos;
|
UINT64 m_filePos;
|
||||||
|
|
||||||
size_t WriteAlignToMemory(char *mem) const;
|
size_t WriteAlignToMemory(char *mem) const;
|
||||||
size_t WriteScoresToMemory(char *mem) const;
|
size_t WriteScoresToMemory(char *mem) const;
|
||||||
|
size_t WriteStringToMemory(char *mem, const std::string &str) const;
|
||||||
|
|
||||||
UINT64 ReadAlignFromFile(std::fstream &fileTPColl);
|
UINT64 ReadAlignFromFile(std::fstream &fileTPColl);
|
||||||
UINT64 ReadScoresFromFile(std::fstream &fileTPColl);
|
UINT64 ReadScoresFromFile(std::fstream &fileTPColl);
|
||||||
|
Loading…
Reference in New Issue
Block a user