This commit is contained in:
Hieu Hoang 2013-08-23 13:53:30 +01:00
parent 0613d98beb
commit 4dfb625536
6 changed files with 78 additions and 77 deletions

View File

@ -217,12 +217,11 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<
//preface surface form with UNK if marking unknowns
const Word &word = phrase.GetWord(pos);
if(markUnknown && word.IsOOV()) {
out << "UNK" << *factor;
out << "UNK" << *factor;
} else {
out << *factor;
}
else {
out << *factor;
}
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
CHECK(factor);

View File

@ -311,7 +311,8 @@ private:
};
// Exchanges the contents of two FVector objects by swapping their
// m_features and m_coreFeatures members individually.
// The unqualified swap calls are resolved by the compiler for the member
// types (overload resolution / argument-dependent lookup).
inline void swap(FVector &first, FVector &second)
{
  swap(first.m_features, second.m_features);
  swap(first.m_coreFeatures, second.m_coreFeatures);
}

View File

@ -417,7 +417,8 @@ struct SCCPlus {
}
};
// Exchanges the contents of two ScoreComponentCollection objects by
// swapping their m_scores members.
// The unqualified swap call is resolved by the compiler for the type of
// m_scores (overload resolution / argument-dependent lookup).
inline void swap(ScoreComponentCollection &first, ScoreComponentCollection &second)
{
  swap(first.m_scores, second.m_scores);
}

View File

@ -201,7 +201,8 @@ void TargetPhrase::Merge(const TargetPhrase &copy, const std::vector<FactorType>
m_fullScore += copy.m_fullScore;
}
void swap(TargetPhrase &first, TargetPhrase &second) {
void swap(TargetPhrase &first, TargetPhrase &second)
{
first.SwapWords(second);
std::swap(first.m_fullScore, second.m_fullScore);
std::swap(first.m_futureScore, second.m_futureScore);

View File

@ -62,13 +62,13 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollection(const
cache[hash] = value;
} else {
// in cache. just use it
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
value.second = clock();
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
value.second = clock();
ret = value.first;
ret = value.first;
}
} else {
// don't use cache. look up from phrase table
// don't use cache. look up from phrase table
ret = GetTargetPhraseCollectionNonCache(src);
}
@ -92,7 +92,7 @@ GetTargetPhraseCollectionLegacy(InputType const& src,WordsRange const& range) co
void PhraseDictionary::SetParameter(const std::string& key, const std::string& value)
{
if (key == "cache-size") {
m_maxCacheSize = Scan<size_t>(value);
m_maxCacheSize = Scan<size_t>(value);
} else if (key == "path") {
m_filePath = value;
} else if (key == "table-limit") {
@ -128,7 +128,7 @@ void PhraseDictionary::GetTargetPhraseCollectionBatch(const InputPathList &phras
void PhraseDictionary::ReduceCache() const
{
CacheColl &cache = GetCache();
CacheColl &cache = GetCache();
if (cache.size() <= m_maxCacheSize) return; // not full
// find cutoff for last used time
@ -161,8 +161,8 @@ PhraseDictionary::CacheColl &PhraseDictionary::GetCache() const
CacheColl *cache;
cache = m_cache.get();
if (cache == NULL) {
cache = new CacheColl;
m_cache.reset(cache);
cache = new CacheColl;
m_cache.reset(cache);
}
CHECK(cache);
return *cache;

View File

@ -104,92 +104,91 @@ void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(const InputPathList
// Looks up the target phrases for a single input path and stores the result
// on that path via InputPath::SetTargetPhrases.
//
// The lookup extends the phrase-table node recorded on the previous input
// path (or the root source node when there is no previous path) by the last
// word of this path's phrase:
//  - if that word cannot be converted to an on-disk word, or the node has no
//    child for it, the path is given NULL target phrases and a NULL node;
//  - otherwise the path receives the child's target phrase collection and
//    the child node itself.
void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath) const
{
  OnDiskPt::OnDiskWrapper &wrapper = const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
  const Phrase &phrase = inputPath.GetPhrase();
  const InputPath *prevInputPath = inputPath.GetPrevNode();

  const OnDiskPt::PhraseNode *prevPtNode = NULL;

  if (prevInputPath) {
    // Continue from the node this table stored on the preceding path.
    prevPtNode = static_cast<const OnDiskPt::PhraseNode*>(prevInputPath->GetPtNode(*this));
  } else {
    // Starting subphrase.
    assert(phrase.GetSize() == 1);
    prevPtNode = &wrapper.GetRootSourceNode();
  }

  // NOTE(review): when prevPtNode is NULL nothing is set on inputPath here;
  // presumably callers treat an unset entry as "no translations" - confirm.
  if (prevPtNode) {
    Word lastWord = phrase.GetWord(phrase.GetSize() - 1);
    lastWord.OnlyTheseFactors(m_inputFactors);
    OnDiskPt::Word *lastWordOnDisk = wrapper.ConvertFromMoses(m_input, lastWord);

    if (lastWordOnDisk == NULL) {
      // OOV according to this phrase table. Not possible to extend
      inputPath.SetTargetPhrases(*this, NULL, NULL);
    } else {
      const OnDiskPt::PhraseNode *ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper);
      if (ptNode) {
        const TargetPhraseCollection *targetPhrases = GetTargetPhraseCollection(ptNode);
        inputPath.SetTargetPhrases(*this, targetPhrases, ptNode);
      } else {
        inputPath.SetTargetPhrases(*this, NULL, NULL);
      }

      // The converted word is only needed for the child lookup above.
      delete lastWordOnDisk;
    }
  }
}
// Returns the target phrase collection for an on-disk phrase node.
//
// When m_maxCacheSize is non-zero the result is served from the cache
// returned by GetCache(), keyed by the node's file position. Each cache
// entry pairs the collection with the clock() value of its last use, which
// is refreshed on every hit. When m_maxCacheSize is zero the collection is
// looked up directly and nothing is cached.
//
// May return NULL if the non-cached lookup returns NULL (a NULL result is
// cached as well).
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const
{
  const TargetPhraseCollection *ret;

  if (m_maxCacheSize) {
    CacheColl &cache = GetCache();
    size_t hash = (size_t) ptNode->GetFilePos();

    std::map<size_t, std::pair<const TargetPhraseCollection*, clock_t> >::iterator iter;

    iter = cache.find(hash);

    if (iter == cache.end()) {
      // not in cache, need to look up from phrase table
      ret = GetTargetPhraseCollectionNonCache(ptNode);
      if (ret) {
        // NOTE(review): the collection returned by the non-cached lookup is
        // copied and the original pointer is dropped; if that lookup hands
        // over ownership of a fresh allocation this leaks it - confirm.
        ret = new TargetPhraseCollection(*ret);
      }

      std::pair<const TargetPhraseCollection*, clock_t> value(ret, clock());
      cache[hash] = value;
    } else {
      // in cache. just use it
      std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
      value.second = clock();

      ret = value.first;
    }
  } else {
    ret = GetTargetPhraseCollectionNonCache(ptNode);
  }

  return ret;
}
// Reads the target phrases stored at an on-disk phrase node and converts
// them to a Moses TargetPhraseCollection, bypassing the cache.
//
// At most m_tableLimit entries are requested from the node. The conversion
// uses this table's weights as reported by StaticData and the wrapper's
// vocabulary. The intermediate on-disk collection is deleted before
// returning; the converted collection is returned to the caller.
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const
{
  OnDiskPt::OnDiskWrapper &wrapper = const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());

  std::vector<float> weightT = StaticData::Instance().GetWeights(this);
  OnDiskPt::Vocab &vocab = wrapper.GetVocab();

  const OnDiskPt::TargetPhraseCollection *targetPhrasesOnDisk = ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper);
  TargetPhraseCollection *targetPhrases
  = targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false);

  // The on-disk representation is no longer needed once converted.
  delete targetPhrasesOnDisk;

  return targetPhrases;
}
} // namespace