mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-11-10 10:59:21 +03:00
Fix corner case in trie builder context merging
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3890 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
948d916ca0
commit
cb848f41b3
@ -405,16 +405,16 @@ void WriteContextFile(uint8_t *begin, uint8_t *end, const std::string &ngram_fil
|
||||
|
||||
class ContextReader {
|
||||
public:
|
||||
ContextReader() : length_(0) {}
|
||||
ContextReader() : valid_(false) {}
|
||||
|
||||
ContextReader(const char *name, size_t length) : file_(OpenOrThrow(name, "r")), length_(length), words_(length), valid_(true) {
|
||||
++*this;
|
||||
ContextReader(const char *name, unsigned char order) {
|
||||
Reset(name, order);
|
||||
}
|
||||
|
||||
void Reset(const char *name, size_t length) {
|
||||
void Reset(const char *name, unsigned char order) {
|
||||
file_.reset(OpenOrThrow(name, "r"));
|
||||
length_ = length;
|
||||
words_.resize(length);
|
||||
length_ = sizeof(WordIndex) * static_cast<size_t>(order);
|
||||
words_.resize(order);
|
||||
valid_ = true;
|
||||
++*this;
|
||||
}
|
||||
@ -448,14 +448,14 @@ void MergeContextFiles(const std::string &first_base, const std::string &second_
|
||||
const size_t context_size = sizeof(WordIndex) * (order - 1);
|
||||
std::string first_name(first_base + kContextSuffix);
|
||||
std::string second_name(second_base + kContextSuffix);
|
||||
ContextReader first(first_name.c_str(), context_size), second(second_name.c_str(), context_size);
|
||||
ContextReader first(first_name.c_str(), order - 1), second(second_name.c_str(), order - 1);
|
||||
RemoveOrThrow(first_name.c_str());
|
||||
RemoveOrThrow(second_name.c_str());
|
||||
std::string out_name(out_base + kContextSuffix);
|
||||
util::scoped_FILE out(OpenOrThrow(out_name.c_str(), "w"));
|
||||
while (first && second) {
|
||||
for (const WordIndex *f = *first, *s = *second; ; ++f, ++s) {
|
||||
if (f == *first + order) {
|
||||
if (f == *first + order - 1) {
|
||||
// Equal.
|
||||
WriteOrThrow(out.get(), *first, context_size);
|
||||
++first;
|
||||
@ -474,7 +474,10 @@ void MergeContextFiles(const std::string &first_base, const std::string &second_
|
||||
}
|
||||
}
|
||||
}
|
||||
CopyRestOrThrow((first ? first : second).GetFile(), out.get());
|
||||
ContextReader &remaining = first ? first : second;
|
||||
if (!remaining) return;
|
||||
WriteOrThrow(out.get(), *remaining, context_size);
|
||||
CopyRestOrThrow(remaining.GetFile(), out.get());
|
||||
}
|
||||
|
||||
void ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vocab, const std::vector<uint64_t> &counts, util::scoped_memory &mem, const std::string &file_prefix, unsigned char order) {
|
||||
@ -776,7 +779,7 @@ void BuildTrie(const std::string &file_prefix, const std::vector<uint64_t> &coun
|
||||
inputs[i-2].Init(assembled.str(), i);
|
||||
RemoveOrThrow(assembled.str().c_str());
|
||||
assembled << kContextSuffix;
|
||||
contexts[i-2].Reset(assembled.str().c_str(), (i-1) * sizeof(WordIndex));
|
||||
contexts[i-2].Reset(assembled.str().c_str(), i-1);
|
||||
RemoveOrThrow(assembled.str().c_str());
|
||||
}
|
||||
|
||||
@ -810,7 +813,7 @@ void BuildTrie(const std::string &file_prefix, const std::vector<uint64_t> &coun
|
||||
++contexts[0];
|
||||
}
|
||||
}
|
||||
unlink(name.c_str());
|
||||
RemoveOrThrow(name.c_str());
|
||||
}
|
||||
|
||||
// Do not disable this error message or else too little state will be returned. Both WriteEntries::Middle and returning state based on found n-grams will need to be fixed to handle this situation.
|
||||
|
Loading…
Reference in New Issue
Block a user