mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-28 14:32:38 +03:00
KenLM c1dba12
- Reject NaNs - Fix ChartState hashing (unused in Moses) - Expose CreateOrThrow - Minor portability improvement in getopt
This commit is contained in:
parent
d5efa27be9
commit
4bcd2c75ca
@ -112,7 +112,7 @@ inline size_t hash_value(const ChartState &state) {
|
||||
size_t hashes[2];
|
||||
hashes[0] = hash_value(state.left);
|
||||
hashes[1] = hash_value(state.right);
|
||||
return util::MurmurHashNative(hashes, sizeof(size_t), state.full);
|
||||
return util::MurmurHashNative(hashes, sizeof(size_t) * 2, state.full);
|
||||
}
|
||||
|
||||
template <class M> class RuleScore {
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
@ -93,7 +94,11 @@ void ReadBackoff(util::FilePiece &in, ProbBackoff &weights) {
|
||||
case '\t':
|
||||
weights.backoff = in.ReadFloat();
|
||||
if (weights.backoff == ngram::kExtensionBackoff) weights.backoff = ngram::kNoExtensionBackoff;
|
||||
if ((in.get() != '\n')) UTIL_THROW(FormatLoadException, "Expected newline after backoff");
|
||||
{
|
||||
int float_class = fpclassify(weights.backoff);
|
||||
UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << weights.backoff);
|
||||
}
|
||||
UTIL_THROW_IF((in.get() != '\n'), FormatLoadException, "Expected newline after backoff");
|
||||
break;
|
||||
case '\n':
|
||||
weights.backoff = ngram::kNoExtensionBackoff;
|
||||
|
@ -10,6 +10,8 @@
|
||||
#include <iosfwd>
|
||||
#include <vector>
|
||||
|
||||
#include <math.h>
|
||||
|
||||
namespace lm {
|
||||
|
||||
void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number);
|
||||
@ -29,20 +31,26 @@ class PositiveProbWarn {
|
||||
|
||||
// Construct with the WarningAction to keep in action_.
explicit PositiveProbWarn(WarningAction action) : action_(action) {}
|
||||
|
||||
void Warn(float prob);
|
||||
// Read a float from f, require that a tab follows it, and return the value.
// Both checks go through UTIL_THROW_IF with FormatLoadException: one for a
// missing tab, one for a NaN value.
// A value greater than zero is handed to Warn() and then replaced by 0.0.
float ReadProb(util::FilePiece &f) {
|
||||
float prob = f.ReadFloat();
|
||||
// The character right after the number must be a tab.
UTIL_THROW_IF(f.get() != '\t', FormatLoadException, "Expected tab after probability");
|
||||
// NaN compares false against everything, so it would slip past the > 0.0 test below.
UTIL_THROW_IF(isnan(prob), FormatLoadException, "NaN probability");
|
||||
if (prob > 0.0) {
|
||||
Warn(prob);
|
||||
// Clamp after warning.
prob = 0.0;
|
||||
}
|
||||
return prob;
|
||||
}
|
||||
|
||||
private:
|
||||
void Warn(float prob);
|
||||
|
||||
WarningAction action_;
|
||||
};
|
||||
|
||||
template <class Voc> void Read1Gram(util::FilePiece &f, Voc &vocab, ProbBackoff *unigrams, PositiveProbWarn &warn) {
|
||||
try {
|
||||
float prob = f.ReadFloat();
|
||||
if (prob > 0.0) {
|
||||
warn.Warn(prob);
|
||||
prob = 0.0;
|
||||
}
|
||||
if (f.get() != '\t') UTIL_THROW(FormatLoadException, "Expected tab after probability");
|
||||
float prob = warn.ReadProb(f);
|
||||
ProbBackoff &value = unigrams[vocab.Insert(f.ReadDelimited(kARPASpaces))];
|
||||
value.prob = prob;
|
||||
ReadBackoff(f, value);
|
||||
@ -64,11 +72,7 @@ template <class Voc> void Read1Grams(util::FilePiece &f, std::size_t count, Voc
|
||||
// Read one n-gram entry. A positive log probability is reported through warn and stored as 0.
|
||||
template <class Voc, class Weights> void ReadNGram(util::FilePiece &f, const unsigned char n, const Voc &vocab, WordIndex *const reverse_indices, Weights &weights, PositiveProbWarn &warn) {
|
||||
try {
|
||||
weights.prob = f.ReadFloat();
|
||||
if (weights.prob > 0.0) {
|
||||
warn.Warn(weights.prob);
|
||||
weights.prob = 0.0;
|
||||
}
|
||||
weights.prob = warn.ReadProb(f);
|
||||
for (WordIndex *vocab_out = reverse_indices + n - 1; vocab_out >= reverse_indices; --vocab_out) {
|
||||
*vocab_out = vocab.Index(f.ReadDelimited(kARPASpaces));
|
||||
}
|
||||
|
10
util/file.cc
10
util/file.cc
@ -42,6 +42,16 @@ int OpenReadOrThrow(const char *name) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Open `name` with create + truncate + read/write flags and return the file descriptor.
// If the open call returns -1, UTIL_THROW_IF is invoked with ErrnoException and the
// message "while creating " followed by the name.
int CreateOrThrow(const char *name) {
|
||||
int ret;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
// Windows branch: _open with read and write permission bits.
UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name);
|
||||
#else
|
||||
// Other platforms: owner read/write, group read, others read.
UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint64_t SizeFile(int fd) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
__int64 ret = _filelengthi64(fd);
|
||||
|
@ -65,7 +65,10 @@ class scoped_FILE {
|
||||
std::FILE *file_;
|
||||
};
|
||||
|
||||
// Open for read only.
|
||||
int OpenReadOrThrow(const char *name);
|
||||
// Create file if it doesn't exist, truncate if it does. Opened for reading and writing.
|
||||
int CreateOrThrow(const char *name);
|
||||
|
||||
// Return value for SizeFile when it can't size properly.
|
||||
const uint64_t kBadSize = (uint64_t)-1;
|
||||
|
@ -10,6 +10,7 @@ Code given out at the 1985 UNIFORUM conference in Dallas.
|
||||
|
||||
#include "getopt.hh"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* <stdio.h> normally supplies NULL and EOF already. Define fallbacks only
 * when they are missing, so the standard macros are never redefined. */
#ifndef NULL
#define NULL 0
#endif
#ifndef EOF
#define EOF (-1)
#endif
|
||||
|
14
util/mmap.cc
14
util/mmap.cc
@ -170,20 +170,6 @@ void *MapZeroedWrite(int fd, std::size_t size) {
|
||||
return MapOrThrow(size, true, kFileFlags, false, fd, 0);
|
||||
}
|
||||
|
||||
// File-local helper scope.
namespace {
|
||||
|
||||
// Open `name` with create + truncate + read/write flags and return the file descriptor.
// If the open call returns -1, UTIL_THROW_IF is invoked with ErrnoException and the
// message "while creating " followed by the name.
int CreateOrThrow(const char *name) {
|
||||
int ret;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
// Windows branch: _open with read and write permission bits.
UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name);
|
||||
#else
|
||||
// Other platforms: owner read/write, group read, others read.
UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file) {
|
||||
file.reset(CreateOrThrow(name));
|
||||
try {
|
||||
|
Loading…
Reference in New Issue
Block a user