fixed all warnings discovered by Visual Studio

This commit is contained in:
Frank Seide 2018-08-31 19:21:14 -07:00
parent 2bf44365ff
commit 14631160db
93 changed files with 440 additions and 397 deletions

16
src/3rd_party/cnpy/cnpy.cpp vendored Normal file → Executable file
View File

@ -70,19 +70,19 @@ void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& sh
int loc1, loc2;
//fortran order
loc1 = header.find("fortran_order")+16;
loc1 = (int)header.find("fortran_order")+16;
fortran_order = (header.substr(loc1,5) == "True" ? true : false);
//shape
loc1 = header.find("(");
loc2 = header.find(")");
loc1 = (int)header.find("(");
loc2 = (int)header.find(")");
std::string str_shape = header.substr(loc1+1,loc2-loc1-1);
if(str_shape.length() == 0) ndims = 0;
else if(str_shape[str_shape.size()-1] == ',') ndims = 1;
else ndims = std::count(str_shape.begin(),str_shape.end(),',')+1;
else ndims = (unsigned int)std::count(str_shape.begin(),str_shape.end(),',')+1;
shape = new unsigned int[ndims];
for(unsigned int i = 0;i < ndims;i++) {
loc1 = str_shape.find(",");
loc1 = (int)str_shape.find(",");
shape[i] = atoi(str_shape.substr(0,loc1).c_str());
str_shape = str_shape.substr(loc1+1);
}
@ -90,15 +90,15 @@ void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& sh
//endian, word size, data type
//byte order code | stands for not applicable.
//not sure when this applies except for byte array
loc1 = header.find("descr")+9;
loc1 = (int)header.find("descr")+9;
bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
assert(littleEndian);
assert(littleEndian); littleEndian;
//char type = header[loc1+1];
//assert(type == map_type(T));
std::string str_ws = header.substr(loc1+2);
loc2 = str_ws.find("'");
loc2 = (int)str_ws.find("'");
word_size = atoi(str_ws.substr(0,loc2).c_str());
}

22
src/3rd_party/cnpy/cnpy.h vendored Normal file → Executable file
View File

@ -70,7 +70,7 @@ namespace cnpy {
template<> std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);
template<typename T> std::string tostring(T i, int pad = 0, char padval = ' ') {
template<typename T> std::string tostring(T i, int /*pad*/ = 0, char /*padval*/ = ' ') {
std::stringstream s;
s << i;
return s.str();
@ -162,7 +162,7 @@ namespace cnpy {
unsigned long nels = 1;
for (int m=0; m<ndims; m++ ) nels *= shape[m];
int nbytes = nels*sizeof(T) + npy_header.size();
auto nbytes = nels*sizeof(T) + npy_header.size();
//get the CRC of the data to be added
unsigned int crc = crc32(0L,(unsigned char*)&npy_header[0],npy_header.size());
@ -250,7 +250,7 @@ namespace cnpy {
name(name), type(type_)
{
shape = dataShape;
word_size = word_size_;
word_size = (unsigned int)word_size_;
bytes.resize(data.size());
std::copy(data.begin(), data.end(), bytes.begin());
}
@ -278,15 +278,15 @@ namespace cnpy {
const auto* shape = item.shape.data();
const auto type = item.type;
const auto word_size = item.word_size;
const unsigned int ndims = item.shape.size();
const unsigned int ndims = (unsigned int)item.shape.size();
std::vector<char> npy_header = create_npy_header(type,word_size,shape,ndims);
unsigned long nels = 1;
for (int m=0; m<ndims; m++ ) nels *= shape[m];
int nbytes = nels*word_size + npy_header.size();
for (size_t m=0; m<ndims; m++ ) nels *= shape[m];
auto nbytes = nels*word_size + npy_header.size();
//get the CRC of the data to be added
unsigned int crc = crc32(0L,(unsigned char*)&npy_header[0],npy_header.size());
unsigned int crc = crc32(0L,(unsigned char*)&npy_header[0],(uInt)npy_header.size());
crc = crc32(crc,(unsigned char*)data,nels*word_size);
//build the local header
@ -330,7 +330,7 @@ namespace cnpy {
fwrite(&global_header[0],sizeof(char),global_header.size(),fp);
//build footer
unsigned short nrecs = items.size();
auto nrecs = items.size();
std::vector<char> footer;
footer += "PK"; //first part of sig
footer += (unsigned short) 0x0605; //second part of sig
@ -347,7 +347,7 @@ namespace cnpy {
//close up
fflush(fp);
bool bad = ferror(fp);
bool bad = ferror(fp) != 0;
fclose(fp);
// move to final location (atomically)
@ -370,7 +370,7 @@ namespace cnpy {
dict += tostring(word_size);
dict += "', 'fortran_order': False, 'shape': (";
dict += tostring(shape[0]);
for(int i = 1;i < ndims;i++) {
for(size_t i = 1;i < ndims;i++) {
dict += ", ";
dict += tostring(shape[i]);
}
@ -382,7 +382,7 @@ namespace cnpy {
dict.back() = '\n';
std::vector<char> header;
header += (char) 0x93;
header += (char) (0x93 - 0x100);
header += "NUMPY";
header += (char) 0x01; //major version of numpy format
header += (char) 0x00; //minor version of numpy format

2
src/3rd_party/svd/svd.cpp vendored Normal file → Executable file
View File

@ -33,7 +33,7 @@ static double PYTHAG(double a, double b)
int dsvd(float *a, int m, int n, float *w, float *v)
{
int flag, i, its, j, jj, k, l, nm;
int flag, i, its, j, jj, k, l = 0, nm = 0; // (initializing to keep compiler happy)
double c, f, h, s, x, y, z;
double anorm = 0.0, g = 0.0, scale = 0.0;
double *rv1;

6
src/3rd_party/yaml-cpp/binary_renamed.cpp vendored Normal file → Executable file
View File

@ -79,11 +79,11 @@ std::vector<unsigned char> DecodeBase64(const std::string &input) {
value = (value << 6) | d;
if (i % 4 == 3) {
*out++ = value >> 16;
*out++ = (unsigned char)(value >> 16);
if (i > 0 && input[i - 1] != '=')
*out++ = value >> 8;
*out++ = (unsigned char)(value >> 8);
if (input[i] != '=')
*out++ = value;
*out++ = (unsigned char)value;
}
}

2
src/3rd_party/yaml-cpp/collectionstack.h vendored Normal file → Executable file
View File

@ -27,7 +27,7 @@ class CollectionStack {
collectionStack.push(type);
}
void PopCollectionType(CollectionType::value type) {
assert(type == GetCurCollectionType());
assert(type == GetCurCollectionType()); type;
collectionStack.pop();
}

4
src/3rd_party/yaml-cpp/emitterstate.cpp vendored Normal file → Executable file
View File

@ -98,10 +98,6 @@ EmitterNodeType::value EmitterState::NextGroupType(
else
return EmitterNodeType::FlowMap;
}
// can't happen
assert(false);
return EmitterNodeType::NoType;
}
void EmitterState::StartedDoc() {

4
src/3rd_party/yaml-cpp/emitterstate.h vendored Normal file → Executable file
View File

@ -167,10 +167,6 @@ class EmitterState {
else
return EmitterNodeType::BlockMap;
}
// can't get here
assert(false);
return EmitterNodeType::NoType;
}
};

3
src/3rd_party/yaml-cpp/node/convert.h vendored Normal file → Executable file
View File

@ -126,6 +126,8 @@ struct convert<_Null> {
} \
}
#pragma warning(push)
#pragma warning(disable: 4127) // conditional expression is constant (the std::numeric_limits constants in macro above)
#define YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(type) \
YAML_DEFINE_CONVERT_STREAMABLE(type, -)
@ -148,6 +150,7 @@ YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned char);
YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(float);
YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(double);
YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(long double);
#pragma warning(pop)
#undef YAML_DEFINE_CONVERT_STREAMABLE_SIGNED
#undef YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED

1
src/3rd_party/yaml-cpp/node_data.cpp vendored Normal file → Executable file
View File

@ -91,7 +91,6 @@ std::size_t node_data::size() const {
default:
return 0;
}
return 0;
}
void node_data::compute_seq_size() const {

8
src/3rd_party/yaml-cpp/singledocparser.cpp vendored Normal file → Executable file
View File

@ -166,10 +166,10 @@ void SingleDocParser::HandleBlockSequence(EventHandler& eventHandler) {
// check for null
if (!m_scanner.empty()) {
const Token& token = m_scanner.peek();
if (token.type == Token::BLOCK_ENTRY ||
token.type == Token::BLOCK_SEQ_END) {
eventHandler.OnNull(token.mark, NullAnchor);
const Token& token1 = m_scanner.peek();
if (token1.type == Token::BLOCK_ENTRY ||
token1.type == Token::BLOCK_SEQ_END) {
eventHandler.OnNull(token1.mark, NullAnchor);
continue;
}
}

2
src/command/marian.cpp Normal file → Executable file
View File

@ -68,6 +68,8 @@ bool configureMPI(int argc, char** argv, bool sync) {
"Your version of MPI does not support multi-threaded communication.");
enable = true;
#else
argc; argv; sync; // (unused)
#endif
return enable;
}

2
src/common/cli_helper.h Normal file → Executable file
View File

@ -10,7 +10,7 @@ namespace cli {
// helper to replace environment-variable expressions of the form ${VARNAME} in
// a string
static std::string InterpolateEnvVars(std::string str) {
static inline std::string InterpolateEnvVars(std::string str) {
// temporary workaround for MS-internal PhillyOnAzure cluster: warm storage
// presently has the form /hdfs/VC instead of /{gfs,hdfs}/CLUSTER/VC
#if 1

2
src/common/compile_time_crc32.h Normal file → Executable file
View File

@ -74,7 +74,7 @@ constexpr uint32_t crc32(const char* str) {
// This is the stop-recursion function
template <>
constexpr uint32_t crc32<size_t(-1)>(const char* str) {
constexpr uint32_t crc32<size_t(-1)>(const char*) {
return 0xFFFFFFFF;
}

2
src/common/config.cpp Normal file → Executable file
View File

@ -12,7 +12,7 @@ namespace marian {
size_t Config::seed = (size_t)time(0);
bool Config::has(const std::string& key) const {
return config_[key];
return !!config_[key];
}
YAML::Node Config::get(const std::string& key) const {

6
src/common/config.h Normal file → Executable file
View File

@ -30,7 +30,7 @@ public:
bool validate = false) {
std::vector<std::string> sargv;
utils::Split(options, sargv, " ");
int argc = sargv.size();
int argc = (int)sargv.size();
std::vector<char*> argv(argc);
for(int i = 0; i < argc; ++i)
@ -67,7 +67,7 @@ public:
try {
if(!get<bool>("ignore-model-config"))
loadModelParameters(get<std::string>("model"));
} catch(std::runtime_error& e) {
} catch(std::runtime_error&) {
LOG(info, "[config] No model configuration found in model file");
}
}
@ -76,7 +76,7 @@ public:
try {
if(!get<bool>("ignore-model-config"))
loadModelParameters(model);
} catch(std::runtime_error& e) {
} catch(std::runtime_error&) {
LOG(info, "[config] No model configuration found in model file");
}
}

View File

@ -56,7 +56,7 @@ uint16_t guess_terminal_width(uint16_t max_width) {
#endif
// couldn't determine terminal width
if(cols == 0)
cols = po::options_description::m_default_line_length;
cols = (uint16_t)po::options_description::m_default_line_length;
return max_width ? std::min(cols, max_width) : cols;
}
@ -73,7 +73,7 @@ const std::set<std::string> PATHS = {"model",
bool ConfigParser::has(const std::string& key) const {
return config_[key];
return !!config_[key];
}
void ConfigParser::validateOptions() const {
@ -288,6 +288,8 @@ void ConfigParser::addOptionsModel(po::options_description& desc) {
"Tie all embedding layers and output layer")
("transformer-heads", po::value<int>()->default_value(8),
"Number of heads in multi-head attention (transformer)")
("transformer-dim-ffn", po::value<int>()->default_value(2048),
"Size of position-wise feed-forward network (transformer)")
("transformer-no-projection", po::value<bool>()->zero_tokens()->default_value(false),
"Omit linear projection after multi-head attention (transformer)")
("transformer-dim-ffn", po::value<int>()->default_value(2048),
@ -332,6 +334,20 @@ void ConfigParser::addOptionsModel(po::options_description& desc) {
->multitoken(),
"Convolution window widths in char-s2s model")
#endif
// Frank's experiments
// Note: Don't forget to add these also in encoder_decoder.cpp, EncoderDecoder().
("use-direct-sent-end-prob", po::value<bool>()->zero_tokens()->default_value(false),
"Enable Frank's direct sentence-end model (experimental) (transformer, requires --transformer-heads-top)")
("transformer-heads-top", po::value<int>(), //->default_value(8),
"Number of heads in top layer, multi-head attention (transformer)")
("transformer-coverage", po::value<bool>()->zero_tokens()->default_value(false),
"Enable Frank's coverage model, top layer only (experimental) (transformer)")
("transformer-coverage-all", po::value<bool>()->zero_tokens()->default_value(false),
"Enable Frank's coverage model, all layers (experimental) (transformer)")
("transformer-alignment-weight-heads", po::value<bool>()->zero_tokens()->default_value(false),
"If deriving alignment and/or coverage from multi-head, learn interpolation weights (experimental) (transformer)")
("transformer-offset-embedding-range", po::value<int>()->default_value(0),
"Clipping range of offset embedding, 0 to disable (transformer)")
;
if(mode_ == ConfigMode::training) {
@ -488,7 +504,7 @@ void ConfigParser::addOptionsTraining(po::options_description& desc) {
"Epsilon for label smoothing (0 to disable)")
("clip-norm", po::value<double>()->default_value(1.f),
"Clip gradient norm to arg (0 to disable)")
("exponential-smoothing", po::value<float>()->default_value(0.f)->implicit_value(1e-4, "1e-4"),
("exponential-smoothing", po::value<float>()->default_value(0.f)->implicit_value(1e-4f, "1e-4"),
"Maintain smoothed version of parameters for validation and saving with smoothing factor arg. "
" 0 to disable.")
("guided-alignment", po::value<std::string>(),
@ -754,7 +770,7 @@ void ConfigParser::parseOptions(int argc, char** argv, bool doValidate) {
return str;
};
bool loadConfig = vm_.count("config");
bool loadConfig = vm_.count("config") != 0;
bool reloadConfig
= (mode_ == ConfigMode::training)
&& boost::filesystem::exists(InterpolateEnvVarsIfRequested(
@ -832,6 +848,14 @@ void ConfigParser::parseOptions(int argc, char** argv, bool doValidate) {
SET_OPTION("transformer-tied-layers", std::vector<size_t>);
SET_OPTION("transformer-guided-alignment-layer", std::string);
// Frank's experiments:
SET_OPTION("use-direct-sent-end-prob", bool);
SET_OPTION_NONDEFAULT("transformer-heads-top", int);
SET_OPTION("transformer-coverage", bool);
SET_OPTION("transformer-coverage-all", bool);
SET_OPTION("transformer-alignment-weight-heads", bool);
SET_OPTION("transformer-offset-embedding-range", int);
#ifdef CUDNN
SET_OPTION("char-stride", int);
SET_OPTION("char-highway", int);

4
src/common/definitions.h Normal file → Executable file
View File

@ -108,7 +108,7 @@ KEY(axis, int);
KEY(shape, Shape);
KEY(value, float);
KEY(fixed, bool);
KEY(prefix, std::string);
//KEY(prefix, std::string); // (conflicts with local variables named prefix)
KEY(final, bool);
KEY(output_last, bool);
KEY(mask, Expr);
@ -132,5 +132,5 @@ KEY(valid, Ptr<RunBase>);
KEY(lex_probs, Ptr<LexProbs>);
} // namespace keywords
const float NEMATUS_LN_EPS = 1e-5;
const float NEMATUS_LN_EPS = 1e-5f;
} // namespace marian

5
src/common/file_stream.h Normal file → Executable file
View File

@ -3,7 +3,12 @@
#include <boost/filesystem.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/iostreams/device/file_descriptor.hpp>
#pragma warning(push)
#pragma warning(disable: 4458) // declaration of 'traits_type' hides class member
#pragma warning(disable: 4456) // declaration of 'c' hides previous local declaration
#pragma warning(disable: 4244) // conversion from 'int' to 'char', possible loss of data
#include <boost/iostreams/filter/gzip.hpp>
#pragma warning(pop)
#include <boost/iostreams/filtering_stream.hpp>
#include <iostream>
#include "3rd_party/exception.h"

6
src/common/io.cpp Normal file → Executable file
View File

@ -83,11 +83,11 @@ void loadItemsFromNpz(const std::string& fileName, std::vector<Item>& items) {
if(it.second->shape.size() == 1) {
shape.resize(2);
shape.set(0, 1);
shape.set(1, it.second->shape[0]);
shape.set(1, (size_t)it.second->shape[0]);
} else {
shape.resize(it.second->shape.size());
for(size_t i = 0; i < it.second->shape.size(); ++i)
shape.set(i, it.second->shape[i]);
for(int i = 0; i < it.second->shape.size(); ++i)
shape.set(i, (size_t)it.second->shape[i]);
}
Item item;

18
src/common/keywords.h Normal file → Executable file
View File

@ -71,7 +71,7 @@ public:
*
* @arg val The value to store in this object
*/
Keyword(Value value) : value_(value) {}
Keyword(Value val) : value_(val) {}
/**
* @brief Constructs a <code>Keyword</code> with no specified value.
@ -90,8 +90,8 @@ public:
*
* @return a new <code>Keyword</code> object containing the specified value
*/
Keyword<key, Value> operator=(Value value) const {
return Keyword<key, Value>(value);
Keyword<key, Value> operator=(Value val) const {
return Keyword<key, Value>(val);
}
/**
@ -141,22 +141,22 @@ struct True {};
struct False {};
template <typename Match, typename... Args>
typename Match::value_type opt(True foo,
typename Match::value_type dflt,
typename Match::value_type opt(True /*foo*/,
typename Match::value_type /*dflt*/,
Args... args) {
std::tuple<Args...> t(args...);
return std::get<Index<Match, std::tuple<Args...>>::value>(t)();
}
template <typename Match, typename... Args>
typename Match::value_type opt(False foo,
typename Match::value_type opt(False /*foo*/,
typename Match::value_type dflt,
Args... args) {
Args... /*args*/) {
return dflt;
}
template <typename Match, typename... Args>
typename Match::value_type Get(Match key,
typename Match::value_type Get(Match /*key*/,
typename Match::value_type dflt,
Args... args) {
constexpr bool match = is_one_of<Match, Args...>::value;
@ -165,7 +165,7 @@ typename Match::value_type Get(Match key,
}
template <typename Match, typename... Args>
constexpr bool Has(Match key, Args... args) {
constexpr bool Has(Match /*key*/, Args... args) {
return is_one_of<Match, Args...>::value;
}

2
src/common/options.h Normal file → Executable file
View File

@ -74,6 +74,6 @@ public:
return defaultValue;
}
bool has(const std::string& key) const { return options_[key]; }
bool has(const std::string& key) const { return !!options_[key]; }
};
} // namespace marian

21
src/common/shape.h Normal file → Executable file
View File

@ -38,7 +38,10 @@ public:
const int* data() const { return shape_.data(); }
int* data() { return shape_.data(); }
inline void set(int i, int val) { dim(i) = val; }
inline void set(int i, int val) { dim(i) = val; }
inline void set(size_t i, int val) { dim(i) = val; }
inline void set(int i, size_t val) { dim(i) = (int)val; }
inline void set(size_t i, size_t val) { dim(i) = (int)val; }
inline int& dim(int i) {
if(i >= 0) {
@ -55,20 +58,24 @@ public:
return shape_[size() + i];
}
}
inline const int& dim(int i) const {
return const_cast<Shape&>(*this).dim(i);
}
inline int& dim(size_t i) { return dim(int(i)); }
inline const int& dim(size_t i) const { return dim(int(i)); }
inline int operator[](int i) const { return dim(i); }
inline int operator[](int i) { return dim(i); }
inline int operator[](int i) { return dim(i); }
inline int operator[](size_t i) const { return dim(i); }
inline int operator[](size_t i) { return dim(i); }
inline int back() const { return shape_.back(); }
inline int& back() { return shape_.back(); }
inline int stride(int i) const {
std::vector<int> stride(shape_.size(), 1);
for(int j = shape_.size() - 2; j >= 0; --j)
for(int j = (int)shape_.size() - 2; j >= 0; --j)
stride[j] = stride[j + 1] * shape_[j + 1];
if(i >= 0)
@ -88,7 +95,7 @@ public:
d.resize(shape_.size());
std::vector<int> stride(shape_.size(), 1);
for(int j = shape_.size() - 2; j >= 0; --j)
for(int j = (int)shape_.size() - 2; j >= 0; --j)
stride[j] = stride[j + 1] * shape_[j + 1];
for(size_t j = 0; j < d.size(); ++j)
@ -116,7 +123,7 @@ public:
std::string toString() const {
std::stringstream strm;
strm << "shape=" << (*this)[0];
for(size_t i = 1; i < size(); ++i)
for(int i = 1; i < size(); ++i)
strm << "x" << (*this)[i];
strm << " size=" << elements();
return strm.str();
@ -135,7 +142,7 @@ public:
int axis(int ax) const {
if(ax < 0)
return size() + ax;
return (int)size() + ax;
else
return ax;
}

6
src/common/types.h Normal file → Executable file
View File

@ -40,11 +40,11 @@ static inline size_t sizeOf(Type type) {
}
static inline bool isSignedInt(Type type) {
return TypeClass::signed_type & type;
return (TypeClass::signed_type & type) != 0;
}
static inline bool isUnsignedInt(Type type) {
return TypeClass::unsigned_type & type;
return (TypeClass::unsigned_type & type) != 0;
}
static inline bool isInt(Type type) {
@ -52,7 +52,7 @@ static inline bool isInt(Type type) {
}
static inline bool isFloat(Type type) {
return TypeClass::float_type & type;
return (TypeClass::float_type & type) != 0;
}
template <typename T>

2
src/data/batch.h Normal file → Executable file
View File

@ -10,7 +10,7 @@ namespace data {
class Batch {
public:
virtual size_t size() const = 0;
virtual size_t words(int which = 0) const { return 0; };
virtual size_t words(int /*which*/ = 0) const { return 0; };
virtual size_t width() const { return 0; };
virtual size_t sizeTrg() const { return 0; };

2
src/data/batch_generator.h Normal file → Executable file
View File

@ -110,7 +110,7 @@ private:
while(!maxiBatch->empty()) {
// push item onto batch
batchVector.push_back(maxiBatch->top());
currentWords += batchVector.back()[0].size();
currentWords += (int)batchVector.back()[0].size();
maxiBatch->pop();
// Batch size based on sentences

2
src/data/corpus.h Normal file → Executable file
View File

@ -67,7 +67,7 @@ public:
maxDims.resize(ex.size(), 0);
for(size_t i = 0; i < ex.size(); ++i) {
if(ex[i].size() > (size_t)maxDims[i])
maxDims[i] = ex[i].size();
maxDims[i] = (int)ex[i].size();
}
sentenceIds.push_back(ex.getId());
}

View File

@ -216,9 +216,9 @@ void CorpusBase::addWeightsToSentenceTuple(const std::string& line,
void CorpusBase::addAlignmentsToBatch(Ptr<CorpusBatch> batch,
const std::vector<sample>& batchVector) {
int srcWords = batch->front()->batchWidth();
int trgWords = batch->back()->batchWidth();
int dimBatch = batch->getSentenceIds().size();
int srcWords = (int)batch->front()->batchWidth();
int trgWords = (int)batch->back()->batchWidth();
int dimBatch = (int)batch->getSentenceIds().size();
std::vector<float> aligns(srcWords * dimBatch * trgWords, 0.f);
@ -235,8 +235,8 @@ void CorpusBase::addAlignmentsToBatch(Ptr<CorpusBatch> batch,
void CorpusBase::addWeightsToBatch(Ptr<CorpusBatch> batch,
const std::vector<sample>& batchVector) {
int dimBatch = batch->size();
int trgWords = batch->back()->batchWidth();
int dimBatch = (int)batch->size();
int trgWords = (int)batch->back()->batchWidth();
auto sentenceLevel
= options_->get<std::string>("data-weighting-type") == "sentence";

30
src/data/corpus_base.h Normal file → Executable file
View File

@ -122,7 +122,7 @@ public:
* @param size Number of sentences
* @param width Number of words in the longest sentence
*/
SubBatch(int size, int width, const Ptr<Vocab>& vocab)
SubBatch(size_t size, size_t width, const Ptr<Vocab>& vocab)
: indices_(size * width, 0),
mask_(size * width, 0),
size_(size),
@ -176,31 +176,31 @@ public:
ABORT_IF(size_ == 0, "Encoutered sub-batch size of 0");
std::vector<Ptr<SubBatch>> splits;
size_t subSize = std::ceil(size_ / (float)n);
size_t subSize = size_t(std::ceil(size_ / (float)n));
size_t restSize = size_;
size_t pos = 0;
for(size_t k = 0; k < n; ++k) {
size_t __size__ = std::min(subSize, restSize);
if(__size__ > 0) {
auto sb = New<SubBatch>(__size__, width_, vocab_);
size_t size = std::min(subSize, restSize);
if(size > 0) {
auto sb = New<SubBatch>(size, width_, vocab_);
size_t __words__ = 0;
size_t words = 0;
for(size_t j = 0; j < width_; ++j) {
for(size_t i = 0; i < __size__; ++i) {
sb->data()[j * __size__ + i] = indices_[j * size_ + pos + i];
sb->mask()[j * __size__ + i] = mask_[j * size_ + pos + i];
for(size_t i = 0; i < size; ++i) {
sb->data()[j * size + i] = indices_[j * size_ + pos + i];
sb->mask()[j * size + i] = mask_[j * size_ + pos + i];
if(mask_[j * size_ + pos + i] != 0)
__words__++;
words++;
}
}
sb->setWords(__words__);
sb->setWords(words);
splits.push_back(sb);
restSize -= __size__;
pos += __size__;
restSize -= size;
pos += size;
}
}
return splits;
@ -309,7 +309,7 @@ public:
// set word indices to different values to avoid same hashes
std::fill(sb->data().begin(), sb->data().end(), idx++);
// mask: no items are being masked out
std::fill(sb->mask().begin(), sb->mask().end(), 1);
std::fill(sb->mask().begin(), sb->mask().end(), 1.f);
batches.push_back(sb);
}
@ -326,7 +326,7 @@ public:
}
if(options->has("data-weighting")) {
int weightsSize = batchSize;
auto weightsSize = batchSize;
if(options->get<std::string>("data-weighting-type") != "sentence")
weightsSize *= lengths.back();
std::vector<float> weights(weightsSize, 1.f);

2
src/data/corpus_nbest.h Normal file → Executable file
View File

@ -58,7 +58,7 @@ public:
maxDims.resize(ex.size(), 0);
for(size_t i = 0; i < ex.size(); ++i) {
if(ex[i].size() > (size_t)maxDims[i])
maxDims[i] = ex[i].size();
maxDims[i] = (int)ex[i].size();
}
sentenceIds.push_back(ex.getId());
}

6
src/data/dataset.h Normal file → Executable file
View File

@ -82,7 +82,7 @@ public:
void push_back(Input input) { inputs_.push_back(input); }
virtual std::vector<Ptr<Batch>> split(size_t n) override { ABORT("Not implemented"); }
virtual std::vector<Ptr<Batch>> split(size_t /*n*/) override { ABORT("Not implemented"); }
Data& features() { return inputs_[0].data(); }
@ -115,7 +115,7 @@ public:
void shuffle() override { std::shuffle(examples_.begin(), examples_.end(), eng_); }
batch_ptr toBatch(const Examples& batchVector) override {
int batchSize = batchVector.size();
int batchSize = (int)batchVector.size();
std::vector<int> maxDims;
for(auto& ex : batchVector) {
@ -123,7 +123,7 @@ public:
maxDims.resize(ex.size(), 0);
for(size_t i = 0; i < ex.size(); ++i) {
if(ex[i].size() > (size_t)maxDims[i])
maxDims[i] = ex[i].size();
maxDims[i] = (int)ex[i].size();
}
}

4
src/data/rng_engine.h Normal file → Executable file
View File

@ -16,8 +16,8 @@ protected:
std::mt19937 eng_;
public:
RNGEngine() : eng_(Config::seed) {}
RNGEngine(size_t eng) : eng_(eng) {}
RNGEngine() : eng_((unsigned int)Config::seed) {}
RNGEngine(size_t eng) : eng_((unsigned int)eng) {}
std::string getRNGState() {
std::ostringstream oss;

2
src/data/shortlist.h Normal file → Executable file
View File

@ -79,7 +79,7 @@ public:
for(auto i : srcBatch->data())
idxSet.insert(i);
std::uniform_int_distribution<> dis(firstNum_, maxVocab_);
std::uniform_int_distribution<> dis((int)firstNum_, (int)maxVocab_);
while(idxSet.size() < total_ && idxSet.size() < maxVocab_)
idxSet.insert(dis(gen_));

2
src/data/text_input.h Normal file → Executable file
View File

@ -66,7 +66,7 @@ public:
maxDims.resize(ex.size(), 0);
for(size_t i = 0; i < ex.size(); ++i) {
if(ex[i].size() > (size_t)maxDims[i])
maxDims[i] = ex[i].size();
maxDims[i] = (int)ex[i].size();
}
sentenceIds.push_back(ex.getId());
}

4
src/data/vocab.h Normal file → Executable file
View File

@ -51,8 +51,8 @@ private:
typedef std::vector<std::string> Id2Str;
Id2Str id2str_;
Word eosId_ = -1;
Word unkId_ = -1;
Word eosId_ = (Word)-1;
Word unkId_ = (Word)-1;
class VocabFreqOrderer;
};

2
src/functional/approx.h Normal file → Executable file
View File

@ -59,7 +59,7 @@ struct Approx {
if(x <= -radius)
return 0;
if(x < radius) // +1 because 0 holds value for x < -radius
return (x + radius - offset) / ((2.f * radius) / pieces) + 1;
return int((x + radius - offset) / ((2.f * radius) / pieces) + 1);
return pieces + 1;
}

6
src/functional/operands.h Normal file → Executable file
View File

@ -13,7 +13,7 @@ using IsClass = typename std::enable_if<std::is_class<C>::value, C>::type;
template <int N>
struct Select {
template <typename T, typename... Args>
__HDI__ static auto apply(T&& arg, Args&&... args)
__HDI__ static auto apply(T&& /*arg*/, Args&&... args)
-> decltype(Select<N - 1>::apply(args...)) {
return Select<N - 1>::apply(args...);
}
@ -22,7 +22,7 @@ struct Select {
template <>
struct Select<0> {
template <typename T, typename... Args>
__HDI__ static T apply(T&& arg, Args&&... args) {
__HDI__ static T apply(T&& arg, Args&&... /*args*/) {
return arg;
}
};
@ -49,7 +49,7 @@ struct Capture {
Capture(float val) : value(val){};
template <typename... Args>
__HDI__ float operator()(Args&&... args) {
__HDI__ float operator()(Args&&... /*args*/) {
return value;
}

6
src/functional/predicates.h Normal file → Executable file
View File

@ -51,10 +51,10 @@ struct BinaryFunctor {
template <class X> \
using name = UnaryFunctor<elem::name, X>; \
template <typename X> \
name<IsClass<X>> name2(X x) { \
static inline name<IsClass<X>> name2(X x) { \
return name<X>(x); \
} \
static name<Capture> name2(Capture x) { return name<Capture>(x); }
static inline name<Capture> name2(Capture x) { return name<Capture>(x); }
#define BINARY(name, name2, func) \
namespace elem { \
@ -120,7 +120,7 @@ BINARY(Or, operator||, x || y);
template <typename T>
__HDI__ T sgn(T val) {
return (float(0) < val) - (val < float(0));
return T((0 < val) - (val < 0));
}
UNARY(Sgn, sgn, sgn(x));

2
src/functional/shape.h Normal file → Executable file
View File

@ -89,7 +89,7 @@ struct ConstantShape {
__HDI__ static constexpr size_t size() { return N; }
__HDI__ int elements() const { return elements_; }
__HDI__ int elements() const { return (int)elements_; }
__HDI__ int index(const Array<int, N>& d) const {
int i = 0;

3
src/graph/chainable.h Normal file → Executable file
View File

@ -51,7 +51,8 @@ class ExpressionGraph;
* or formally \f$\bar{w}_i = \frac{\partial y}{\partial w_i}\f$
*/
template <class DataType>
struct Chainable {
class Chainable {
public:
Chainable() {}
virtual ~Chainable(){};

View File

@ -203,7 +203,7 @@ public:
tensors_->throwAtReallocation(true);
backprop();
tensors_->throwAtReallocation(false);
} catch(AllocationException& e) {
} catch(AllocationException&) {
tensors_->throwAtReallocation(false);
return false;
}

8
src/graph/expression_operators.cpp Normal file → Executable file
View File

@ -353,12 +353,12 @@ Expr affine(Expr a, Expr b, Expr bias, bool transA, bool transB, float scale) {
// swap the last two axes
Expr transpose(Expr a) {
std::vector<int> axes(a->shape().size());
for(size_t i = 0; i < axes.size(); ++i) {
for(int i = 0; i < axes.size(); ++i) {
axes[i] = i;
}
if(axes.size() > 1) {
axes[axes.size() - 1] = axes.size() - 2;
axes[axes.size() - 2] = axes.size() - 1;
axes[axes.size() - 1] = (int)axes.size() - 2;
axes[axes.size() - 2] = (int)axes.size() - 1;
}
return Expression<TransposeNodeOp>(a, axes);
}
@ -405,7 +405,7 @@ Expr leakyrelu(const std::vector<Expr>&) {
ABORT("Not implemented");
}
Expr prelu(const std::vector<Expr>&, float alpha) {
Expr prelu(const std::vector<Expr>&, float /*alpha*/) {
ABORT("Not implemented");
}

9
src/graph/node_initializers.cpp Normal file → Executable file
View File

@ -23,7 +23,7 @@ float xor128() {
y = z;
z = w;
w = (w ^ (w >> 19) ^ t ^ (t >> 8)) % 1000;
return 0.1 * ((w % 1000) / 1000.f) - 0.05;
return 0.1f * ((w % 1000) / 1000.f) - 0.05f;
}
void zeros(Tensor t) {
@ -50,7 +50,7 @@ NodeInitializer diag(float val) {
};
}
NodeInitializer normal(float scale, bool ortho /*= true*/) {
NodeInitializer normal(float scale, bool /*ortho*/ /*= true*/) {
return [scale](Tensor t) {
distribution<std::normal_distribution<float>>(t, 0, scale);
};
@ -113,7 +113,10 @@ NodeInitializer from_vector(const std::vector<float>& v) {
}
NodeInitializer from_vector(const std::vector<size_t>& v) {
std::vector<float> vf(v.begin(), v.end());
auto n = v.size();
std::vector<float> vf(n);
for (size_t i = 0; i < n; i++)
vf[i] = (float)v[i];
return from_vector(vf);
}

2
src/graph/node_initializers.h Normal file → Executable file
View File

@ -28,7 +28,7 @@ NodeInitializer diag(float val);
template <class Distribution, class Iterator>
void distribution(Iterator begin, Iterator end, float a, float b) {
std::default_random_engine engine(Config::seed++);
std::default_random_engine engine((unsigned int)Config::seed++);
Distribution dist(a, b);
auto gen = std::bind(dist, engine);
std::generate(begin, end, gen);

0
src/graph/node_operators_binary.h Normal file → Executable file
View File

2
src/graph/parameters.h Normal file → Executable file
View File

@ -119,12 +119,10 @@ public:
virtual Tensor vals() override {
ABORT("Not implemented for memory-mapped parameters");
return nullptr;
}
virtual Tensor grads() override {
ABORT("Not implemented for memory-mapped parameters");
return nullptr;
}
virtual void clear() override {

2
src/layers/generic.h Normal file → Executable file
View File

@ -155,7 +155,7 @@ public:
return affine(input, W_, b_, false, transposeW_);
}
virtual Expr apply(const std::vector<Expr>& inputs) override {
virtual Expr apply(const std::vector<Expr>& /*inputs*/) override {
ABORT("Not implemented");
};
};

8
src/layers/guided_alignment.h Normal file → Executable file
View File

@ -38,13 +38,13 @@ static inline Expr guidedAlignmentCost(Ptr<ExpressionGraph> graph,
}
Expr alnCost;
float eps = 1e-6;
float epsilon = 1e-6f;
if(guidedCostType == "mse") {
alnCost = sum(flatten(square(att - aln))) / (2 * div);
alnCost = sum(flatten(square(att - aln))) / (float)(2 * div);
} else if(guidedCostType == "mult") {
alnCost = -log(sum(flatten(att * aln)) + eps) / div;
alnCost = -log(sum(flatten(att * aln)) + epsilon) / (float)div;
} else if(guidedCostType == "ce") {
alnCost = -sum(flatten(aln * log(att + eps))) / div;
alnCost = -sum(flatten(aln * log(att + epsilon))) / (float)div;
} else {
ABORT("Unknown alignment cost type");
}

4
src/layers/loss.cpp Normal file → Executable file
View File

@ -24,13 +24,11 @@ Expr LossBase::getCrossEntropy(Expr logits,
Expr indices,
Expr mask,
Expr weights) {
using namespace keywords;
auto ce = cross_entropy(logits, indices);
if(smoothing_ > 0) {
// @TODO: add this to CE kernels instead
auto ceq = mean(logsoftmax(logits), axis = -1);
auto ceq = mean(logsoftmax(logits), /*axis=*/ -1);
ce = (1 - smoothing_) * ce - smoothing_ * ceq;
}

4
src/layers/weight.cpp Normal file → Executable file
View File

@ -13,8 +13,8 @@ Expr DataWeighting::getWeights(Ptr<ExpressionGraph> graph,
ABORT_IF(batch->getDataWeights().empty(),
"Vector of weights is unexpectedly empty!");
bool sentenceWeighting = weightingType_ == "sentence";
int dimBatch = batch->size();
int dimWords = sentenceWeighting ? 1 : batch->back()->batchWidth();
int dimBatch = (int)batch->size();
int dimWords = sentenceWeighting ? 1 : (int)batch->back()->batchWidth();
auto weights = graph->constant({1, dimWords, dimBatch, 1},
inits::from_vector(batch->getDataWeights()));
return weights;

4
src/models/amun.h Normal file → Executable file
View File

@ -37,9 +37,7 @@ public:
void load(Ptr<ExpressionGraph> graph,
const std::string& name,
bool markedReloaded = true) override {
using namespace keywords;
bool /*markedReloaded*/ = true) override {
std::map<std::string, std::string> nameMap
= {{"decoder_U", "decoder_cell1_U"},
{"decoder_Ux", "decoder_cell1_Ux"},

View File

@ -175,9 +175,9 @@ public:
return cost_->apply(nextState);
}
virtual Expr build(Ptr<ExpressionGraph> graph,
Ptr<data::CorpusBatch> batch,
bool clearGraph = true) override {
virtual Expr build(Ptr<ExpressionGraph> /*graph*/,
Ptr<data::CorpusBatch> /*batch*/,
bool /*clearGraph*/ = true) override {
ABORT("Wrong wrapper. Use models::Trainer or models::Scorer");
return nullptr;
}

6
src/models/decoder.h Normal file → Executable file
View File

@ -60,8 +60,8 @@ public:
auto yEmb = yEmbFactory.construct();
auto subBatch = (*batch)[batchIndex_];
int dimBatch = subBatch->batchSize();
int dimWords = subBatch->batchWidth();
int dimBatch = (int)subBatch->batchSize();
int dimWords = (int)subBatch->batchWidth();
auto chosenEmbeddings = rows(yEmb, subBatch->data());
@ -119,7 +119,7 @@ public:
state->setTargetEmbeddings(selectedEmbs);
}
virtual const std::vector<Expr> getAlignments(int i = 0) { return {}; };
virtual const std::vector<Expr> getAlignments(int /*i*/ = 0) { return {}; };
virtual Ptr<data::Shortlist> getShortlist() { return shortlist_; }
virtual void setShortlist(Ptr<data::Shortlist> shortlist) {

4
src/models/encoder.h Normal file → Executable file
View File

@ -21,9 +21,9 @@ protected:
auto subBatch = (*batch)[batchIndex_];
int dimBatch = subBatch->batchSize();
int dimBatch = (int)subBatch->batchSize();
int dimEmb = srcEmbeddings->shape()[-1];
int dimWords = subBatch->batchWidth();
int dimWords = (int)subBatch->batchWidth();
auto chosenEmbeddings = rows(srcEmbeddings, subBatch->data());

6
src/models/hardatt.h Normal file → Executable file
View File

@ -215,7 +215,7 @@ public:
Expr logits;
if(type == "hard-soft-att") {
std::vector<Expr> alignedContexts;
for(size_t k = 0; k < state->getEncoderStates().size(); ++k) {
for(int k = 0; k < state->getEncoderStates().size(); ++k) {
// retrieve all the aligned contexts computed by the attention mechanism
auto att = rnn_->at(0)
->as<rnn::StackedCell>()
@ -257,8 +257,8 @@ public:
DecoderBase::embeddingsFromBatch(graph, state, batch);
auto subBatch = (*batch)[batchIndex_];
int dimBatch = subBatch->batchSize();
int dimWords = subBatch->batchWidth();
int dimBatch = (int)subBatch->batchSize();
int dimWords = (int)subBatch->batchWidth();
std::vector<size_t> attentionIndices(dimBatch, 0);
std::vector<size_t> currentPos(dimBatch, 0);

2
src/models/nematus.h Normal file → Executable file
View File

@ -29,7 +29,7 @@ public:
void load(Ptr<ExpressionGraph> graph,
const std::string& name,
bool markedReloaded = true) override {
bool /*markedReloaded*/ = true) override {
graph->load(name, nameMap_);
}

12
src/models/s2s.h Normal file → Executable file
View File

@ -274,7 +274,7 @@ public:
start = mlp->apply(meanContexts);
} else {
int dimBatch = batch->size();
int dimBatch = (int)batch->size();
int dimRnn = opt<int>("dim-rnn");
start = graph->constant({dimBatch, dimRnn}, inits::zeros);
@ -309,7 +309,7 @@ public:
rnn::States decoderStates = rnn_->lastCellStates();
std::vector<Expr> alignedContexts;
for(size_t k = 0; k < state->getEncoderStates().size(); ++k) {
for(int k = 0; k < state->getEncoderStates().size(); ++k) {
// retrieve all the aligned contexts computed by the attention mechanism
auto att = rnn_->at(0)
->as<rnn::StackedCell>()
@ -337,7 +337,7 @@ public:
int dimTrgVoc = opt<std::vector<int>>("dim-vocabs")[batchIndex_];
auto final = mlp::output(graph) //
auto last = mlp::output(graph) //
("prefix", prefix_ + "_ff_logit_l2") //
("dim", dimTrgVoc);
@ -345,17 +345,17 @@ public:
std::string tiedPrefix = prefix_ + "_Wemb";
if(opt<bool>("tied-embeddings-all") || opt<bool>("tied-embeddings-src"))
tiedPrefix = "Wemb";
final.tie_transposed("W", tiedPrefix);
last.tie_transposed("W", tiedPrefix);
}
if(shortlist_)
final.set_shortlist(shortlist_);
last.set_shortlist(shortlist_);
// assemble layers into MLP and apply to embeddings, decoder context and
// aligned source context
output_ = mlp::mlp(graph) //
.push_back(hidden) //
.push_back(final)
.push_back(last)
.construct();
}

View File

@ -51,7 +51,7 @@ public:
int dimEmb = input->shape()[-1];
int dimWords = input->shape()[-3];
float num_timescales = dimEmb / 2;
float num_timescales = (float)dimEmb / 2;
float log_timescale_increment = std::log(10000.f) / (num_timescales - 1.f);
std::vector<float> vPos(dimEmb * dimWords, 0);
@ -59,7 +59,7 @@ public:
for(int i = 0; i < num_timescales; ++i) {
float v = p * std::exp(i * -log_timescale_increment);
vPos[(p - start) * dimEmb + i] = std::sin(v);
vPos[(p - start) * dimEmb + num_timescales + i] = std::cos(v);
vPos[(p - start) * dimEmb + (int)num_timescales + i] = std::cos(v); // @TODO: is int vs. float correct for num_timescales?
}
}
@ -134,7 +134,7 @@ public:
int dimModel = x->shape()[-1];
auto scale = graph_->param(prefix + "_ln_scale" + suffix, { 1, dimModel }, inits::ones);
auto bias = graph_->param(prefix + "_ln_bias" + suffix, { 1, dimModel }, inits::zeros);
return marian::layerNorm(x, scale, bias, 1e-6);
return marian::layerNorm(x, scale, bias, 1e-6f);
}
Expr preProcess(std::string prefix, std::string ops, Expr input, float dropProb = 0.0f) const {
@ -212,7 +212,7 @@ public:
// time steps and batch entries), also add mask for illegal connections
// multiplicative attention with flattened softmax
float scale = 1.0 / std::sqrt((float)dk); // scaling to avoid extreme values due to matrix multiplication
float scale = 1.0f / std::sqrt((float)dk); // scaling to avoid extreme values due to matrix multiplication
auto z = bdot(q, k, false, true, scale); // [-4: beam depth * batch size, -3: num heads, -2: max tgt length, -1: max src length]
// mask out garbage beyond end of sequences
@ -425,7 +425,7 @@ public:
auto output = input;
if(startPos > 0) {
// we are decoding at a position after 0
output = (prevDecoderState.output * startPos + input) / (startPos + 1);
output = (prevDecoderState.output * (float)startPos + input) / float(startPos + 1);
}
else if(startPos == 0 && output->shape()[-2] > 1) {
// we are training or scoring, because there is no history and
@ -444,7 +444,7 @@ public:
std::string prefix,
Expr input,
Expr selfMask,
int startPos) const {
int /*startPos*/) const {
float dropoutRnn = inference_ ? 0.f : opt<float>("dropout-rnn");
auto rnn = rnn::rnn(graph_) //
@ -479,7 +479,7 @@ public:
// returns the embedding matrix based on options
// and based on batchIndex_.
Expr wordEmbeddings(int subBatchIndex) const {
Expr wordEmbeddings(size_t subBatchIndex) const {
// standard encoder word embeddings
int dimVoc = opt<std::vector<int>>("dim-vocabs")[subBatchIndex];
@ -513,8 +513,8 @@ public:
Ptr<EncoderState> apply(Ptr<data::CorpusBatch> batch) {
int dimEmb = opt<int>("dim-emb");
int dimBatch = batch->size();
int dimSrcWords = (*batch)[batchIndex_]->batchWidth();
int dimBatch = (int)batch->size();
int dimSrcWords = (int)(*batch)[batchIndex_]->batchWidth();
auto embeddings = wordEmbeddings(batchIndex_); // embedding matrix, considering tying and some other options
@ -531,7 +531,7 @@ public:
}
// according to paper embeddings are scaled up by \sqrt(d_m)
auto scaledEmbeddings = std::sqrt(dimEmb) * batchEmbeddings;
auto scaledEmbeddings = std::sqrt((float)dimEmb) * batchEmbeddings;
scaledEmbeddings = addPositionalEmbeddings(scaledEmbeddings);
@ -637,7 +637,7 @@ public:
std::string layerType = opt<std::string>("transformer-decoder-autoreg", "self-attention");
if (layerType == "rnn") {
int dimBatch = batch->size();
int dimBatch = (int)batch->size();
int dim = opt<int>("dim-emb");
auto start = graph->constant({1, 1, dimBatch, dim}, inits::zeros);
@ -678,12 +678,12 @@ public:
dimBeam = embeddings->shape()[-4];
// according to paper embeddings are scaled by \sqrt(d_m)
auto scaledEmbeddings = std::sqrt(dimEmb) * embeddings;
auto scaledEmbeddings = std::sqrt((float)dimEmb) * embeddings;
// Set the current target token position during decoding or training. At training
// this should be 0. During translation it is the current length of the translation.
// Used for position embeddings and creating new decoder states.
int startPos = state->getPosition();
int startPos = (int)state->getPosition();
scaledEmbeddings
= addPositionalEmbeddings(scaledEmbeddings, startPos);
@ -828,7 +828,7 @@ public:
// helper function for guided alignment
// @TODO: const vector<> seems wrong. Either make it non-const or a const& (more efficient but dangerous)
virtual const std::vector<Expr> getAlignments(int i = 0) override {
virtual const std::vector<Expr> getAlignments(int /*i*/ = 0) override {
return alignments_;
}

24
src/optimizers/optimizers.cpp Normal file → Executable file
View File

@ -19,7 +19,7 @@ void Adagrad::updateImpl(Tensor params, Tensor grads) {
alloc_ = New<TensorAllocator>(params->getBackend());
if(!gt_) {
int elements = params->size();
int elements = (int)params->size();
alloc_->reserveExact(params->memory()->size());
alloc_->allocate(gt_, {1, elements});
gt_->set(0.f);
@ -68,13 +68,13 @@ void Adagrad::load(const std::string& name,
}
// get the size of params which should go
size_t shardSize = ceil(totalSize / (float)backends.size());
size_t shardSize = size_t(ceil(totalSize / (float)backends.size()));
size_t id = 0;
for(auto optBase : opts) {
auto opt = std::dynamic_pointer_cast<Adagrad>(optBase);
int size = std::min(shardSize, totalSize);
int size = (int)std::min(shardSize, totalSize);
totalSize -= size;
if(!opt->alloc_)
@ -95,7 +95,7 @@ void Adagrad::load(const std::string& name,
void Adagrad::save(const std::string& name,
std::vector<Ptr<OptimizerBase>> opts,
size_t totalSize) {
size_t /*totalSize*/) {
LOG(info, "Saving Adagrad parameters to {}", name);
std::vector<float> vGt;
@ -130,7 +130,7 @@ void Adam::updateImpl(Tensor params, Tensor grads) {
alloc_ = New<TensorAllocator>(params->getBackend());
if(!mt_) {
int elements = params->size();
int elements = (int)params->size();
alloc_->reserveExact(2 * params->memory()->size());
alloc_->allocate(mt_, {1, elements});
mt_->set(0.f);
@ -140,8 +140,8 @@ void Adam::updateImpl(Tensor params, Tensor grads) {
}
t_++;
float denom1 = 1 - std::pow(beta1_, t_);
float denom2 = 1 - std::pow(beta2_, t_);
float denom1 = 1 - (float)std::pow(beta1_, t_);
float denom2 = 1 - (float)std::pow(beta2_, t_);
using namespace functional;
@ -193,13 +193,13 @@ void Adam::load(const std::string& name,
}
// get the size of params which should go
size_t shardSize = ceil(totalSize / (float)backends.size());
size_t shardSize = size_t(ceil(totalSize / (float)backends.size()));
size_t id = 0;
for(auto optBase : opts) {
auto opt = std::dynamic_pointer_cast<Adam>(optBase);
int size = std::min(shardSize, totalSize);
int size = (int)std::min(shardSize, totalSize);
totalSize -= size;
if(!opt->alloc_)
@ -223,7 +223,7 @@ void Adam::load(const std::string& name,
void Adam::save(const std::string& name,
std::vector<Ptr<OptimizerBase>> opts,
size_t totalSize) {
size_t /*totalSize*/) {
LOG(info, "Saving Adam parameters to {}", name);
std::vector<float> vMt;
@ -267,13 +267,13 @@ void Adam::resetStats() {
}
Ptr<OptimizerBase> Optimizer(Ptr<Config> options) {
float lrate = options->get<double>("learn-rate");
float lrate = (float)options->get<double>("learn-rate"); // @TODO: should this be <float>?
auto params = options->has("optimizer-params")
? options->get<std::vector<float>>("optimizer-params")
: std::vector<float>({});
Ptr<ClipperBase> clipper = nullptr;
float clipNorm = options->get<double>("clip-norm");
float clipNorm = (float)options->get<double>("clip-norm"); // @TODO: should this be <float>?
if(clipNorm > 0)
clipper = Clipper<Norm>(clipNorm);

22
src/optimizers/optimizers.h Normal file → Executable file
View File

@ -63,12 +63,12 @@ public:
void setParams(const std::vector<float>& params) { parseParams(params); }
virtual void load(const std::string& name,
std::vector<Ptr<OptimizerBase>> opts,
std::vector<Ptr<Backend>> backends) {}
virtual void save(const std::string& name,
std::vector<Ptr<OptimizerBase>> opts,
size_t totalSize) {}
virtual void load(const std::string& /*name*/,
std::vector<Ptr<OptimizerBase>> /*opts*/,
std::vector<Ptr<Backend>> /*backends*/) {}
virtual void save(const std::string& /*name*/,
std::vector<Ptr<OptimizerBase>> /*opts*/,
size_t /*totalSize*/) {}
protected:
virtual void updateImpl(Tensor params, Tensor grads) = 0;
@ -94,7 +94,7 @@ public:
private:
void updateImpl(Tensor params, Tensor grads) override;
virtual void parseParams(const std::vector<float>& params) override {}
virtual void parseParams(const std::vector<float>& /*params*/) override {}
virtual void resetStats() override {}
};
@ -124,7 +124,7 @@ private:
eps_ = params[0];
}
float eps_ = 1e-8;
float eps_ = 1e-8f;
Ptr<TensorAllocator> alloc_;
Tensor gt_;
};
@ -159,9 +159,9 @@ private:
eps_ = params[2];
}
float beta1_ = 0.9;
float beta2_ = 0.999;
float eps_ = 1e-8;
float beta1_ = 0.9f;
float beta2_ = 0.999f;
float eps_ = 1e-8f;
size_t t_;
Ptr<TensorAllocator> alloc_;

4
src/rnn/rnn.h Normal file → Executable file
View File

@ -64,10 +64,10 @@ private:
auto xWs = cell_->applyInput({input});
size_t timeSteps = input->shape()[-3];
auto timeSteps = input->shape()[-3];
States outputs;
for(size_t i = 0; i < timeSteps; ++i) {
for(int i = 0; i < timeSteps; ++i) {
int j = i;
if(direction_ == dir::backward)

View File

@ -28,7 +28,7 @@ private:
sel = atleast_4d(sel);
int dimBatch = selIdx.size() / beamSize;
int dimBatch = (int)selIdx.size() / beamSize;
int dimDepth = sel->shape()[-1];
int dimTime = isBatchMajor ? sel->shape()[-2] : sel->shape()[-3];
@ -93,7 +93,7 @@ public:
};
class Cell;
struct CellInput;
class CellInput;
class Stackable : public std::enable_shared_from_this<Stackable> {
protected:

View File

@ -91,7 +91,7 @@ private:
std::unordered_map<uint8_t*, Ptr<MemoryPiece>> allocated_;
size_t align(size_t size) {
return ceil(size / (float)alignment_) * alignment_;
return size_t(ceil(size / (float)alignment_) * alignment_);
}
void grow(size_t add) {
@ -168,7 +168,7 @@ public:
reserve(bytes);
}
Allocator(DeviceId deviceId,
Allocator(DeviceId /*deviceId*/,
Ptr<Device> device,
size_t bytes,
size_t step,

8
src/tensors/cpu/add.h Normal file → Executable file
View File

@ -28,7 +28,7 @@ void gAddGeneric(Functor functor,
constexpr size_t N = functional::Shape::size();
functional::Array<int, N> len;
for(size_t i = 0; i < N; ++i)
for(int i = 0; i < N; ++i)
len[i] = full[i] / out.shape()[i];
functional::Array<int, N> dims;
@ -101,7 +101,7 @@ template <class Functor, class... Tensors>
void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors) {
auto full = marian::Shape::broadcast({out, tensors...});
int length = out->shape().elements();
//int length = out->shape().elements();
constexpr size_t K = sizeof...(Tensors);
@ -109,8 +109,8 @@ void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors) {
functional::Array<functional::Tensor<float>, K> gIns = {tensors...};
if(full.back() != 1 && out->shape().back() == 1) {
size_t m = full.elements() / length;
size_t k = full.back();
//size_t m = full.elements() / length;
//size_t k = full.back();
cpu::gAddReduce(functor, full, gOut, gIns, scale);
} else if(out->shape() == full) {
bool broadcast = false;

2
src/tensors/cpu/backend.h Normal file → Executable file
View File

@ -15,7 +15,7 @@ private:
public:
Backend(DeviceId deviceId, size_t seed)
: marian::Backend(deviceId, seed), gen_(seed_) {}
: marian::Backend(deviceId, seed), gen_((unsigned int)seed_) {}
void setDevice() override {}

16
src/tensors/cpu/prod.cpp Normal file → Executable file
View File

@ -131,21 +131,21 @@ void ProdBatched(marian::Tensor C,
auto strideA = batchA == 1 ? 0 : m * k;
auto strideC = n * m;
int batchC = std::max(batchA, batchB);
for(int i = 0; i < batchC; ++i) {
auto batchC = std::max(batchA, batchB);
for(size_t i = 0; i < batchC; ++i) {
sgemm(transA,
transB,
m,
n,
k,
(int)m,
(int)n,
(int)k,
alpha,
A->data() + (i % batchA) * strideA,
lda,
(int)lda,
B->data() + (i % batchB) * strideB,
ldb,
(int)ldb,
beta,
C->data() + i * strideC,
ldc);
(int)ldc);
}
#else
ABORT("Not implemented!");

26
src/tensors/cpu/sharp/int_gemm.h Normal file → Executable file
View File

@ -56,8 +56,8 @@ void SSE_MatrixMult16(const __m128i* A,
static inline void Quantize16(marian::Tensor out,
const marian::Tensor in,
float clipValue) {
float quant_mult = pow(2.0, (float)BITS);
float /*clipValue*/) {
float quant_mult = (float)pow(2.0, BITS);
#ifdef __AVX512F__
AVX_Quantize16(
in->data(), out->data<int16_t>(), quant_mult, in->shape().elements());
@ -76,6 +76,7 @@ static inline void Quantize8(marian::Tensor out,
AVX_Quantize8(
in->data(), out->data<int8_t>(), quant_mult, in->shape().elements());
#else
out; in; clipValue;
ABORT("8-bit is currently only AVX512");
#endif
}
@ -118,19 +119,19 @@ static void AddBias(marian::Tensor C, const marian::Tensor Bias) {
}
}
static void ProdInt16(marian::Tensor C,
const marian::Tensor A,
const marian::Tensor B,
static inline void ProdInt16(marian::Tensor C,
const marian::Tensor A,
const marian::Tensor B,
float scale) {
ABORT_IF(scale != 1, "Scale other than 1 not supported");
// @TODO: make this a parameter
float quant_mult = pow(2.0, (float)BITS);
float quant_mult = (float)pow(2.0, BITS);
// If we scale values by quant_mult when quantizing and then multiply two of
// them together, the product is scaled by quant_mult^2. So we must multiply
// by 1.0/(quant_mult^2) to get back the original value.
float unquant_mult = 1.0 / (quant_mult * quant_mult);
float unquant_mult = 1.0f / (quant_mult * quant_mult);
float* fC = C->data();
int num_A_rows = A->shape().elements() / A->shape()[-1];
@ -155,11 +156,11 @@ static void ProdInt16(marian::Tensor C,
#endif
}
static void ProdInt8(marian::Tensor C,
const marian::Tensor A,
const marian::Tensor B,
float scale,
float clipValue) {
static inline void ProdInt8(marian::Tensor C,
const marian::Tensor A,
const marian::Tensor B,
float scale,
float clipValue) {
#ifdef __AVX512F__
// This would be easy...
ABORT_IF(scale != 1, "Scale other than 1 not supported");
@ -178,6 +179,7 @@ static void ProdInt8(marian::Tensor C,
num_B_rows,
width);
#else
C; A; B; scale; clipValue;
ABORT("8-bit is currently only AVX512");
#endif
}

88
src/tensors/cpu/tensor_operators.cpp Normal file → Executable file
View File

@ -17,10 +17,10 @@ namespace cpu {
inline float stableSigmoid(float x) {
if(x >= 0) {
float z = expf(-x);
return 1.0 / (1.0 + z);
return 1.0f / (1.0f + z);
} else {
float z = expf(x);
return z / (1.0 + z);
return z / (1.0f + z);
}
}
@ -228,9 +228,9 @@ void Transpose10(Tensor out, const Tensor in) {
template <bool add>
void TransposeGeneric(Tensor out, Tensor in, const std::vector<int>& vAxis) {
functional::Array<int, functional::Shape::size()> permute;
int diff = functional::Shape::size() - vAxis.size();
for(size_t i = 0; i < permute.size(); ++i)
if((int)i < diff)
int diff = int(functional::Shape::size() - vAxis.size());
for(int i = 0; i < permute.size(); ++i)
if(i < diff)
permute[i] = i;
else
permute[i] = vAxis[i - diff] + diff;
@ -483,7 +483,7 @@ void Select(Tensor out,
for(int index = 0; index < length; ++index) {
outShape.dims(index, dims);
dims[axis] = indices[dims[axis]];
dims[axis] = (int)indices[dims[axis]];
int inIndex = inShape.index(dims);
out->data()[index] = in->data()[inIndex];
}
@ -505,7 +505,7 @@ void Insert(Tensor out,
for(int index = 0; index < length; ++index) {
inShape.dims(index, dims);
dims[axis] = indices[dims[axis]];
dims[axis] = (int)indices[dims[axis]];
int outIndex = outShape.index(dims);
out->data()[outIndex] += in->data()[index];
}
@ -547,8 +547,8 @@ void GRUFastForward(Tensor out_, std::vector<Tensor> inputs, bool final) {
else
h = std::tanh(xWrow[l] + sUrow[l] * r + b[l]);
float out = (1.0f - z) * h + z * rowState[i];
rowOut[i] = m * out + (1 - m) * rowState[i];
float o = (1.0f - z) * h + z * rowState[i];
rowOut[i] = m * o + (1 - m) * rowState[i];
}
}
}
@ -599,16 +599,16 @@ void GRUFastBackward(std::vector<Tensor> outputs,
else
h = std::tanh(rowXW[l] + rowSU[l] * r + b[l]);
float adj = rowAdj[i];
float a = rowAdj[i];
float t = (1 - z) * (1 - h * h);
// df/ds
if(outState)
rowOutState[i] += (m * z - m + 1) * adj;
rowOutState[i] += (m * z - m + 1) * a;
// df/d(xW_r) ...
float dfdxW_r = m * r * (1 - r) * t * adj;
float dfdxW_r = m * r * (1 - r) * t * a;
if(final)
dfdxW_r *= rowSU[l] + b[l];
else
@ -621,7 +621,7 @@ void GRUFastBackward(std::vector<Tensor> outputs,
outB[i] += dfdxW_r;
// df/d(xW_z) ...
float dfdxW_z = m * (1 - z) * z * (rowState[i] - h) * adj;
float dfdxW_z = m * (1 - z) * z * (rowState[i] - h) * a;
if(outXW)
rowOutXW[k] += dfdxW_z;
if(outSU)
@ -630,7 +630,7 @@ void GRUFastBackward(std::vector<Tensor> outputs,
outB[k] += dfdxW_z;
// df/d(xW_x) ...
float dfdxW_x = m * t * adj;
float dfdxW_x = m * t * a;
if(outXW)
rowOutXW[l] += dfdxW_x;
if(outSU)
@ -671,7 +671,7 @@ void CrossEntropyPick(Tensor out_, Tensor in_, Tensor pick_) {
}
// cross-entropy
int i = pick[j];
int i = (int)pick[j];
// This appears to be safe i.e. that i >= 0 && i < cols is known
out[j] = std::log(sum) - sp[i] + max;
}
@ -960,7 +960,7 @@ void Shift(Tensor out_,
float padValue,
bool invert) {
int offset = 0;
for(size_t i = 0; i < shift.size(); ++i)
for(int i = 0; i < shift.size(); ++i)
offset += in_->shape().stride(i) * shift[i];
if(invert)
@ -983,7 +983,7 @@ void Shift(Tensor out_,
void ShiftGrad(Tensor out_, Tensor in_, marian::Shape shift, bool invert) {
int offset = 0;
for(size_t i = 0; i < shift.size(); ++i)
for(int i = 0; i < shift.size(); ++i)
offset += in_->shape().stride(i) * shift[i];
if(invert)
@ -1004,7 +1004,7 @@ void ShiftGrad(Tensor out_, Tensor in_, marian::Shape shift, bool invert) {
void SetSparse(float* out,
const std::vector<size_t>& indices,
const std::vector<float>& values) {
int length = indices.size();
int length = (int)indices.size();
for(int index = 0; index < length; ++index) {
out[indices[index]] = values[index];
}
@ -1112,15 +1112,15 @@ void LSTMCellBackward(std::vector<Tensor> outputs,
int l = i + 2 * cols;
float gc = std::tanh(xWrow[l] + sUrow[l] + b[l]);
float adj = rowAdj[i];
float a = rowAdj[i];
// dc/dx_{t-1}
if(outCell) {
rowOutCell[i] += (m * gf - m + 1) * adj;
rowOutCell[i] += (m * gf - m + 1) * a;
}
// dc/d(b_f) = dc/d(xW_f) ...
float dcdxf = m * rowCell[i] * gf * (1 - gf) * adj;
float dcdxf = m * rowCell[i] * gf * (1 - gf) * a;
if(outXW) {
rowOutXW[i] += dcdxf;
}
@ -1132,7 +1132,7 @@ void LSTMCellBackward(std::vector<Tensor> outputs,
}
// dc/d(b_i) ...
float dcdb_i = m * gc * gi * (1 - gi) * adj;
float dcdb_i = m * gc * gi * (1 - gi) * a;
if(outXW) {
rowOutXW[k] += dcdb_i;
}
@ -1144,7 +1144,7 @@ void LSTMCellBackward(std::vector<Tensor> outputs,
}
// dc/d(b_c) ...
float dcdxc = m * gi * (1 - gc * gc) * adj;
float dcdxc = m * gi * (1 - gc * gc) * a;
if(outXW) {
rowOutXW[l] += dcdxc;
}
@ -1193,15 +1193,15 @@ void LSTMOutputBackward(std::vector<Tensor> outputs,
float t = std::tanh(rowCell[i]);
float adj = rowAdj[i];
float a = rowAdj[i];
// dc/dc_{t-1}
if(outCell) {
rowOutCell[i] += go * (1 - t * t) * adj;
rowOutCell[i] += go * (1 - t * t) * a;
}
// dc/d(b_o) = dc/d(xW_f) ...
float dcdxo = t * go * (1 - go) * adj;
float dcdxo = t * go * (1 - go) * a;
if(outXW) {
rowOutXW[k] += dcdxo;
}
@ -1240,30 +1240,30 @@ void HighwayForward(Tensor out,
}
}
void HighwayBackward(Tensor out1,
Tensor out2,
Tensor outt,
const Tensor in1,
const Tensor in2,
const Tensor t,
const Tensor adj) {
void HighwayBackward(Tensor /*out1*/,
Tensor /*out2*/,
Tensor /*outt*/,
const Tensor /*in1*/,
const Tensor /*in2*/,
const Tensor /*t*/,
const Tensor /*adj*/) {
ABORT("Not implemented!");
}
void PoolingWithMaskingForward(Tensor out,
Tensor in,
Tensor mask,
int width,
bool isEven) {
void PoolingWithMaskingForward(Tensor /*out*/,
Tensor /*in*/,
Tensor /*mask*/,
int /*width*/,
bool /*isEven*/) {
ABORT("Not implemented!");
}
void PoolingWithMaskingBackward(Tensor adj,
Tensor adjIn,
Tensor in,
Tensor mask,
int width,
bool isEven) {
void PoolingWithMaskingBackward(Tensor /*adj*/,
Tensor /*adjIn*/,
Tensor /*in*/,
Tensor /*mask*/,
int /*width*/,
bool /*isEven*/) {
ABORT("Not implemented!");
}
} // namespace cpu

2
src/tensors/device.h Normal file → Executable file
View File

@ -16,7 +16,7 @@ protected:
size_t alignment_;
size_t align(size_t size) {
return ceil(size / (float)alignment_) * alignment_;
return size_t(ceil(size / (float)alignment_) * alignment_);
}
public:

32
src/tensors/tensor.h Normal file → Executable file
View File

@ -91,7 +91,7 @@ public:
request<float>(),
type_);
float temp;
float temp = 0; // (initialize to keep compiler happy)
if(backend_->getDeviceId().type == DeviceType::cpu) {
std::copy(data() + i, data() + i + 1, &temp);
}
@ -183,16 +183,16 @@ public:
void set(T value) {
if(!matchType<T>(type_)) {
switch(type_) {
case Type::float32: set<float>(value); break;
case Type::float64: set<double>(value); break;
case Type::int8: set<int8_t>(value); break;
case Type::int16: set<int16_t>(value); break;
case Type::int32: set<int32_t>(value); break;
case Type::int64: set<int64_t>(value); break;
case Type::uint8: set<uint8_t>(value); break;
case Type::uint16: set<uint16_t>(value); break;
case Type::uint32: set<uint32_t>(value); break;
case Type::uint64: set<uint64_t>(value); break;
case Type::float32: set<float >((float )value); break;
case Type::float64: set<double >((double )value); break;
case Type::int8: set<int8_t >((int8_t )value); break;
case Type::int16: set<int16_t >((int16_t )value); break;
case Type::int32: set<int32_t >((int32_t )value); break;
case Type::int64: set<int64_t >((int64_t )value); break;
case Type::uint8: set<uint8_t >((uint8_t )value); break;
case Type::uint16: set<uint16_t>((uint16_t)value); break;
case Type::uint32: set<uint32_t>((uint32_t)value); break;
case Type::uint64: set<uint64_t>((uint64_t)value); break;
default:
ABORT(
"Requested type ({}) cannot be converted to underlying type ({})",
@ -273,19 +273,19 @@ public:
else
strm << std::fixed << std::setprecision(0) << std::setfill(' ');
for(size_t i = 0; i < values.size(); ++i) {
for(int i = 0; i < values.size(); ++i) {
std::vector<int> dims;
shape().dims(i, dims);
bool disp = true;
for(size_t j = 0; j < dims.size(); ++j)
for(int j = 0; j < dims.size(); ++j)
disp = disp && (dims[j] < dispCols || dims[j] >= shape()[j] - dispCols);
if(disp) {
if(dims.back() == 0) {
bool par = true;
std::vector<std::string> p;
for(int j = dims.size() - 1; j >= 0; --j) {
for(int j = (int)dims.size() - 1; j >= 0; --j) {
if(dims[j] != 0)
par = false;
@ -307,7 +307,7 @@ public:
strm << " ";
if(dims.back() + 1 == shape().back()) {
for(int j = dims.size() - 1; j >= 0; --j) {
for(int j = (int)dims.size() - 1; j >= 0; --j) {
if(dims[j] + 1 != shape()[j])
break;
strm << "]";
@ -316,7 +316,7 @@ public:
}
bool prev = true;
for(int j = dims.size() - 1; j >= 0; --j) {
for(int j = (int)dims.size() - 1; j >= 0; --j) {
if(j < (int)dims.size() - 1)
prev = prev && dims[j + 1] + 1 == shape()[j + 1];
if(prev && dims[j] + 1 == dispCols && shape()[j] > 2 * dispCols) {

6
src/tensors/tensor_allocator.h Normal file → Executable file
View File

@ -35,7 +35,7 @@ public:
}
void reserve(size_t bytes = 0) {
float mult = bytes / GROW + 1;
auto mult = bytes / GROW + 1;
LOG(info,
"[memory] Extending reserved space to {} MB (device {})",
mult * CHUNK,
@ -78,8 +78,8 @@ public:
Tensor asTensor() {
auto mem = allocator_->memory();
int size = mem->size() / sizeof(float);
return Tensor(new TensorBase(mem, {1, size}, backend_));
auto size = mem->size() / sizeof(float);
return Tensor(new TensorBase(mem, {1, (int)size}, backend_));
}
size_t size() { return allocator_->size() / sizeof(float); }

2
src/training/communicator.cpp Normal file → Executable file
View File

@ -7,7 +7,7 @@ namespace marian {
#ifndef CUDA_FOUND
Ptr<Communicator> createCommunicator(
const std::vector<Ptr<ExpressionGraph>>& graphs,
bool noNccl) {
bool /*noNccl*/) {
return New<DefaultCommunicator>(graphs);
}
#endif

28
src/training/communicator.h Normal file → Executable file
View File

@ -17,8 +17,8 @@ public:
virtual ~Communicator() {}
virtual void foreach(const std::function<void(size_t, int)>& func) {
int totalSize = graphs_[0]->params()->vals()->size();
int shardSize = ceil(totalSize / (float)graphs_.size());
int totalSize = (int)graphs_[0]->params()->vals()->size();
int shardSize = (int)ceil(totalSize / (float)graphs_.size());
int pos = 0;
std::vector<std::thread> group;
@ -50,8 +50,8 @@ private:
void init() {
if(tmpTensors_.size() == 0) {
int totalSize = graphs_[0]->params()->vals()->size();
int shardSize = ceil(totalSize / (float)graphs_.size());
int totalSize = (int)graphs_[0]->params()->vals()->size();
int shardSize = (int)ceil(totalSize / (float)graphs_.size());
int pos = 0;
for(auto graph : graphs_) {
@ -83,8 +83,8 @@ public:
void scatterReduce() override {
init();
int totalSize = graphs_[0]->params()->vals()->size();
int shardSize = ceil(totalSize / (float)graphs_.size());
int totalSize = (int)graphs_[0]->params()->vals()->size();
int shardSize = (int)ceil(totalSize / (float)graphs_.size());
// Gather gradients from different devices into current gradient shards
auto scatter = [this, shardSize](size_t idx, int pos) {
@ -107,8 +107,8 @@ public:
}
void allGather() override {
int totalSize = graphs_[0]->params()->vals()->size();
int shardSize = ceil(totalSize / (float)graphs_.size());
int totalSize = (int)graphs_[0]->params()->vals()->size();
int shardSize = (int)ceil(totalSize / (float)graphs_.size());
// Update all graphs with parameter shard
auto gather = [this, shardSize](size_t idx, int pos) {
@ -133,7 +133,7 @@ public:
auto copy = [this, params](size_t idx, int pos) {
// copy parameter shard to each graph
auto subParam
= graphs_[idx]->params()->vals()->subtensor(pos, params[idx]->size());
= graphs_[idx]->params()->vals()->subtensor(pos, (int)params[idx]->size());
params[idx]->copyFrom(subParam);
};
@ -147,7 +147,7 @@ public:
// copy parameter shard to each graph
for(auto graph : graphs_) {
auto subParam
= graph->params()->vals()->subtensor(pos, params[idx]->size());
= graph->params()->vals()->subtensor(pos, (int)params[idx]->size());
subParam->copyFrom(params[idx]);
}
};
@ -162,17 +162,17 @@ public:
// copy parameter shard to each graph, apart from last graph
for(int i = 0; i < (int)graphs_.size() - 1; ++i) {
auto subParam
= graphs_[i]->params()->vals()->subtensor(pos, params[idx]->size());
= graphs_[i]->params()->vals()->subtensor(pos, (int)params[idx]->size());
subParam->copyFrom(params[idx]);
}
// back-up shard from last graph
auto subParamLast = graphs_.back()->params()->vals()->subtensor(
pos, params[idx]->size());
auto subParamLast =
graphs_.back()->params()->vals()->subtensor(pos, (int)params[idx]->size());
params[idx]->copyFrom(subParamLast);
auto subParamFirst
= graphs_[0]->params()->vals()->subtensor(pos, params[idx]->size());
= graphs_[0]->params()->vals()->subtensor(pos, (int)params[idx]->size());
subParamLast->copyFrom(subParamFirst);
};
// execute for each shard

8
src/training/gradient_dropping/sparse_tensor.h Normal file → Executable file
View File

@ -102,6 +102,8 @@ public:
gpu::copy(backend_, ndata, ndata + nsize, data());
gpu::copy(backend_, nindices, nindices + nsize, indices());
}
#else
ndata; nindices; // (unused)
#endif
}
@ -137,6 +139,8 @@ public:
else {
gpu::scatterAdd(t, data(), indices(), size(), offset);
}
#else
t; offset; // (unused)
#endif
}
@ -149,6 +153,8 @@ public:
else {
gpu::scatterUpdate(t, data(), indices(), size(), offset);
}
#else
t; offset; // (unused)
#endif
}
@ -161,6 +167,8 @@ public:
else {
gpu::gather(t, data(), indices(), size(), offset);
}
#else
t; offset; // (unused)
#endif
}

4
src/training/graph_group.h Normal file → Executable file
View File

@ -63,7 +63,7 @@ public:
size_t step = options_->get<size_t>("mini-batch-fit-step");
size_t maxLength = options_->get<size_t>("max-length");
maxLength = std::ceil(maxLength / (float)step) * step;
maxLength = (size_t)(std::ceil(maxLength / (float)step) * step);
// @TODO: ugly
auto toptions = New<Options>();
@ -85,7 +85,7 @@ public:
size_t end = maxBatch;
std::vector<size_t> lengths(numFiles, i);
bool fits = true;
fits = true;
do {
size_t current = (start + end) / 2;

View File

@ -36,7 +36,7 @@ void AsyncGraphGroup::setScheduler(Ptr<Scheduler> scheduler) {
void AsyncGraphGroup::fetchParams(Tensor oldParams,
const std::vector<Tensor>& params,
int device_id) {
int /*device_id*/) {
// @TODO read guard on parameters
int pos = 0;
@ -46,7 +46,7 @@ void AsyncGraphGroup::fetchParams(Tensor oldParams,
[&](int idx, int pos) {
// individual mutex per-shard
std::lock_guard<std::mutex> guard(shardSync_[idx]);
oldParams->subtensor(pos, params[idx]->size())->copyFrom(params[idx]);
oldParams->subtensor((int)pos, (int)params[idx]->size())->copyFrom(params[idx]);
},
idx,
pos));
@ -60,7 +60,7 @@ void AsyncGraphGroup::fetchParams(Tensor oldParams,
void AsyncGraphGroup::pushGradients(Tensor newGrads,
size_t batch_words,
int device_id) {
int /*device_id*/) {
// add instead of copy?
std::vector<std::thread> threads;
int pos = 0;
@ -69,7 +69,7 @@ void AsyncGraphGroup::pushGradients(Tensor newGrads,
[&](int idx, int pos) {
// individual mutex per-shard
std::lock_guard<std::mutex> guard(shardSync_[idx]);
grads_[idx]->copyFrom(newGrads->subtensor(pos, grads_[idx]->size()));
grads_[idx]->copyFrom(newGrads->subtensor(pos, (int)grads_[idx]->size()));
if(scaleLearningRate_) {
shardOpt_[idx]->update(
@ -105,8 +105,8 @@ void AsyncGraphGroup::init(Ptr<data::Batch> batch) {
}
if(params_.empty()) {
int totalSize = graphs_[0]->params()->vals()->size();
shardSize_ = ceil(totalSize / (float)devices_.size());
int totalSize = (int)graphs_[0]->params()->vals()->size();
shardSize_ = (int)ceil(totalSize / (float)devices_.size());
int pos = 0;
// parameter sharding
@ -128,7 +128,7 @@ void AsyncGraphGroup::init(Ptr<data::Batch> batch) {
}
}
if(grads_.empty()) {
int totalSize = graphs_[0]->params()->vals()->size();
int totalSize = (int)graphs_[0]->params()->vals()->size();
for(auto graph : graphs_) {
int __size__ = std::min(shardSize_, totalSize);
@ -154,7 +154,7 @@ void AsyncGraphGroup::init(Ptr<data::Batch> batch) {
graphAvg->forward();
}
int totalSize = graphs_[0]->params()->vals()->size();
int totalSize = (int)graphs_[0]->params()->vals()->size();
int i = 0;
for(auto graph : graphs_) {
@ -203,7 +203,7 @@ void AsyncGraphGroup::execute(Ptr<data::Batch> batch) {
if(!graph) {
std::lock_guard<std::mutex> lock(sync_);
t_id = i;
t_id = (int)i;
graph = graphs_[i];
builder = builders_[i++];
}

12
src/training/graph_group_async_drop.cpp Normal file → Executable file
View File

@ -31,7 +31,7 @@ void AsyncGraphGroupDrop::fetchParams(Tensor oldParams,
sparseShard->gather(params[idx]);
sparseGrad->copyFrom(sparseShard);
sparseGrad->scatterUpdate(
oldParams->subtensor(pos, params[idx]->size()));
oldParams->subtensor((int)pos, (int)params[idx]->size()));
},
idx,
pos));
@ -59,7 +59,7 @@ void AsyncGraphGroupDrop::pushGradients(Tensor newGrads,
auto dropper = droppers_[device_id][idx];
auto sparseGrad = sparseGrads_[device_id][idx];
auto sparseShard = sparseShards_[device_id][idx];
auto tensor = newGrads->subtensor(pos, grads_[idx]->size());
auto tensor = newGrads->subtensor((int)pos, (int)grads_[idx]->size());
// individual mutex per-shard
std::lock_guard<std::mutex> guard(shardSync_[idx]);
@ -107,8 +107,8 @@ void AsyncGraphGroupDrop::init(Ptr<data::Batch> batch) {
fetch_ready.push_back(false);
// Size of the sparse tensor
int totalSize = graphs_[0]->params()->vals()->size();
int sparseCap = totalSize * 1.2 * (1.0 - droping_rate);
int totalSize = (int)graphs_[0]->params()->vals()->size();
int sparseCap = (int)(totalSize * 1.2 * (1.0 - droping_rate));
// prepare droppers
std::vector<GradientDrop> tmpDropper;
@ -120,13 +120,13 @@ void AsyncGraphGroupDrop::init(Ptr<data::Batch> batch) {
std::vector<SparseTensor> tmp;
for(int j = 0; j < devices_.size(); j++)
tmp.push_back(SparseTensor(new SparseTensorBase(
sparseCap / devices_.size(), graphs_[i]->getBackend())));
sparseCap / (int)devices_.size(), graphs_[i]->getBackend())));
sparseGrads_.push_back(tmp);
std::vector<SparseTensor> tmp2;
for(int j = 0; j < devices_.size(); j++)
tmp2.push_back(SparseTensor(new SparseTensorBase(
sparseCap / devices_.size(), graphs_[j]->getBackend())));
sparseCap / (int)devices_.size(), graphs_[j]->getBackend())));
sparseShards_.push_back(tmp2);
}
drop_first = false;

22
src/training/graph_group_multinode.cpp Normal file → Executable file
View File

@ -60,8 +60,8 @@ void MultiNodeGraphGroup::init(Ptr<data::Batch> batch) {
for(int i = 0; i < mpi_comm_world_size_; i++) {
// Shard buffers across GPUs
auto backend = clientGraphs_[i % devices_.size()]->getBackend();
Tensor accGrad = newTensor(nodeSizes_[i], backend);
Tensor accGradBuff = newTensor(nodeSizes_[i], backend);
Tensor accGrad = newTensor((int)nodeSizes_[i], backend);
Tensor accGradBuff = newTensor((int)nodeSizes_[i], backend);
accGradients.push_back(accGrad);
accGradientBuffer.push_back(accGradBuff);
}
@ -113,7 +113,7 @@ void MultiNodeGraphGroup::runBatchThroughClientGraphs(Ptr<data::Batch> batch) {
*/
void MultiNodeGraphGroup::calculateNodeSizes() {
size_t modelSize = clientGraphs_[0]->params()->vals()->size();
size_t nodeSize = ceilf(((float)modelSize) / mpi_comm_world_size_);
size_t nodeSize = (size_t)ceilf(((float)modelSize) / mpi_comm_world_size_);
for(int node = 0; node < mpi_comm_world_size_; node++) {
size_t remainingModelSize = modelSize - (nodeSize * node);
// Takes care of edge case where last node is smaller than the others
@ -166,11 +166,11 @@ void MultiNodeGraphGroup::initClientCommOverlapGpuTensors() {
for(size_t client = 0; client < devices_.size(); client++) {
// Communication overlap buffer (for grads + params)
Tensor commOverlapBuffer
= newTensor(modelSize, clientGraphs_[client]->getBackend());
= newTensor((int)modelSize, clientGraphs_[client]->getBackend());
commOverlapBuffer->copyFrom(clientGraphs_[0]->params()->vals());
clientCommOverlapBuffersGPU_.push_back(commOverlapBuffer);
// Gradients local sum buffer
Tensor sumGrads = newTensor(modelSize, clientGraphs_[client]->getBackend());
Tensor sumGrads = newTensor((int)modelSize, clientGraphs_[client]->getBackend());
sumGrads->set(0);
clientSummedGradsGPU.push_back(sumGrads);
// Local optimizer to apply summed gradients
@ -207,7 +207,7 @@ void MultiNodeGraphGroup::setupServerShards() {
*/
void MultiNodeGraphGroup::calculateShardSizes() {
size_t nodeSize = nodeSizes_[mpi_my_rank_];
size_t shardSize = ceilf(((float)nodeSize) / devices_.size());
size_t shardSize = (size_t)ceilf(((float)nodeSize) / devices_.size());
for(size_t shard = 0; shard < devices_.size(); shard++) {
size_t remainingNodeSize = nodeSize - (shardSize * shard);
// Takes care of edge case where last shard is smaller than the others
@ -226,12 +226,12 @@ void MultiNodeGraphGroup::initShardGpuTensors() {
}
for(size_t shard = 0; shard < devices_.size(); shard++) {
Tensor gpuParams
= newTensor(shardSizes_[shard], clientGraphs_[shard]->getBackend());
= newTensor((int)shardSizes_[shard], clientGraphs_[shard]->getBackend());
gpuParams->copyFrom(clientGraphs_[0]->params()->vals()->subtensor(
offset, shardSizes_[shard]));
(int)offset, (int)shardSizes_[shard]));
shardParams_.push_back(gpuParams);
shardGrads_.push_back(
newTensor(shardSizes_[shard], clientGraphs_[shard]->getBackend()));
newTensor((int)shardSizes_[shard], clientGraphs_[shard]->getBackend()));
offset += shardSizes_[shard];
}
}
@ -519,6 +519,8 @@ void MultiNodeGraphGroup::synchronizeWithServerShards(Tensor newGrads,
offset += nodeSize;
}
#else
newGrads; oldParams; gpu; batchWords; // (unused)
#endif
}
@ -578,7 +580,7 @@ void MultiNodeGraphGroup::execute(Ptr<data::Batch> batch) {
if(!clientCommOverlap) {
synchronizeWithServerShards(graph->params()->grads(),
graph->params()->vals(),
my_id,
(int)my_id,
batch->wordsTrg());
}

2
src/training/graph_group_multinode.h Normal file → Executable file
View File

@ -388,7 +388,7 @@ protected:
numberClientsOfNodes_ = std::vector<int>(mpi_comm_world_size_, 0);
while(index < deviceConfig.size()) {
if(numberClientsOfNodes_[node] == 0) {
numberClientsOfNodes_[node] = deviceConfig[index];
numberClientsOfNodes_[node] = (int)deviceConfig[index];
nClientsSeen = 0;
} else if(nClientsSeen < numberClientsOfNodes_[node]) {
if(node == mpi_my_rank_) {

2
src/training/graph_group_multinode_sync.cpp Normal file → Executable file
View File

@ -45,7 +45,7 @@ Tensor MultiNodeGraphGroupSync::newTensor(int size, Ptr<Backend> backend) {
void MultiNodeGraphGroupSync::init(Ptr<data::Batch> batch) {
// Setup clients and shards
setupClients(batch);
int network_size = clientGraphs_[0]->params()->vals()->size();
int network_size = (int)clientGraphs_[0]->params()->vals()->size();
LOG(info, "model size = {} float params", network_size);
if(movingAvg_)
paramsAvg_ = newTensor(network_size, clientGraphs_.back()->getBackend());

2
src/training/graph_group_multinode_sync.h Normal file → Executable file
View File

@ -166,7 +166,7 @@ protected:
numberClientsOfNodes_ = std::vector<int>(mpi_comm_world_size_, 0);
while(index < deviceConfig.size()) {
if(numberClientsOfNodes_[node] == 0) {
numberClientsOfNodes_[node] = deviceConfig[index];
numberClientsOfNodes_[node] = (int)deviceConfig[index];
nClientsSeen = 0;
} else if(nClientsSeen < numberClientsOfNodes_[node]) {
if(node == mpi_my_rank_) {

12
src/training/graph_group_sync.cpp Normal file → Executable file
View File

@ -64,8 +64,8 @@ void SyncGraphGroup::initializeAvg() {
graphAvg->forward();
}
int totalSize = graphs_[0]->params()->vals()->size();
shardSize_ = ceil(totalSize / (float)devices_.size());
int totalSize = (int)graphs_[0]->params()->vals()->size();
shardSize_ = (int)ceil(totalSize / (float)devices_.size());
int pos = 0;
for(auto graph : graphs_) {
@ -99,7 +99,7 @@ void SyncGraphGroup::execute(Ptr<data::Batch> batch) {
size_t devs = devices_.size();
auto batches = batch->split(delay_ * devs);
float div = batches.size(); // no. of batches
float div = (float)batches.size(); // no. of batches
// do not average gradients if cost type is sum.
if(options_->get<std::string>("cost-type") == "ce-sum")
div = 1;
@ -131,7 +131,7 @@ void SyncGraphGroup::execute(Ptr<data::Batch> batch) {
}
// Execute single forward/backward step
auto forwardBackward = [this, &costs, curBatches, t](size_t idx, int pos) {
auto forwardBackward = [this, &costs, curBatches, t](size_t idx, int /*pos*/) {
auto graph = graphs_[idx];
auto batch = curBatches[idx];
@ -152,8 +152,8 @@ void SyncGraphGroup::execute(Ptr<data::Batch> batch) {
// Update parameter shard with gradient shard
auto update = [this, div](size_t idx, int pos) {
int totalSize = graphs_[0]->params()->vals()->size();
int shardSize = ceil(totalSize / (float)devices_.size());
int totalSize = (int)graphs_[0]->params()->vals()->size();
int shardSize = (int)ceil(totalSize / (float)devices_.size());
int size = std::min(totalSize - pos, shardSize);

18
src/training/scheduler.h Normal file → Executable file
View File

@ -20,12 +20,12 @@ private:
float getLearningRate(TrainingState& state) {
float baselr = options_->get<float>("learn-rate");
float bno = state.batches - state.warmupStart;
auto bno = state.batches - state.warmupStart;
size_t warmup = options_->get<size_t>("lr-warmup");
float mult1 = 1.f;
if(warmup > 0) {
mult1 = std::min(1.f, bno / (float)warmup);
mult1 = std::min(1.f, (float)bno / (float)warmup);
}
size_t decayGoogle = options_->get<size_t>("lr-decay-inv-sqrt");
@ -164,8 +164,8 @@ public:
void update(float cost, const std::vector<Ptr<data::Batch>>& batches) {
state_->validated = false;
auto batchSize = 0; // number of sentences in batch
auto batchLabels = 0; // number of target words in batch
size_t batchSize = 0; // number of sentences in batch
size_t batchLabels = 0; // number of target words in batch
for(const auto& batch : batches) {
batchSize += batch->size();
@ -303,7 +303,7 @@ public:
}
void actAfterEpoch(TrainingState& state) override {
float factor = options_->get<double>("lr-decay");
float factor = (float)options_->get<double>("lr-decay"); // @TODO: <float>?
float baselr = getLearningRate(state);
state.eta = baselr * state.factor;
@ -355,7 +355,7 @@ public:
}
void actAfterBatches(TrainingState& state) override {
float factor = options_->get<double>("lr-decay");
float factor = (float)options_->get<double>("lr-decay"); // @TODO: <float>?
state.reset = false;
float baselr = getLearningRate(state);
@ -365,7 +365,7 @@ public:
if("batches" == options_->get<std::string>("lr-decay-strategy")) {
size_t start
= options_->get<std::vector<size_t>>("lr-decay-start").front();
int freq = options_->get<size_t>("lr-decay-freq");
size_t freq = options_->get<size_t>("lr-decay-freq");
if(start > 0 && freq > 0 && state.batches >= start
&& ((state.batches - start) % freq == 0)) {
@ -403,7 +403,7 @@ public:
}
void actAfterStalled(TrainingState& state) override {
float factor = options_->get<double>("lr-decay");
float factor = (float)options_->get<double>("lr-decay"); // @TODO: <float>?
state.reset = false;
float baselr = getLearningRate(state);
@ -411,7 +411,7 @@ public:
if(factor > 0.0) {
if(options_->get<std::string>("lr-decay-strategy") == "stalled") {
int startStalled
size_t startStalled
= options_->get<std::vector<size_t>>("lr-decay-start").front();
if(startStalled && state.stalled && state.stalled % startStalled == 0) {
state.factor *= factor;

10
src/training/training_state.h Normal file → Executable file
View File

@ -11,11 +11,11 @@ class TrainingState;
// Observer interface for training-state events. Each hook receives the
// TrainingState by reference and has an empty default body.
// NOTE(review): this span is a rendered diff, so every hook is listed twice:
// once with a named parameter and once with the name omitted. Presumably the
// unnamed form is the post-change version (avoids unused-parameter warnings) -
// confirm against the actual header.
class TrainingObserver {
public:
virtual void init(TrainingState& state) {}
virtual void actAfterEpoch(TrainingState& state) {}
virtual void actAfterBatches(TrainingState& state) {}
virtual void actAfterStalled(TrainingState& state) {}
virtual void actAfterLoaded(TrainingState& state) {}
virtual void init(TrainingState&) {}
virtual void actAfterEpoch(TrainingState&) {}
virtual void actAfterBatches(TrainingState&) {}
virtual void actAfterStalled(TrainingState&) {}
virtual void actAfterLoaded(TrainingState&) {}
};
class TrainingState {

18
src/training/validator.h Normal file → Executable file
View File

@ -209,7 +209,7 @@ public:
auto command = options_->get<std::string>("valid-script-path");
auto valStr = utils::Exec(command);
float val = std::atof(valStr.c_str());
float val = (float)std::atof(valStr.c_str());
updateStalled(graphs, val);
return val;
@ -219,8 +219,8 @@ public:
protected:
// Stub override of validateBG: neither argument is used and 0 is returned.
// NOTE(review): the parameter list appears twice because this is a rendered
// diff (named parameters, then the same parameters with commented-out names).
virtual float validateBG(
const std::vector<Ptr<ExpressionGraph>>& graphs,
Ptr<data::BatchGenerator<data::Corpus>> batchGenerator) override {
const std::vector<Ptr<ExpressionGraph>>& /*graphs*/,
Ptr<data::BatchGenerator<data::Corpus>> /*batchGenerator*/) override {
return 0;
}
};
@ -332,7 +332,7 @@ public:
std::stringstream best1;
std::stringstream bestn;
printer->print(history, best1, bestn);
collector->Write(history->GetLineNum(),
collector->Write((long)history->GetLineNum(),
best1.str(),
bestn.str(),
options_->get<bool>("n-best"));
@ -357,7 +357,7 @@ public:
auto command
= options_->get<std::string>("valid-script-path") + " " + fileName;
auto valStr = utils::Exec(command);
val = std::atof(valStr.c_str());
val = (float)std::atof(valStr.c_str());
updateStalled(graphs, val);
}
@ -370,8 +370,8 @@ protected:
bool quiet_{false};
// Stub override of validateBG: neither argument is used and 0 is returned.
// NOTE(review): the parameter list appears twice because this is a rendered
// diff (named parameters, then the same parameters with commented-out names).
virtual float validateBG(
const std::vector<Ptr<ExpressionGraph>>& graphs,
Ptr<data::BatchGenerator<data::Corpus>> batchGenerator) override {
const std::vector<Ptr<ExpressionGraph>>& /*graphs*/,
Ptr<data::BatchGenerator<data::Corpus>> /*batchGenerator*/) override {
return 0;
}
};
@ -562,8 +562,8 @@ protected:
}
// Stub override of validateBG: neither argument is used and 0 is returned.
// NOTE(review): the parameter list appears twice because this is a rendered
// diff (named parameters, then the same parameters with commented-out names).
virtual float validateBG(
const std::vector<Ptr<ExpressionGraph>>& graphs,
Ptr<data::BatchGenerator<data::Corpus>> batchGenerator) override {
const std::vector<Ptr<ExpressionGraph>>& /*graphs*/,
Ptr<data::BatchGenerator<data::Corpus>> /*batchGenerator*/) override {
return 0;
}
};

View File

@ -15,8 +15,8 @@ private:
Ptr<Options> options_;
std::vector<Ptr<Scorer>> scorers_;
size_t beamSize_;
Word trgEosId_ = -1;
Word trgUnkId_ = -1;
Word trgEosId_ = (Word)-1;
Word trgUnkId_ = (Word)-1;
public:
BeamSearch(Ptr<Options> options,
@ -49,8 +49,8 @@ public:
for(size_t i = 0; i < keys.size(); ++i) {
// Keys contains indices to vocab items in the entire beam.
// Values can be between 0 and beamSize * vocabSize.
int embIdx = keys[i] % vocabSize;
int beamIdx = i / beamSize;
size_t embIdx = keys[i] % vocabSize;
auto beamIdx = i / beamSize;
// Retrieve short list for final softmax (based on words aligned
// to source sentences). If short list has been set, map the indices
@ -63,15 +63,15 @@ public:
auto& beam = beams[beamIdx];
auto& newBeam = newBeams[beamIdx];
int hypIdx = keys[i] / vocabSize;
size_t hypIdx = keys[i] / vocabSize;
float pathScore = pathScores[i];
int hypIdxTrans
size_t hypIdxTrans
= (hypIdx / beamSize) + (hypIdx % beamSize) * beams.size();
if(first)
hypIdxTrans = hypIdx;
int beamHypIdx = hypIdx % beamSize;
size_t beamHypIdx = hypIdx % beamSize;
if(beamHypIdx >= (int)beam.size())
beamHypIdx = beamHypIdx % beam.size();
@ -85,7 +85,7 @@ public:
std::vector<float> breakDown(states.size(), 0);
beam[beamHypIdx]->GetScoreBreakdown().resize(states.size(), 0);
for(size_t j = 0; j < states.size(); ++j) {
int key = embIdx + hypIdxTrans * vocabSize;
size_t key = embIdx + hypIdxTrans * vocabSize;
breakDown[j] = states[j]->breakDown(key)
+ beam[beamHypIdx]->GetScoreBreakdown()[j];
}
@ -95,7 +95,7 @@ public:
// Set alignments
if(!align.empty()) {
hyp->SetAlignment(
getAlignmentsForHypothesis(align, batch, beamHypIdx, beamIdx));
getAlignmentsForHypothesis(align, batch, (int)beamHypIdx, (int)beamIdx));
}
newBeam.push_back(hyp);
@ -156,7 +156,7 @@ public:
// main decoding function
Histories search(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch) {
int dimBatch = batch->size();
int dimBatch = (int)batch->size();
Histories histories;
for(int i = 0; i < dimBatch; ++i) {
@ -212,7 +212,7 @@ public:
} else {
std::vector<float> beamScores;
int dimBatch = batch->size();
dimBatch = (int)batch->size();
for(size_t i = 0; i < localBeamSize; ++i) {
for(size_t j = 0; j < beams.size(); ++j) { // loop over batch entries (active sentences)
@ -240,7 +240,7 @@ public:
for(size_t i = 0; i < scorers_.size(); ++i) {
states[i] = scorers_[i]->step(
graph, states[i], hypIndices, embIndices, dimBatch, localBeamSize);
graph, states[i], hypIndices, embIndices, dimBatch, (int)localBeamSize);
if(scorers_[i]->getWeight() != 1.f)
pathScores = pathScores + scorers_[i]->getWeight() * states[i]->getLogProbs();

View File

@ -19,7 +19,7 @@ void SetColumn(Tensor in_, size_t col, float value) {
float* in = in_->data();
for(int rowNumber = 0; rowNumber < nRows; ++rowNumber) {
int index = col + rowNumber * nColumns;
auto index = col + rowNumber * nColumns;
in[index] = value;
}
}

View File

@ -30,8 +30,8 @@ void NthElementCPU::getNBestList(float* scores,
std::vector<int> idxs(numProbs);
std::iota(idxs.begin(), idxs.end(), 0);
int numBatches = batchFirstElementIdxs.size() - 1;
for(int batchIdx = 0; batchIdx < numBatches; ++batchIdx) {
size_t numBatches = batchFirstElementIdxs.size() - 1;
for(size_t batchIdx = 0; batchIdx < numBatches; ++batchIdx) {
int pos = cumulativeBeamSizes[batchIdx];
int beamSize = cumulativeBeamSizes[batchIdx + 1] - pos;
@ -61,9 +61,9 @@ void NthElementCPU::getNBestList(const std::vector<size_t>& beamSizes,
std::vector<int> cumulativeBeamSizes(beamSizes.size() + 1, 0);
std::vector<int> batchFirstElementIdxs(beamSizes.size() + 1, 0);
size_t vocabSize = scores->shape()[-1];
for(size_t i = 0; i < beamSizes.size(); ++i) {
cumulativeBeamSizes[i + 1] = cumulativeBeamSizes[i] + beamSizes[i];
auto vocabSize = scores->shape()[-1];
for(int i = 0; i < beamSizes.size(); ++i) {
cumulativeBeamSizes[i + 1] = cumulativeBeamSizes[i] + (int)beamSizes[i];
batchFirstElementIdxs[i + 1]
+= (isFirst ? i + 1 : cumulativeBeamSizes[i + 1]) * vocabSize;
}

2
src/translator/output_collector.h Normal file → Executable file
View File

@ -29,7 +29,7 @@ class GeometricPrinting : public PrintingStrategy {
public:
bool shouldBePrinted(long id) override {
if(id == 0)
next_ = start_;
next_ = (long)start_;
if(id <= 5)
return true;
if(next_ == id) {

2
src/translator/output_printer.cpp Normal file → Executable file
View File

@ -24,8 +24,6 @@ std::string OutputPrinter::getAlignment(const Ptr<Hypothesis>& hyp) {
} else {
ABORT("Unrecognized word alignment type");
}
return "";
}
} // namespace marian

4
src/translator/scorers.cpp Normal file → Executable file
View File

@ -69,7 +69,7 @@ std::vector<Ptr<Scorer>> createScorers(Ptr<Config> options) {
try {
if(!options->get<bool>("ignore-model-config"))
modelOptions->loadModelParameters(model);
} catch(std::runtime_error& e) {
} catch(std::runtime_error&) {
LOG(warn, "No model settings found in model file");
}
@ -96,7 +96,7 @@ std::vector<Ptr<Scorer>> createScorers(Ptr<Config> options,
try {
if(!options->get<bool>("ignore-model-config"))
modelOptions->loadModelParameters(ptr);
} catch(std::runtime_error& e) {
} catch(std::runtime_error&) {
LOG(warn, "No model settings found in model file");
}

2
src/translator/translator.h Normal file → Executable file
View File

@ -111,7 +111,7 @@ public:
std::stringstream best1;
std::stringstream bestn;
printer->print(history, best1, bestn);
collector->Write(history->GetLineNum(),
collector->Write((long)history->GetLineNum(),
best1.str(),
bestn.str(),
options_->get<bool>("n-best"));

View File

@ -63,14 +63,14 @@
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level1</WarningLevel>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>MKL_FOUND=1; BLAS_FOUND=1; MKL_ILP64; WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<TreatWarningAsError>false</TreatWarningAsError>
<TreatWarningAsError>true</TreatWarningAsError>
<AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
<RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">MultiThreadedDebugDLL</RuntimeLibrary>
<DisableSpecificWarnings>4996;4244</DisableSpecificWarnings>
<DisableSpecificWarnings>4996; 4702</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<MinimalRebuild>false</MinimalRebuild>
</ClCompile>
@ -79,6 +79,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>zlib.lib; mkl_intel_ilp64.lib; mkl_sequential.lib; mkl_core.lib; kernel32.lib; user32.lib; gdi32.lib; winspool.lib; comdlg32.lib; advapi32.lib; shell32.lib; ole32.lib; oleaut32.lib; uuid.lib; odbc32.lib; odbccp32.lib; %(AdditionalDependencies)</AdditionalDependencies>
<StackReserveSize>100000000</StackReserveSize>
<TreatLinkerWarningAsErrors>true</TreatLinkerWarningAsErrors>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
@ -93,12 +94,12 @@
<SDLCheck>true</SDLCheck>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<AdditionalOptions>/d2Zi+ /bigobj %(AdditionalOptions)</AdditionalOptions>
<TreatWarningAsError>false</TreatWarningAsError>
<TreatWarningAsError>true</TreatWarningAsError>
<RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MultiThreadedDLL</RuntimeLibrary>
<RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Release_NoOpt|x64'">MultiThreaded</RuntimeLibrary>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<OmitFramePointers>true</OmitFramePointers>
<DisableSpecificWarnings>4996</DisableSpecificWarnings>
<DisableSpecificWarnings>4996; 4702</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
</ClCompile>
<Link>
@ -108,6 +109,7 @@
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>zlib.lib; mkl_intel_ilp64.lib; mkl_sequential.lib; mkl_core.lib; kernel32.lib; user32.lib; gdi32.lib; winspool.lib; comdlg32.lib; advapi32.lib; shell32.lib; ole32.lib; oleaut32.lib; uuid.lib; odbc32.lib; odbccp32.lib; %(AdditionalDependencies)</AdditionalDependencies>
<StackReserveSize>100000000</StackReserveSize>
<TreatLinkerWarningAsErrors>true</TreatLinkerWarningAsErrors>
</Link>
</ItemDefinitionGroup>
<ItemGroup>

View File

@ -220,9 +220,6 @@
<ClCompile Include="..\src\common\binary.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\src\command\marian_conv.cpp">
<Filter>command</Filter>
</ClCompile>
<ClCompile Include="..\src\data\alignment.cpp">
<Filter>data</Filter>
</ClCompile>
@ -241,9 +238,6 @@
<ClCompile Include="..\src\3rd_party\yaml-cpp\binary_renamed.cpp">
<Filter>3rd_party\yaml-cpp</Filter>
</ClCompile>
<ClCompile Include="..\src\command\marian.cpp">
<Filter>command</Filter>
</ClCompile>
<ClCompile Include="..\src\training\graph_group_multinode_sync.cpp">
<Filter>training</Filter>
</ClCompile>
@ -1033,6 +1027,13 @@
<ClInclude Include="..\src\command\marian_vocab.cpp">
<Filter>command</Filter>
</ClInclude>
<ClInclude Include="..\src\command\marian.cpp">
<Filter>command</Filter>
</ClInclude>
<ClInclude Include="..\src\command\marian_decoder.cpp">
<Filter>command</Filter>
</ClInclude>
<ClInclude Include="..\src\command\marian_conv.cpp" />
</ItemGroup>
<ItemGroup>
<Filter Include="3rd_party">