minibatch-size warmup (manually merged over from fseide/covbias);

minibatches are now fed in GPU-sized chunks rather than a massive joint batch for all GPUs in the update;
Adam hyper-parameter adjustment limited to learning rate, as momentum adjustment is counterproductive for MB scaling;
log output now includes the last batch size;
log output now shows current best for stalled validation metrics;
bug fix: Adam optimizer should persist denominators;
bug fix: Adam and Adagrad should use correct element size when persisting;
min and max renamed to minimum and maximum, for consistency with other toolkits;
pathie now compiles in manual VS Project
This commit is contained in:
Frank Seide 2018-12-12 18:40:46 -08:00
parent 66f05527d9
commit 4cf97df51e
51 changed files with 1132 additions and 653 deletions

2
src/3rd_party/ExceptionWithCallStack.h vendored Normal file → Executable file
View File

@ -5,6 +5,8 @@
// ExceptionWithCallStack.h - debug util functions
//
#pragma once
#include <string>
namespace Microsoft { namespace MSR { namespace CNTK {

2
src/3rd_party/pathie-cpp/src/entry_iterator.cpp vendored Normal file → Executable file
View File

@ -178,7 +178,7 @@ entry_iterator& entry_iterator::operator++(int)
/// Same as the other operator++().
entry_iterator& entry_iterator::operator++()
{
return (operator++());
return (operator++(0));
}
/**

12
src/3rd_party/pathie-cpp/src/path.cpp vendored Normal file → Executable file
View File

@ -51,7 +51,7 @@
#include <shlwapi.h>
//#include <ntifs.h> // Currently not in msys2
// @TODO: This is a hack to make it compile under Windows, check if this is save.
// @TODO: This is a hack to make it compile under Windows, check if this is safe.
#define F_OK 0
#elif defined(_PATHIE_UNIX)
@ -1546,7 +1546,7 @@ bool Path::is_directory() const
throw(Pathie::ErrnoError(errsav));
}
return s.st_mode & S_IFDIR;
return (s.st_mode & S_IFDIR) != 0;
#else
#error Unsupported system.
#endif
@ -1590,7 +1590,7 @@ bool Path::is_file() const
throw(Pathie::ErrnoError(errno));
}
return s.st_mode & S_IFREG;
return (s.st_mode & S_IFREG) != 0;
#else
#error Unsupported system.
#endif
@ -1710,9 +1710,9 @@ void Path::remove() const
* function uses the appropriate native Win32API function
* calls accordingly therefore. */
if (is_directory())
result = RemoveDirectoryW(utf16.c_str());
result = RemoveDirectoryW(utf16.c_str()) != 0;
else
result = DeleteFileW(utf16.c_str());
result = DeleteFileW(utf16.c_str()) != 0;
if (!result) {
DWORD err = GetLastError();
@ -3282,7 +3282,7 @@ bool Path::fnmatch(const std::string& pattern, int flags /* = 0 */) const
#elif defined(_WIN32)
std::wstring utf16path = utf8_to_utf16(m_path);
std::wstring utf16pattern = utf8_to_utf16(pattern);
return PathMatchSpecW(utf16path.c_str(), utf16pattern.c_str());
return PathMatchSpecW(utf16path.c_str(), utf16pattern.c_str()) != 0;
#else
#error Unsupported system.
#endif

@ -1 +1 @@
Subproject commit 1a38d26a13cc67b1aae641d4983b624bef6d5305
Subproject commit 21309542e69e1821ff8e905fa60d8852ac12a73f

View File

@ -11,6 +11,8 @@
#include "training/graph_group_multinode.h"
#endif
#include "3rd_party/ExceptionWithCallStack.h"
int main(int argc, char** argv) {
using namespace marian;

View File

@ -335,9 +335,10 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) {
"Reset running statistics of optimizer whenever learning rate decays");
cli.add<bool>("--lr-decay-repeat-warmup",
"Repeat learning rate warmup when learning rate is decayed");
cli.add<std::string/*SchedulerPeriod*/>("--lr-decay-inv-sqrt",
"Decrease learning rate at arg / sqrt(no. batches) starting at arg (append 't' or 'e' for sqrt(target labels or epochs))",
"0");
cli.add<std::vector<std::string/*SchedulerPeriod*/>>("--lr-decay-inv-sqrt",
"Decrease learning rate at arg / sqrt(no. batches) starting at arg (append 't' or 'e' for sqrt(target labels or epochs)). "
"Add second argument to define the starting point",
{"0"});
cli.add<std::string/*SchedulerPeriod*/>("--lr-warmup",
"Increase learning rate linearly for arg first batches (append 't' for arg first target labels)",
@ -354,9 +355,10 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) {
cli.add<double>("--clip-norm",
"Clip gradient norm to arg (0 to disable)",
1.f);
cli.add<float>("--exponential-smoothing",
"Maintain smoothed version of parameters for validation and saving with smoothing factor. 0 to disable",
0)->implicit_val("1e-4");
cli.add<std::vector<float>>("--exponential-smoothing",
"Maintain smoothed version of parameters for validation and saving with smoothing factor. 0 to disable. "
"Add a second number to specify a reference batch size (in target words).",
{ 0.f })->implicit_val("1e-4");
cli.add<std::string>("--guided-alignment",
"Path to a file with word alignments. Use guided alignment to guide attention or 'none'",
"none");
@ -604,6 +606,13 @@ void ConfigParser::addSuboptionsBatching(cli::CLIWrapper& cli) {
cli.add<bool>("--shuffle-in-ram",
"Keep shuffled corpus in RAM, do not write to temp file");
cli.add<std::vector<std::string/*SchedulerPeriod*/>>("--mini-batch-warmup",
"linear ramp-up of MB size, up to this #updates (append 't' for up to this #target labels);"
"optional second number is reference batch size at which to stop scaling up (instead of full batch size)",
{"0"});
cli.add<bool>("--mini-batch-track-lr",
"Dynamically track mini-batch size inverse to actual learning rate (not considering lr-warmup)");
// clang-format on
}

View File

@ -51,8 +51,16 @@ struct DeviceId {
DeviceId() : no{0}, type{DeviceType::gpu} {}
DeviceId(size_t no_, DeviceType type_) : no(no_), type(type_) {}
std::string typeAsString() const {
return (type == DeviceType::gpu ? "gpu" : "cpu");
}
operator std::string() const {
return typeAsString() + std::to_string(no);
}
friend std::ostream& operator<<(std::ostream& out, DeviceId deviceId) {
out << (deviceId.type == DeviceType::gpu ? "gpu" : "cpu") << deviceId.no;
out << std::string(deviceId);
return out;
}

View File

@ -178,11 +178,9 @@ public:
bool empty() { return istream_->peek() == std::ifstream::traits_type::eof(); }
void setbufsize(size_t size) const {
#ifdef 0 // this is buggy, do nothing
istream_->rdbuf()->pubsetbuf(0, 0);
readBuf_.reset(new char[size]);
istream_->rdbuf()->pubsetbuf(readBuf_.get(), 0);
#endif
readBuf_.resize(size);
istream_->rdbuf()->pubsetbuf(readBuf_.data(), readBuf_.size());
}
template <typename T>
@ -206,9 +204,8 @@ private:
std::unique_ptr<std::istream> istream_;
boost::iostreams::file_descriptor_source fds_;
mutable std::vector<char> readBuf_; // for setbuf()
std::unique_ptr<boost::iostreams::stream_buffer<boost::iostreams::file_descriptor_source>> fdsBuffer_;
mutable UPtr<char[]> readBuf_; // for setbuf()
};
// wrapper around std::getline() that handles Windows input files with extra CR

View File

@ -12,7 +12,7 @@
#pragma GCC diagnostic ignored "-Wsuggest-override"
#endif
#include "3rd_party/pathie-cpp/include/path.hpp"
#include "3rd_party/pathie-cpp/include/path.hpp" // @TODO: update to latest Pathie
#include "3rd_party/pathie-cpp/include/errors.hpp"
#ifdef __GNUC__

View File

@ -128,10 +128,12 @@ void saveItemsNpz(const std::string& fileName, const std::vector<Item>& items) {
std::vector<cnpy::NpzItem> npzItems;
for(auto& item : items) {
std::vector<unsigned int> shape(item.shape.begin(), item.shape.end());
char type = 'f';
char type;
if(item.type == Type::float32)
type = cnpy::map_type(typeid(float));
else if(item.type == Type::float64)
type = cnpy::map_type(typeid(double));
else if(item.type == Type::int8)
type = cnpy::map_type(typeid(char));
else

View File

@ -84,7 +84,7 @@ void createLoggers(const marian::Config* options) {
bool quiet = options && options->get<bool>("quiet");
Logger general{
createStderrLogger("general", "[%Y-%m-%d %T] %v", generalLogs, quiet)};
createStderrLogger("general", "[%Y-%m-%d %T %t] %v", generalLogs, quiet)};
Logger valid{
createStderrLogger("valid", "[%Y-%m-%d %T] [valid] %v", validLogs, quiet)};
@ -115,7 +115,7 @@ static void unhandledException() {
throw; // rethrow so that we can get access to what()
}
catch (const std::exception& e) {
ABORT("Unhandled {}: {}", typeid(e).name(), e.what());
ABORT("Unhandled exception of type '{}': {}", typeid(e).name(), e.what());
}
catch (...) {
ABORT("Unhandled exception");
@ -145,7 +145,7 @@ static void setErrorHandlers() {
void switchtoMultinodeLogging(std::string nodeIdStr) {
Logger log = spdlog::get("general");
if (log)
log->set_pattern("[%Y-%m-%d %T " + nodeIdStr + "] %v");
log->set_pattern("[%Y-%m-%d %T " + nodeIdStr + ":%t] %v");
}

View File

@ -21,6 +21,16 @@ namespace marian {
*/
#define LOG(level, ...) checkedLog("general", #level, __VA_ARGS__)
// variant that prints the log message only upon the first time the call site is executed
// NOTE(review): the per-call-site 'logged' flag is written without synchronization, so two
// threads racing through the first execution may both log — presumably acceptable for
// logging; confirm if exactly-once semantics are ever required.
#define LOG_ONCE(level, ...) do { \
  static bool logged = false; \
  if (!logged) \
  { \
    logged = true; \
    LOG(level, __VA_ARGS__); \
  } \
} while(0)
/**
* Prints logging message regarding validation into stderr and a file specified
* with `--valid-log` option.

View File

@ -149,5 +149,40 @@ bool endsWith(const std::string& text, const std::string& suffix) {
&& !text.compare(text.size() - suffix.size(), suffix.size(), suffix);
}
// Uppercases a string, byte by byte, using the global default locale.
// @BUGBUG: This won't work with UTF-8 characters (multi-byte sequences are
// converted one byte at a time).
std::string toUpper(const std::string& s) {
  std::locale loc;
  std::string res;
  res.reserve(s.size()); // fix: reserve by length; s.capacity() may exceed (or understate) what we need
  for (auto c : s)
    res.push_back((char)std::toupper(c, loc));
  return res;
}
// Parses a string that must consist of exactly one floating-point number.
// Aborts on a malformed number or on trailing characters after the number.
double parseDouble(std::string s) {
  double value;
  char extra; // if sscanf manages to fill this as well, there was junk after the number
  int numConverted = sscanf(s.c_str(), "%lf%c", &value, &extra);
  ABORT_IF(numConverted != 1, "Mal-formed number: {}", s);
  return value;
}
// parses a user-friendly number that can have commas and (some) units
// e.g. "1,000k" -> 1e6. Aborts on unknown unit letters or malformed numbers.
double parseNumber(std::string param) {
  // get unit prefix
  double factor = 1.;
  if (!param.empty() && param.back() >= 'A') {
    switch (param.back()) {
      case 'k': factor = 1.e3; break;
      case 'M': factor = 1.e6; break;
      case 'G': factor = 1.e9; break;
      case 'T': factor = 1.e12; break;
      default: ABORT("Invalid or unsupported unit prefix '{}' in {}", param.back(), param);
    }
    param.pop_back();
  }
  // we allow users to place commas in numbers (note: we are not actually verifying that they are in the right place)
  // bug fix: std::remove_if only shifts the kept characters forward and returns the new
  // logical end; without the erase() the string keeps its old length (with stale bytes at
  // the tail), and parseDouble() would reject any number that contained a comma.
  param.erase(std::remove_if(param.begin(), param.end(), [](char c) { return c == ','; }),
              param.end());
  return factor * parseDouble(param);
}
} // namespace utils
} // namespace marian

View File

@ -38,5 +38,9 @@ std::pair<std::string, int> hostnameAndProcessId();
std::string withCommas(size_t n);
bool endsWith(const std::string& text, const std::string& suffix);
std::string toUpper(const std::string& s);
double parseDouble(std::string s);
double parseNumber(std::string s);
} // namespace utils
} // namespace marian

View File

@ -19,7 +19,7 @@ public:
virtual void debug(){};
virtual std::vector<Ptr<Batch>> split(size_t n) = 0;
virtual std::vector<Ptr<Batch>> split(size_t n, size_t sizeLimit = SIZE_MAX) = 0;
const std::vector<size_t>& getSentenceIds() const { return sentenceIds_; }
void setSentenceIds(const std::vector<size_t>& ids) { sentenceIds_ = ids; }

View File

@ -56,7 +56,7 @@ public:
typedef typename DataSet::batch_ptr BatchPtr;
typedef typename DataSet::Sample Sample;
typedef std::vector<Sample> Samples; // @TODO: type names should be capitalized
typedef std::vector<Sample> Samples;
typedef BatchIterator<BatchGenerator> iterator;
friend iterator;
@ -83,7 +83,6 @@ private:
// this runs on a bg thread; sequencing is handled by caller, but locking is done in here
std::deque<BatchPtr> fetchBatches() {
//LOG(info, "fillBatches entered");
typedef typename Sample::value_type Item;
auto itemCmp = [](const Item& sa, const Item& sb) { return sa.size() < sb.size(); }; // sort by element length, not content
@ -118,8 +117,6 @@ private:
size_t maxBatchSize = options_->get<int>("mini-batch");
size_t maxSize = maxBatchSize * options_->get<int>("maxi-batch");
// LOG(info, "Preloading batches");
// consume data from corpus into maxi-batch (single sentences)
// sorted into specified order (due to queue)
if(newlyPrepared_) {
@ -141,8 +138,6 @@ private:
}
size_t numSentencesRead = maxiBatch->size();
// LOG(info, "Turning samples into batches");
// construct the actual batches and place them in the queue
Samples batchVector;
size_t currentWords = 0;
@ -152,7 +147,6 @@ private:
// process all loaded sentences in order of increasing length
// @TODO: we could just use a vector and do a sort() here; would make the cost more explicit
//LOG(info, "begin form batches, #lines = {}", maxiBatch->size());
const size_t mbWords = options_->get<size_t>("mini-batch-words", 0);
const bool useDynamicBatching = options_->has("mini-batch-fit");
BatchStats::const_iterator cachedStatsIter;
@ -205,15 +199,25 @@ private:
}
// turn rest into batch
// @BUGBUG: This can create a very small batch, which with ce-mean-words can artificially
// inflate the contribution of the samples in the batch, causing instability.
// I think a good alternative would be to carry over the left-over sentences into the next round.
if(!batchVector.empty())
tempBatches.push_back(data_->toBatch(batchVector));
//LOG(info, "end form batches, #tempBatches = {}", tempBatches.size());
// Shuffle the batches
if(shuffle_) {
std::shuffle(tempBatches.begin(), tempBatches.end(), eng_);
}
LOG(debug, "[data] fetched {} batches with {} sentences.", tempBatches.size(), numSentencesRead);
double totalSent{}, totalLabels{};
for (auto& b : tempBatches) {
totalSent += (double)b->size();
totalLabels += (double)b->words(-1);
}
auto totalDenom = tempBatches.empty() ? 1 : tempBatches.size(); // (make 0/0 = 0)
LOG(info, "[data] fetched {} batches with {} sentences. Per batch: {} sentences, {} labels.",
tempBatches.size(), numSentencesRead,
(double)totalSent / (double)totalDenom, (double)totalLabels / (double)totalDenom);
return tempBatches;
}
@ -300,6 +304,18 @@ public:
return true;
}
// this is needed for dynamic MB scaling. Returns 0 if size is not known in words.
size_t estimateTypicalTrgBatchWords() const {
  const size_t mbWordsOption = options_->get<size_t>("mini-batch-words", 0);
  // dynamic batching with collected stats gives the best estimate
  if (options_->has("mini-batch-fit") && stats_)
    return stats_->estimateTypicalTrgWords();
  // otherwise fall back to the user-specified word budget, if any
  if (mbWordsOption != 0)
    return mbWordsOption;
  return 0; // batch size in words is not known
}
};
class CorpusBatchGenerator : public BatchGenerator<CorpusBase>,

View File

@ -49,6 +49,19 @@ public:
map_[lengths] = batchSize;
}
// return a rough minibatch size in labels
// We average over all (batch sizes * max trg length).
size_t estimateTypicalTrgWords() const {
  if (map_.empty()) // fix: avoid division by zero when no stats have been recorded yet
    return 0;
  size_t sum = 0;
  for (const auto& entry : map_) {
    auto maxTrgLength = entry.first.back(); // assumes the last length entry is the target length — TODO confirm against setter
    auto numSentences = entry.second;
    auto numLabels = numSentences * maxTrgLength; // rough label count of a batch with these lengths
    sum += numLabels;
  }
  return sum / map_.size();
}
// helpers for multi-node --note: presently unused, but keeping them around for later use
// serialize into a flat vector, for MPI data exchange
std::vector<size_t> flatten() const {

View File

@ -102,7 +102,7 @@ void Corpus::restore(Ptr<TrainingState> ts) {
}
void Corpus::shuffleData(const std::vector<std::string>& paths) {
LOG(info, "[data] Shuffling files");
LOG(info, "[data] Shuffling data");
size_t numStreams = paths.size();

View File

@ -63,8 +63,8 @@ public:
std::vector<size_t> sentenceIds;
std::vector<int> maxDims;
for(auto& ex : batchVector) {
std::vector<int> maxDims; // @TODO: What's this? widths? maxLengths?
for(auto& ex : batchVector) { // @TODO: rename 'ex' to 'sample' or 'sentenceTuple'
if(maxDims.size() < ex.size())
maxDims.resize(ex.size(), 0);
for(size_t i = 0; i < ex.size(); ++i) {

View File

@ -164,48 +164,51 @@ public:
*/
size_t batchWidth() { return width_; };
/**
* @brief The total number of words in the batch, considering the mask.
* @brief The total number of words in the batch (not counting masked-out words).
*/
size_t batchWords() { return words_; }
/**
* @brief Splits the subbatch into subbatches of equal size.
* @brief Splits the stream into sub-batches of equal size (except for last).
*
* @param n Number of splits
* @param n number of sub-batches to split into
*
* @return Vector of pointers to new subbatches.
* @param sizeLimit Pretend the batch only has this many sentences. Used for MB-size ramp-up.
*
* @return Vector of pointers to new sub-batches (or nullptrs where run out of sub-batches)
*
* @see marian::data::Batch::split(size_t n)
*/
std::vector<Ptr<SubBatch>> split(size_t n) {
ABORT_IF(size_ == 0, "Encoutered sub-batch size of 0");
std::vector<Ptr<SubBatch>> split(size_t n, size_t sizeLimit /*or SIZE_MAX*/) {
ABORT_IF(size_ == 0, "Encountered sub-batch size of 0");
size_t subSize = (size_t)(std::ceil(size_ / (float)n));
auto size = std::min(size_, sizeLimit); // if limit is given then pretend the batch only has that many sentences
size_t targetSubSize = (size_t)(std::ceil(size / (float)n)); // aim at forming sub-batches of this #sentences
std::vector<Ptr<SubBatch>> splits;
for(size_t pos = 0; pos < size_; pos += subSize) {
size_t size = std::min(subSize, size_ - pos);
for(size_t pos = 0; pos < size; pos += targetSubSize) { // loop over ranges of size targetSubSize to form sub-batches of this size
size_t subSize = std::min(targetSubSize, size - pos); // actual number of sentences can be smaller at the end
// determine actual width
// determine actual width (=max length) of this sub-batch, which may be smaller than the overall max length
size_t subWidth = 0;
for(size_t j = 0; j < width_; ++j) {
for(size_t i = 0; i < size; ++i) {
for(size_t i = 0; i < subSize; ++i) {
if(mask_[j * size_ + (pos + i)] != 0)
if (subWidth < j + 1)
subWidth = j + 1;
}
}
//if (subWidth < width_)
// LOG(info, "[data] sub-batch {} of {} wide batch has effective width of {}", pos / subSize, width_, subWidth);
// LOG(info, "[data] sub-batch {} of {} wide batch has effective width of {}", pos / targetSize, width_, subWidth);
// create sub-batch
auto sb = New<SubBatch>(size, subWidth, vocab_);
auto sb = New<SubBatch>(subSize, subWidth, vocab_);
size_t words = 0;
for(size_t j = 0; j < subWidth; ++j) {
for(size_t i = 0; i < size; ++i) {
sb->data()[j * size + i] = indices_[j * size_ + (pos + i)];
sb->mask()[j * size + i] = mask_[j * size_ + (pos + i)];
for(size_t i = 0; i < subSize; ++i) {
sb->data()[j * subSize + i] = indices_[j * size_ + (pos + i)];
sb->mask()[j * subSize + i] = mask_[j * size_ + (pos + i)];
if(mask_[j * size_ + (pos + i)] != 0)
words++;
@ -263,8 +266,8 @@ public:
size_t size() const override { return subBatches_[0]->batchSize(); }
/**
* @brief The total number of words for the longest sentence in the batch plus
* one. Pass which=0 for source and -1 for target.
* @brief The total number of words in the batch (not counting masked-out words).
* Pass which=0 for source words and -1 for target words.
*/
size_t words(int which = 0) const override {
return subBatches_[which >= 0 ? which
@ -349,25 +352,27 @@ public:
}
/**
* @brief Splits the batch into batches of equal size.
* @brief Splits the batch into batches of equal size (except for last).
*
* @param n number of splits
* @param n number of sub-batches to split into
*
* @return Vector of pointers to new batches.
* @param sizeLimit Clip batch content to the first sizeLimit sentences in the batch
*
* @return Vector of pointers to new sub-batches (or nullptrs where run out of sub-batches)
*
* @see marian::data::SubBatch::split(size_t n)
*/
std::vector<Ptr<Batch>> split(size_t n) override {
std::vector<Ptr<Batch>> split(size_t n, size_t sizeLimit /*=SIZE_MAX*/) override {
ABORT_IF(size() == 0, "Encountered batch size of 0");
std::vector<std::vector<Ptr<SubBatch>>> subs;
// split each subbatch separately
for(auto subBatch : subBatches_) {
size_t i = 0;
for(auto splitSubBatch : subBatch->split(n)) {
std::vector<std::vector<Ptr<SubBatch>>> subs; // [subBatchIndex][streamIndex]
// split each stream separately
for(auto batchStream : subBatches_) {
size_t i = 0; // index into split batch
for(auto splitSubBatch : batchStream->split(n, sizeLimit)) {
if(subs.size() <= i)
subs.resize(i + 1);
subs[i++].push_back(splitSubBatch);
subs[i++].push_back(splitSubBatch); // this forms tuples across streams
}
}

View File

@ -63,7 +63,7 @@ public:
void push_back(Input input) { inputs_.push_back(input); }
virtual std::vector<Ptr<Batch>> split(size_t /*n*/) override { ABORT("Not implemented"); }
virtual std::vector<Ptr<Batch>> split(size_t /*n*/, size_t /*sizeLimit*/) override { ABORT("Not implemented"); }
Data& features() { return inputs_[0].data(); }

21
src/functional/tmp.h Normal file → Executable file
View File

@ -81,6 +81,27 @@ struct FApply<4, Functor> {
}
};
// Specialization for functors of 5 tensor arguments; mirrors FApply<4,...> above.
template <class Functor>
struct FApply<5, Functor> {
  // Apply 'functor' to one element of each of the 5 tensors, each element
  // addressed by its own per-tensor index.
  __HDI__ static float apply(
      Functor functor,
      functional::Array<functional::Tensor<float>, 5>& in,
      const functional::Array<int, 5>& indices) {
    return functor(in[0][indices[0]],
                   in[1][indices[1]],
                   in[2][indices[2]],
                   in[3][indices[3]],
                   in[4][indices[4]]);
  }
  // Apply 'functor' to the same flat index of all 5 tensors.
  __HDI__ static float apply(
      Functor functor,
      functional::Array<functional::Tensor<float>, 5>& in,
      int index) {
    return functor(in[0][index], in[1][index], in[2][index], in[3][index], in[4][index]);
  }
};
template <size_t K, class Functor>
__HDI__ float apply(Functor functor,
functional::Array<functional::Tensor<float>, K>& in,

View File

@ -63,7 +63,7 @@ public:
tensors_->allocate(node->grad(), node->shape(), node->value_type());
}
void free(Tensor& tensor) { tensors_->free(tensor); }
void free(const Tensor& tensor) { tensors_->free(tensor); }
// @TODO: get rid of this, not really used or can be done better
Ptr<Allocator> allocator() { return tensors_->allocator(); }
@ -437,7 +437,7 @@ public:
tensors_->allocateBackward(node);
}
void free(Tensor& tensor) {
void free(const Tensor& tensor) {
if(tensors_)
tensors_->free(tensor);
}

4
src/graph/expression_operators.h Normal file → Executable file
View File

@ -66,9 +66,9 @@ Expr operator/(Expr a, float b);
Expr logaddexp(Expr a, Expr b);
Expr max(Expr a, Expr b); // TODO: haggle over the name (max vs. elementMax)
Expr maximum(Expr a, Expr b);
Expr min(Expr a, Expr b); // TODO: haggle over the name
Expr minimum(Expr a, Expr b);
Expr dot(Expr a,
Expr b,

2
src/graph/node_operators.h Normal file → Executable file
View File

@ -50,7 +50,7 @@ struct ParamNode : public Node {
~ParamNode() {}
virtual size_t allocate() override {
ABORT_IF(!val_, "Parameters should be allocated by their graph");
ABORT_IF(!val_, "Parameters should be allocated by their graph. Parameter {} was not", name_);
return 0;
}

View File

@ -42,7 +42,7 @@ public:
Shape outShape = shapeA;
outShape.set(outShape.size() - 1, shapeB[shapeB.size() - 1]);
ABORT_IF(shapeA[shapeA.size() - 1] != shapeB[shapeB.size() - 2],
"matrix product requires dimensions to match");
"Matrix product requires dimensions to match");
return outShape;
}
@ -165,7 +165,7 @@ public:
Shape outShape = shapeA;
outShape.set(outShape.size() - 1, shapeB[shapeB.size() - 1]);
ABORT_IF(shapeA[shapeA.size() - 1] != shapeB[shapeB.size() - 2],
"matrix product requires dimensions to match");
"Matrix product requires dimensions to match");
return outShape;
}
@ -309,7 +309,7 @@ public:
Shape outShape = shapeA;
outShape.set(-1, shapeB[-1]);
ABORT_IF(shapeA[-1] != shapeB[-2],
"matrix product requires dimensions to match");
"Batched matrix product requires dimensions to match");
return outShape;
}

0
src/graph/node_operators_unary.h Normal file → Executable file
View File

View File

@ -2,10 +2,12 @@
#include "common/io.h"
#include "tensors/tensor_operators.h"
#include <array>
namespace marian {
void Sgd::updateImpl(Tensor params, Tensor grads) {
void Sgd::updateImpl(Tensor params, Tensor grads, size_t actualMBSize, size_t refMBSize) {
actualMBSize, refMBSize; // (no correction for base update needed beyond using ce-sum)
using namespace functional;
Element(_1 -= eta_ * _2,
params,
@ -14,9 +16,10 @@ void Sgd::updateImpl(Tensor params, Tensor grads) {
params->getBackend()->synchronize();
}
// Aagrad
// Adagrad
void Adagrad::updateImpl(Tensor params, Tensor grads) {
void Adagrad::updateImpl(Tensor params, Tensor grads, size_t actualMBSize, size_t refMBSize) {
ABORT_IF(actualMBSize != refMBSize, "Adagrad does not support rational hyper-parameter adjustment");
if(!alloc_)
alloc_ = New<TensorAllocator>(params->getBackend());
@ -62,7 +65,7 @@ void Adagrad::load(const std::string& name,
if(item.name == "adagrad_gt") {
vGt.resize(totalSize);
std::copy(
(float*)item.data(), (float*)item.data() + totalSize, vGt.begin());
(float*)item.data(), ((float*)item.data()) + totalSize, vGt.begin());
}
}
if(vGt.empty()) {
@ -109,7 +112,7 @@ void Adagrad::save(const std::string& name,
item.type = Type::float32;
item.bytes.resize(vGt.size() * sizeOf(item.type));
std::copy(
(char*)vGt.data(), (char*)vGt.data() + vGt.size(), item.bytes.begin());
(char*)vGt.data(), (char*)(vGt.data() + vGt.size()), item.bytes.begin());
io::saveItems(name, {item});
}
@ -121,7 +124,8 @@ void Adagrad::resetStats() {
// Adam
void Adam::updateImpl(Tensor params, Tensor grads) {
void Adam::updateImpl(Tensor params, Tensor grads, size_t actualMBSize, size_t refMBSize) {
// lazy allocation
if(!alloc_)
alloc_ = New<TensorAllocator>(params->getBackend());
@ -130,29 +134,42 @@ void Adam::updateImpl(Tensor params, Tensor grads) {
alloc_->reserveExact(2 * params->memory()->size());
alloc_->allocate(mt_, {1, elements});
mt_->set(0.f);
alloc_->allocate(vt_, {1, elements});
vt_->set(0.f);
}
t_++;
float denom1 = 1 - (float)std::pow(beta1_, t_);
float denom2 = 1 - (float)std::pow(beta2_, t_);
double Tref = (double)refMBSize;
double T = (double)actualMBSize;
// adjust for minibatch-size changes if Adam parameters are given a reference size (else do nothing)
double eta = eta_ * (T/Tref);
double beta1 = beta1_;
double beta2 = beta2_;
double decay = w_ ;
// denominators. At steady state: =1. This recursion does the same as the Adam beta correction term.
denom1_ = (beta1 * denom1_) + (1 - beta1); // momentum smoothing
denom2_ = (beta2 * denom2_) + (1 - beta2); // RMS normalization
LOG_ONCE(info, "[adam] First update: Tref = {}, T = {}, eta = {} -> {}, beta = {}, {}", Tref, T, eta_, eta, beta1, beta2);
// numerators. Divide by T to convert ce-sum gradient to avg gradient.
using namespace functional;
Element(_1 = ((float)beta1 * _1) + float((1 - beta1) / T ) * _2, mt_, grads); // momentum smoothing. At steady state: =smoothed avg gradient
Element(_1 = ((float)beta2 * _1) + float((1 - beta2) / T / T) * (_2 * _2), vt_, grads); // RMS normalization. At steady state: =mean square of the avg gradients
Element(_1 = (beta1_ * _1) + ((1 - beta1_) * _2), mt_, grads);
Element(_1 = (beta2_ * _1) + ((1 - beta2_) * (_2 * _2)), vt_, grads);
// apply Adam normalization
float etaf = (float)eta, denom1f = (float)denom1_, denom2f = (float)denom2_, decayf = (float)decay; // (get casts out of Element expression for readability)
Element(_1 -= etaf // learning-rate: x_t = x_{t-1} - \eta * (...)
* (( ( _2 / denom1f) // momentum-smoothed per-sample gradient: m_{t-1}
/ (sqrt(_3 / denom2f) + eps_)) // normalize by RMS: \sqrt(v_{t-1})
+ decayf * _1), // weight-decay: w * x_{t-1}
params, // =_1
mt_, // =_2
vt_ // =_3
);
Element(_1 -= eta_ // learning-rate: x_t = x_{t-1} - \eta * (...)
* ((_2 / denom1) // 1st moment: m_{t-1}
/ (sqrt(_3 / denom2) + eps_) // 2nd moment: \sqrt(v_{t-1})
+ w_ * _1), // weight-decay: w * x_{t-1}
params,
mt_,
vt_);
params->getBackend()->synchronize();
params->getBackend()->synchronize(); // @TODO: This should not be in here. Maybe in the wrapper. Why is it needed at all?
}
void Adam::load(const std::string& name,
@ -168,6 +185,7 @@ void Adam::load(const std::string& name,
std::vector<float> vMt;
std::vector<float> vVt;
std::array<double, 2> vDenoms;
auto items = io::loadItems(name);
for(auto item : items) {
@ -178,12 +196,18 @@ void Adam::load(const std::string& name,
if(item.name == "adam_mt") {
vMt.resize(totalSize);
std::copy(
(float*)item.data(), (float*)item.data() + totalSize, vMt.begin());
(float*)item.data(), ((float*)item.data()) + totalSize, vMt.begin());
}
if(item.name == "adam_vt") {
else if(item.name == "adam_vt") {
vVt.resize(totalSize);
std::copy(
(float*)item.data(), (float*)item.data() + totalSize, vVt.begin());
(float*)item.data(), ((float*)item.data()) + totalSize, vVt.begin());
}
else if(item.name == "adam_denoms") {
ABORT_IF(totalSize != 2, "adam_denoms should have 2 entries");
std::copy(
(double*)item.data(), ((double*)item.data()) + totalSize, vDenoms.begin());
// Back compat note: Old files lacked "adam_denoms". For those, vDenoms will remain 0, which reproduces the old behavior.
}
}
if(vMt.empty() || vVt.empty()) {
@ -212,6 +236,9 @@ void Adam::load(const std::string& name,
auto opt = std::dynamic_pointer_cast<Adam>(opts[id]);
opt->vt_->set(std::vector<float>(begin, end));
});
denom1_ = vDenoms[0];
denom2_ = vDenoms[1];
//LOG(info, "done loading Adam params");
}
@ -248,7 +275,7 @@ void Adam::save(const std::string& name,
itemMt.type = Type::float32;
itemMt.bytes.resize(vMt.size() * sizeOf(itemMt.type));
std::copy(
(char*)vMt.data(), (char*)vMt.data() + vMt.size(), itemMt.bytes.begin());
(char*)vMt.data(), (char*)(vMt.data() + vMt.size()), itemMt.bytes.begin());
io::Item itemVt;
itemVt.name = "adam_vt";
@ -256,9 +283,19 @@ void Adam::save(const std::string& name,
itemVt.type = Type::float32;
itemVt.bytes.resize(vVt.size() * sizeOf(itemVt.type));
std::copy(
(char*)vVt.data(), (char*)vVt.data() + vVt.size(), itemVt.bytes.begin());
(char*)vVt.data(), (char*)(vVt.data() + vVt.size()), itemVt.bytes.begin());
io::saveItems(name, {itemMt, itemVt});
// @TODO: this pattern is duplicated several times; refactor it
std::array<double, 2> vDenoms{denom1_, denom2_};
io::Item itemDenoms;
itemDenoms.name = "adam_denoms";
itemDenoms.shape = Shape({1, (int)vDenoms.size()});
itemDenoms.type = Type::float64;
itemDenoms.bytes.resize(vDenoms.size() * sizeOf(itemDenoms.type));
std::copy(
(char*)vDenoms.data(), (char*)(vDenoms.data() + vDenoms.size()), itemDenoms.bytes.begin());
io::saveItems(name, {itemMt, itemVt, itemDenoms});
}
void Adam::resetStats() {
@ -267,6 +304,9 @@ void Adam::resetStats() {
if(vt_)
vt_->set(0.f);
denom1_ = 0; // @BUGBUG: or 1 or refMBSize if so specified. Fix once we have proper parameterization for that.
denom2_ = 0;
}
Ptr<OptimizerBase> Optimizer(Ptr<Options> options) {
@ -287,7 +327,7 @@ Ptr<OptimizerBase> Optimizer(Ptr<Options> options) {
} else if(opt == "adagrad") {
return Optimizer<Adagrad>(lrate, clipper, params);
} else if(opt == "adam") {
return Optimizer<Adam>(lrate, clipper, params);
return Optimizer<Adam>(lrate, clipper, params); // @TODO: parse the parameters here, or just pass the options object
} else {
ABORT("Unknown optimizer: {}", opt);
}

View File

@ -21,19 +21,29 @@ public:
OptimizerBase(float eta, Ptr<ClipperBase> clipper = nullptr)
: eta_(eta), clipper_(clipper) {}
void update(Ptr<ExpressionGraph> graph) {
static constexpr size_t mbSizeNotProvided = SIZE_MAX;
void update(Ptr<ExpressionGraph> graph, size_t mbSize = mbSizeNotProvided) {
Tensor p = graph->params()->vals();
Tensor g = graph->params()->grads();
update(p, g);
update(p, g, mbSize);
}
void update(Tensor params, Tensor grads) {
void update(Tensor params, Tensor grads, size_t mbSize = mbSizeNotProvided) {
if(clipper_)
clipper_->clip(grads);
// In case we want to add a multiply factor to our learning rate
updateImpl(params, grads);
size_t refMBSize = refMBSize_;
if (refMBSize == 0) { // optimizer not configured to use hyper-parameter auto-adjustment
refMBSize = mbSize = 1; // neutral settings that keep the standard behavior
}
else { // optimizer is configured to auto-adjust hyper-parameters
ABORT_IF(mbSize == mbSizeNotProvided, "Using rational optimizer auto-adjustment with trainer that does not provide MB size");
// note: this behavior is only meaningful if using the ce-sum criterion
}
updateImpl(params, grads, mbSize, refMBSize);
}
virtual void init(TrainingState& state) override {
@ -78,7 +88,7 @@ public:
bool /*isMainProcess*/ = true) {}
protected:
virtual void updateImpl(Tensor params, Tensor grads) = 0;
virtual void updateImpl(Tensor params, Tensor grads, size_t actualMBSize, size_t refMBSize) = 0;
virtual void parseParams(const std::vector<float>& params) = 0;
virtual void resetStats() = 0;
@ -86,6 +96,8 @@ protected:
float eta_;
// Clip gradient norm
Ptr<ClipperBase> clipper_;
// Reference MB size. This enables automatic adjustment of optimizer hyper-parameters to MB size.
size_t refMBSize_{0}; // 0 means no adjustment
};
/**
@ -97,7 +109,7 @@ public:
: OptimizerBase(eta, clipper) {}
private:
void updateImpl(Tensor params, Tensor grads) override;
void updateImpl(Tensor params, Tensor grads, size_t actualMBSize, size_t refMBSize) override;
virtual void parseParams(const std::vector<float>& /*params*/) override {}
virtual void resetStats() override {}
@ -123,7 +135,7 @@ public:
bool /*isMainProcess*/ = true) override;
private:
void updateImpl(Tensor params, Tensor grads) override;
void updateImpl(Tensor params, Tensor grads, size_t actualMBSize, size_t refMBSize) override;
void resetStats() override;
void parseParams(const std::vector<float>& params) override {
@ -140,11 +152,13 @@ private:
* @brief Adam optimizer
*
* https://arxiv.org/pdf/1412.6980v8.pdf
*
* with Frank's modifications for automatic hyper-parameter adjustment.
*/
class Adam : public OptimizerBase {
public:
Adam(float eta, Ptr<ClipperBase> clipper = nullptr)
: OptimizerBase(eta, clipper), t_(0) {}
: OptimizerBase(eta, clipper) {}
void load(const std::string& name,
const std::vector<Ptr<OptimizerBase>>& opts,
@ -156,9 +170,11 @@ public:
bool isMainProcess = true) override;
private:
void updateImpl(Tensor params, Tensor grads) override;
void updateImpl(Tensor params, Tensor grads, size_t actualMBSize, size_t refMBSize) override;
void resetStats() override;
// Adam parameters:
// [beta1, beta2, eps, w, refMBSize]
virtual void parseParams(const std::vector<float>& params) override {
if(params.size() > 0)
beta1_ = params[0];
@ -169,15 +185,29 @@ private:
// weighted decay for AdamW, to be explored, disabled by default
if(params.size() > 3)
w_ = params[3];
w_ = params[3]; // default (disabled): 0
// automatic learning-rate adjustment
// If users provide, in addition to the hyper-parameters, a reference minibatch size,
// that these hyper-parameters were originally tuned for, then the learning-rate gets
// adjusted accordingly. Note: Requires user to also use ce-sum criterion.
if(params.size() > 4) {
refMBSize_ = (size_t)params[4]; // default (disabled): 0
LOG(info, "Note: Modified Adam optimizer: automatically adjusting learning rate as if minibatch size was {}", refMBSize_);
}
}
// hyper-parameters
float beta1_ = 0.9f;
float beta2_ = 0.999f;
float eps_ = 1e-8f;
float w_ = 0.0f;
size_t t_;
// CPU-side running accumulators
double denom1_ = 0;
double denom2_ = 0;
// GPU-side running accumulators
Ptr<TensorAllocator> alloc_;
Tensor mt_;
Tensor vt_;

0
src/tensors/gpu/add.cu Normal file → Executable file
View File

6
src/tensors/gpu/element.inc Normal file → Executable file
View File

@ -48,3 +48,9 @@ template void Element<Assign<Var<1>, BinaryFunctor<elem::Div, BinaryFunctor<elem
template void Element<Assign<Var<1>, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, Assignee<1>>>>>>>(Assign<Var<1>, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, Assignee<1>>>>>>, marian::Tensor);
template void Element<Assign<Var<1>, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, BinaryFunctor<elem::Plus, Assignee<1>, Capture>>>, Capture>>>>>(Assign<Var<1>, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, BinaryFunctor<elem::Plus, Assignee<1>, Capture>>>, Capture>>>>, marian::Tensor);
template void Element<Assign<Var<1>, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Lt, Assignee<1>, Capture>, Capture>>>(Assign<Var<1>, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Lt, Assignee<1>, Capture>, Capture> >, marian::Tensor);
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4> >, Assignee<4> > > >, Capture>, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> > > >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4> >, Assignee<4> > > >, Capture>, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> > > >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4> >, Assignee<4> >, Capture> > >, Capture>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> >, Capture> > >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4> >, Assignee<4> >, Capture> > >, Capture>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> >, Capture> > >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> >, Capture> >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> >, Capture> >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, Assignee<3>, Capture> >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, Assignee<3>, Capture> >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<5> >, Assignee<4> >, Capture> >, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4> >, Assignee<4> >, Capture> > >, Capture>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> >, Capture> > >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<5> >, Assignee<4> >, Capture> >, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4> >, Assignee<4> >, Capture> > >, Capture>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> >, Capture> > >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, Assignee<3>, Capture> >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, Assignee<3>, Capture> >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);

View File

@ -74,7 +74,7 @@ public:
}
}
void free(Tensor& t) { allocator_->free(t->memory()); }
void free(const Tensor& t) { allocator_->free(t->memory()); }
Tensor asTensor() {
auto mem = allocator_->memory();

View File

@ -72,7 +72,7 @@ class MPIWrapper : public IMPIWrapper
public:
MPIWrapper(bool multiThreaded) {
int requiredThreadingMode = multiThreaded ? MPI_THREAD_MULTIPLE : MPI_THREAD_SINGLE;
int requiredThreadingMode = multiThreaded ? MPI_THREAD_MULTIPLE : MPI_THREAD_FUNNELED; // FUNNELED means only one thread ever calls MPI
int argc = 1; char* argv[] = { const_cast<char*>("this.exe") }; char** argvp = argv; // dummy argc/argv since MPI_Init needs something here
int providedThreadingMode;
@ -124,6 +124,8 @@ public:
HANDLE_MPI_ERROR(MPI_Recv(buf, (int)count, datatype, (int)sourceRank, tag, comm, status));
}
// All-reduces 'count' elements of 'datatype' in 'sendbuf' into 'recvbuf'
// across 'comm', combining values with 'op'. Thin wrapper over MPI_Allreduce
// with error checking via HANDLE_MPI_ERROR.
virtual void allReduce(const void* sendbuf, void* recvbuf, size_t count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) const override {
// The MPI standard mandates MPI_IN_PLACE instead of aliasing send and
// receive buffers; MS-MPI actually enforces this, so rewrite the aliased case.
if (sendbuf == recvbuf)
sendbuf = MPI_IN_PLACE; // MSMPI requires this
// note: 'count' is narrowed to int, as required by the MPI C API
HANDLE_MPI_ERROR(MPI_Allreduce(sendbuf, recvbuf, (int)count, datatype, op, comm));
}
virtual void finalize() override {

View File

@ -203,7 +203,6 @@ public:
void swapParams(const std::vector<Tensor>& paramShards) const override {
// Update all graphs with parameter shard
auto gather = [this, paramShards](size_t idx, size_t begin, size_t end) {
ABORT_IF(end - begin != paramShards[idx]->size(), "inconsistent shard size (swapParams, [{}], {} vs {})??", idx, end-begin, paramShards[idx]->size());
// Copy parameter shard to each graph, apart from last graph

View File

@ -4,10 +4,11 @@
#include "3rd_party/threadpool.h"
#include "tensors/gpu/cuda_helpers.h"
#include "common/timer.h"
// Generated by NCCL make files in build/nccl/include;
// include dir has been set in CMake files. NCCL add version number etc.
#include "nccl.h"
#include <cuda_runtime.h>
#if (NCCL_MAJOR<3 || NCCL_MINOR<2)
@ -44,6 +45,14 @@ private:
}
}
void synchronizeAllOnNullStream() const {
for (int i = 0; i < graphs_.size(); ++i) {
auto backend = graphs_[i]->params()->vals()->getBackend();
backend->setDevice();
backend->synchronize(); // note: synchronize() does not set the device by itself
}
}
// Identifier of this MPI process, for use in log messages.
// Returns the empty string when not running under MPI (mpi_ is null).
std::string mpiIdStr() const {
if (!mpi_)
return std::string();
return mpi_->idStr();
}
@ -150,6 +159,16 @@ public:
CUDA_CHECK(cudaStreamCreate(&streams_[i]));
}
// Note: due to a bug in NCCL 2.3.5, NCCL's allocation of shared memory intermittently fails with
// Failed, NCCL error 2 'unhandled system error' - ncclGroupEnd()
// include/shm.h:26 NCCL WARN Unable to allocate shared memory (4263936 bytes) : Interrupted system call
// This is caused by SIGPROF signals being raised, causing EINTR, which NCCL does not handle.
// Reported as Issue #137 on the NCCL Github, and supposedly fixed for 2.3.7 (to be verified).
// To work around, we disable the SIGPROF signal during NCCL initialization.
#define SIG_BAD 27 // SIGPROF
BlockSignal blockThread(SIG_BAD, pthread_sigmask); // Note: I don't know yet which of these two makes the difference.
BlockSignal blockProc(SIG_BAD, sigprocmask); // So for now just block both.
// set up NCCL
// Since we want to use MPI, we cannot use NCCL's handy convenience function. Instead, we must go the laborious route.
// cf. https://docs.nvidia.com/deeplearning/sdk/nccl-developer-guide/index.html#multidevprothrd
@ -160,35 +179,19 @@ public:
NCCL_CHECK(ncclGetUniqueId(&uniqueId));
if (mpi_) {
//LOG(info, "[{}] before bcast", mpiIdStr());
static_assert(sizeof(uniqueId) == NCCL_UNIQUE_ID_BYTES, "wrong NCCL_UNIQUE_ID_BYTES??"); // (this value is used in NVidia examples)
mpi_->bCast(&uniqueId, sizeof(uniqueId), MPI_BYTE, 0);
//LOG(info, "[{}] after bcast", mpiIdStr());
}
//mpiBarrier(); // should not be needed since bCast is a barrier
// Note: due to a bug in NCCL 2.3.5, NCCL's allocation of shared memory intermittently fails with
// Failed, NCCL error 2 'unhandled system error' - ncclGroupEnd()
// include/shm.h:26 NCCL WARN Unable to allocate shared memory (4263936 bytes) : Interrupted system call
// This is caused by SIGPROF signals being raised, causing EINTR, which NCCL does not handle.
// Reported as Issue #137 on the NCCL Github.
// To work around, we disable the SIGPROF signal during NCCL initialization.
#define SIG_BAD 27 // SIGPROF
BlockSignal blockThread(SIG_BAD, pthread_sigmask); // Note: I don't know yet which of these two makes the difference.
BlockSignal blockProc(SIG_BAD, sigprocmask); // So for now just block both.
groupStart();
for (int localDeviceIndex = 0; localDeviceIndex < devices_.size(); localDeviceIndex++) {
CUDA_CHECK(cudaSetDevice(devices_[localDeviceIndex]));
//LOG(info, "[{}] ncclCommInitRank {} out of {}: GPU[{}]", mpiIdStr(), myNcclRank(localDeviceIndex), numNcclRanks(), localDeviceIndex);
NCCL_CHECK(ncclCommInitRank(&comms_[localDeviceIndex], numNcclRanks(), uniqueId, myNcclRank(localDeviceIndex)));
//LOG(info, "[{}] done ncclCommInitRank {} out of {}, GPU[{}]", mpiIdStr(), myNcclRank(localDeviceIndex), numNcclRanks(), localDeviceIndex);
}
groupEnd();
mpiBarrier(); // (synchronize the log messages)
LOG(debug, "NCCLCommunicator constructed successfully for {}", mpiIdStr());
LOG(info, "NCCLCommunicator constructed successfully.");
mpiBarrier(); // (synchronize the log messages)
}
@ -206,61 +209,46 @@ public:
for(size_t i = 0; i < graphs_.size(); ++i) {
size_t begin, end; std::tie
(begin, end) = localShardRange(i);
//std::cerr << "[" << mpiIdStr() << "] foreach " << begin << " " << end << std::endl;
try{
if (parallel)
threadResults_[i] = threadPool_.enqueue(func, i, begin, end);
//group.emplace_back(func, i, begin, end);
//threadPool_.enqueue([&](size_t i){
// func(i, begin, end);
//}, i);
else
func(i, begin, end);
}
catch (const std::exception& e) // something leaks thread handles
{
// keeping this around, in case the error still happens --@TODO: remove once this has not been observed anymore
LOG(info, "caught exception in foreach {}", i);
system("ps -T -A");
throw;
}
}
if (parallel)
for(size_t i = 0; i < graphs_.size(); ++i)
threadResults_[i].wait();
//for(auto& t : group) // (note: group is empty is not parallel)
// t.join();
}
void scatterReduce() const override {
synchronizeAllOnNullStream();
groupStart();
for(int i = 0; i < graphs_.size(); ++i) {
size_t begin, end; std::tie
(begin, end) = localShardRange(i);
//std::cerr << "[" << mpiIdStr() << "] scatterReduce " << begin << " " << end << std::endl;
auto grads = graphs_[i]->params()->grads();
const auto* sendbuf = grads->data();
auto* recvbuf = grads->subtensor(begin, end-begin)->data();
size_t bufsize = shardSize();
ABORT_IF(grads->subtensor(begin, end-begin)->size() != bufsize, "unexpected subtensor size??");
NCCL_CHECK(ncclReduceScatter(sendbuf, recvbuf, bufsize, ncclFloat, ncclSum, comms_[i], streams_[i]));
}
groupEnd();
//std::cerr << "scatterReduce submitted" << std::endl;
synchronizeAll();
//std::cerr << "scatterReduce completed" << std::endl;
}
// This distributes all 64 model shards to all 64 GPUs.
// @TODO: For unknown reasons, this takes longer than any other operation incl. scatterReduce().
// But both should have the same number of data transfers of the same size.
void allGather() const override {
synchronizeAllOnNullStream();
groupStart();
for(int i = 0; i < graphs_.size(); ++i) {
size_t begin, end; std::tie
(begin, end) = localShardRange(i);
//std::cerr << "[" << mpiIdStr() << "] allGather " << begin << " " << end << std::endl;
auto vals = graphs_[i]->params()->vals();
const auto* sendbuf = vals->subtensor(begin, end-begin)->data();
@ -281,14 +269,12 @@ catch (const std::exception& e) // something leaks thread handles
auto distributedParams = gatherState([&](size_t localDeviceIndex) {
std::vector<float> tmp;
distributedParamShards[localDeviceIndex]->get(tmp);
//LOG(info, "[{}] swapParams.getFn({}) -> size {}, ({}, {}, {}, ...)", mpiIdStr(), localDeviceIndex, tmp.size(), tmp[0], tmp[1], tmp[2]);
return tmp;
});
// Now all MPI processes hold an identical copy of a concatenation of all distributedParamShards[] across local and remote devices.
std::vector<float> localParams;
graphs_[0]->params()->vals()->get(localParams);
// Now all MPI processes hold an identical copy of params() (remember, we assumed all devices hold the same params()).
//LOG(info, "[{}] swapParams: distributedParams.size = {}, localParams.size = {}", mpiIdStr(), distributedParams.size(), localParams.size());
ABORT_IF(distributedParams.size() != localParams.size(), "distributed sharded and local params have different size??");
// swap
@ -331,7 +317,6 @@ catch (const std::exception& e) // something leaks thread handles
tmp = getFn(localDeviceIndex);
localData.insert(localData.end(), tmp.begin(), tmp.end());
}
//LOG(info, "[{}] gatherState: localData.size = {}", mpiIdStr(), localData.size());
// second, concatenate across MPI processes
// Note that all local devices occupy consecutive ncclRanks in order.
std::vector<float> data;

View File

@ -3,6 +3,7 @@
#include "common/definitions.h"
#include "functional/functional.h"
#include "tensors/tensor_operators.h"
#include "optimizers/optimizers.h"
namespace marian {
@ -12,18 +13,33 @@ namespace marian {
*/
class ExponentialSmoothing {
public:
ExponentialSmoothing(float decay = 0.0f)
: mvAvg_{decay > 0}, mvDecay_{decay} {}
// Constructs smoothing parameters from the "exponential-smoothing" option,
// which must hold one or two numbers:
//  [0]: decay factor applied per update (0 disables smoothing)
//  [1]: optional reference batch size in target words that the decay factor
//       was specified for (enables batch-size correction of the decay)
ExponentialSmoothing(Ptr<Options> options) {
auto args = options->get<std::vector<float>>("exponential-smoothing");
ABORT_IF(args.size() < 1 || args.size() > 2, "exponential-smoothing parameter must be one or two numbers");
mvDecayBy_ = args[0];
if (args.size() > 1)
refBatchTrgWords_ = (size_t)args[1];
mvAvg_ = (mvDecayBy_ > 0); // smoothing is enabled iff a positive decay factor was given
}
protected:
void updateAvgParams(Tensor paramsAvg, Tensor params, size_t batches) {
void updateAvgParams(Tensor paramsAvg, Tensor params, size_t batches, size_t actualBatchTrgWords = OptimizerBase::mbSizeNotProvided) {
double beta = 1. - mvDecayBy_;
// correction term if batch size is different from what mvDecayBy_ was specified for
if (refBatchTrgWords_) {
ABORT_IF(actualBatchTrgWords == OptimizerBase::mbSizeNotProvided,
"This graph-group type does not support reference batch size specification for exponential-smoothing");
beta = pow(beta, (double)actualBatchTrgWords / (double)refBatchTrgWords_);
}
// reduce effect of decay parameter in early training stages
float decayBy = std::max(1.f - (float)beta,
1.f - (float)(batches + 1) / (float)(batches + 10));
using namespace functional;
float decay = std::max(mvDecay_,
1.f - (float)(batches + 1) / (float)(batches + 10));
Element(_1 = ((1.f - decay) * _1) + (decay * _2), paramsAvg, params);
Element(_1 = ((1.f - decayBy) * _1) + (decayBy * _2), paramsAvg, params);
}
bool mvAvg_{false};
float mvDecay_{1e-4f};
float mvDecayBy_{1e-4f}; // decay prior model by this factor
size_t refBatchTrgWords_{0}; // mvDecayBy_ is specified for this batch size (in target words)
};
} // namespace marian

View File

@ -33,6 +33,10 @@ public:
virtual void save(bool isFinal = false) = 0;
void validate() {
ABORT_IF(finalized_, "Training has already finished.");
}
virtual void finalize() {
finalized_ = true;
}
@ -48,6 +52,7 @@ public:
* The actual allowed size is then determined by multiplying it with the
* number of devices, which is passed in as the 'multiplier'.
*/
// @TODO: Can this be made const? It seems wrong to have a stateful method that still returns a result.
virtual Ptr<data::BatchStats> collectStats(Ptr<ExpressionGraph> graph,
Ptr<models::ModelBase> model,
size_t multiplier = 1) {
@ -194,10 +199,8 @@ public:
}
virtual void finalize() override {
if (mpi_) {
if (mpi_)
finalizeMPI(std::move(mpi_));
ABORT_IF(mpi_, "MPI not finalized??");
}
Base::finalize();
}
};

View File

@ -7,7 +7,7 @@ namespace marian {
AsyncGraphGroup::AsyncGraphGroup(Ptr<Options> config)
: GraphGroup(config),
ExponentialSmoothing{options_->get<float>("exponential-smoothing")},
ExponentialSmoothing(options_),
devices_{Config::getDevices(options_)},
shardSync_(devices_.size()),
optimizerDelay_{options_->get<size_t>("optimizer-delay")} {

View File

@ -55,7 +55,7 @@ public:
AsyncGraphGroup(Ptr<Options> config);
void update(Ptr<data::Batch> batch) override {
ABORT_IF(finalized_, "Training has already finished");
validate();
execute(batch);
}

View File

@ -376,7 +376,7 @@ public:
* Update any client model with given batch if batch is assigned to this node.
*/
void update(Ptr<data::Batch> batch) override {
ABORT_IF(finalized_, "Training has already finished");
validate();
// Only take batch assigned to this node
if(batchIter_ % mpi_->numMPIProcesses() == (size_t)mpi_->myMPIRank()) {
execute(batch);

View File

@ -143,7 +143,7 @@ public:
* Update any client model with given batch if batch is assigned to this node.
*/
void update(Ptr<data::Batch> batch) override {
ABORT_IF(finalized_, "Training has already finished");
validate();
if(batchIter_ % mpi_->numMPIProcesses() == mpi_->myMPIRank()) { // Only take batch assigned to this node
execute(batch);
}

View File

@ -25,7 +25,7 @@ private:
public:
SingletonGraph(Ptr<Options> config)
: GraphGroup(config),
ExponentialSmoothing(options_->get<float>("exponential-smoothing")) {
ExponentialSmoothing(config) {
// Get device ID
auto devices = Config::getDevices(options_);
ABORT_IF(devices.size() != 1, "Only one device ID should be provided for singleton training");
@ -40,7 +40,7 @@ public:
}
void update(Ptr<data::Batch> batch) override {
ABORT_IF(finalized_, "Training has already finished");
validate();
execute(batch);
}

View File

@ -4,7 +4,7 @@ namespace marian {
SyncGraphGroup::SyncGraphGroup(Ptr<Options> config)
: GraphGroup(config),
ExponentialSmoothing{options_->get<float>("exponential-smoothing")},
ExponentialSmoothing(config),
delay_{options_->get<size_t>("optimizer-delay")} { // @TODO: rename to something else; delay means delayed updated, not accumulation
mpi_ = initMPI(/*multiThreaded=*/false); // when not running under MPI, this will be a fake object that represents a one-MPI-process setup
@ -25,9 +25,16 @@ SyncGraphGroup::SyncGraphGroup(Ptr<Options> config)
// This part of the code will not special-case any of this here.
// Rather, it is assumed that the communicator knows to reduce unnecessary transfers to no-ops.
comm_ = createCommunicator(graphs_, /*noNccl=*/options_->get<bool>("no-nccl", false), /*mpi=*/mpi_);
auto type = utils::toUpper(devices_.front().typeAsString()) + "s";
if (mpi_->numMPIProcesses() > 1)
LOG(info, "[training] Using {} {}, distributed over {} MPI processes", mpi_->numMPIProcesses() * devices_.size(), type, mpi_->numMPIProcesses());
else
LOG(info, "[training] Using {} {}", devices_.size(), type);
}
void SyncGraphGroup::setScheduler(Ptr<Scheduler> scheduler) /*override*/ {
validate();
scheduler_ = scheduler;
// optimizer has to be registered last to see changes of learning rate
// @TODO: ^^Fix this comment. Either it refers to the scheduler, or it should be moved. Which one?
@ -101,31 +108,144 @@ void SyncGraphGroup::initializeAvg() {
}
Ptr<data::BatchStats> SyncGraphGroup::collectStats() {
// @TODO: This should only run on MPI process 0. Also we can share vv this vv expression with update().
size_t multiplier = devices_.size() * mpi_->numMPIProcesses() * delay_;
return GraphGroup::collectStats(graphs_[0], builders_[0], multiplier);
// @TODO: This is an incompatible change. Decide how to handle that.
//size_t multiplier = devices_.size() * mpi_->numMPIProcesses() * delay_;
return GraphGroup::collectStats(graphs_[0], builders_[0]/*, multiplier*/);
}
void SyncGraphGroup::update(Ptr<data::Batch> batch) /*override*/ {
ABORT_IF(finalized_, "Training has already finished");
// helper for MB scaling: quantize the ratio with a given error margin
// Rounds 'ratio' up to a multiple of the coarsest power of two for which the
// relative rounding error stays within 25%:
//  - using a 25% larger MB size should not break convergence
//  - @TODO: not using the first 25% of the next block is OK since those are dominated by data exchange
// Returns 'ratio' unchanged if it is 0 or if no quantization is acceptable.
static double roundUpRatio(double ratio) {
  if (ratio == 0)
    return ratio;
  // coarsest granularity to try: the largest power of two that fits into ratio
  double granularity = 1;
  for (; granularity * 2 < ratio; granularity *= 2)
    ;
  // try successively finer powers of two until rounding up to a multiple of
  // the granularity stays within the error margin
  const double maxError = 0.25;
  for (; granularity >= 1; granularity /= 2) {
    double candidate = std::ceil(ratio / granularity) * granularity;
    double relError = (candidate - ratio) / ratio;
    if (std::fabs(relError) <= maxError)
      return candidate;
  }
  return ratio; // no acceptable power-of-two quantization found
}
// distribute the batch over (delay, local device, MPI rank)
size_t numSubBatches = delay_ * devices_.size() * mpi_->numMPIProcesses();
auto subBatches = batch->split(numSubBatches);
subBatches.resize(numSubBatches); // pad with nullptrs if out of data
// helper routine that handles accumulation and load-balancing of sub-batches to fill all devices
// It adds 'newBatch' to 'pendingBatches_', and if sufficient batches have been queued, then
// returns 'pendingBatches_' in 'subBatches' and resets it. If not, it returns false.
// Accumulates 'newBatch' into pendingBatches_ and decides whether enough data
// has been queued to run one update across all local devices and MPI workers.
// Returns false while more data is needed. Once at least 'ratio' batches are
// pending, returns true; the queued batches are then shortened/re-split for
// MB-size warm-up and load balancing and moved into 'subBatches' (but see the
// NOTE below about the exact-match path).
bool SyncGraphGroup::tryGetSubBatches(Ptr<data::Batch> newBatch, std::vector<Ptr<data::Batch>>& subBatches) {
pendingBatches_.push_back(newBatch);
size_t warpSize = devices_.size() * mpi_->numMPIProcesses(); // warp := set of batches processed concurrently across GPUs and workers
size_t pendingTrgWords = 0; // diagnostics only: compute how many target labels are pending so far
for (const auto& batch : pendingBatches_)
pendingTrgWords += batch->wordsTrg();
// MB-size warm-up and dynamic scaling:
// 'ratio' is the number of unit batches the scheduler wants processed per update.
double ratio;
bool isDynamic = scheduler_->tryGetDynamicMBSizeMultiplier(ratio);
if (isDynamic)
ratio = roundUpRatio(ratio); // round up to full batches if within a certain error margin --@BUGBUG: Not invariant w.r.t. GPU size, as ratio is relative to what fits into 1 GPU
else // if dynamic scaling not enabled, then fill each GPU with a batch
ratio = (double)(delay_ * warpSize);
if (pendingBatches_.size() < ratio)
return false; // not enough data yet
// now we have enough to fill at least 'ratio' batches
// NOTE(review): this early return leaves 'subBatches' untouched and does not
// consume pendingBatches_ -- verify that the caller handles the exact-match
// case, or whether this path should also execute the std::move() below.
if (pendingBatches_.size() == ratio)
return true; // nothing to do, e.g. warm-up not enabled
// warm-up is happening
LOG_ONCE(info, "[training] Mini-batch-warmup enabled");
// shorten all batches a little to accurately reflect ratio
// e.g. ratio = 3.3 for 4 batches: Reduce each by 3.3/4
// Alternatively, we could just shorten the last 'warp', but that would not be invariant to warp size.
// NOTE(review): 'before'/'after' are accumulated but never read afterwards in
// this function -- presumably left over for diagnostics; confirm or remove.
size_t before = 0, after = 0;
for (auto& batch : pendingBatches_) {
// proportionally reduce each pending batch so that their total reflects 'ratio'
auto reducedBatchSize = (size_t)ceil((double)batch->size() * ratio / (double)pendingBatches_.size());
size_t minSize = 1;
if (pendingBatches_.size() == 1) { // enforce a minimum (only needed/correct if still in first batch)
size_t minTrgWords = 256; // don't go below this number of target words, as it seems excessive --@TODO: parameterize?
minSize = 1 + (minTrgWords * batch->size() - 1) / batch->wordsTrg(); // approximately convert minTrgWords into a #sentences
}
reducedBatchSize = std::max(reducedBatchSize, minSize);
before += batch->wordsTrg();
if (reducedBatchSize < batch->size())
batch = batch->split(/*numSubBatches=*/1, reducedBatchSize).front();
after += batch->wordsTrg();
}
// load-balance: distribute the last numWarps-group's batches over GPUs
// This is tricky since batches do not have the same length, therefore we can only split, but not merge.
auto numWarps = (pendingBatches_.size() - 1) / warpSize + 1; // = ceil(#buffers / (#GPUs * #workers))
auto availableBatches = numWarps * warpSize; // we got this many GPUs anyways, so we better make use of them
if (pendingBatches_.size() < availableBatches) {
// we are not using all available GPUs -> try to load-balance a bit better
auto fullBatches = (numWarps - 1) * warpSize;
auto expandLast = pendingBatches_.size() - fullBatches;
auto toLast = availableBatches - fullBatches;
LOG(info, "attempt to redistribute {} last batches over {}", expandLast, toLast);
auto splitInto = toLast / expandLast; // unfortunately we can only split in integer ratios
// @TODO: We can do better since the last batch is typically smaller.
if (splitInto > 1) {
// split each of last numWarps's batches into 'splitInto' batches
// pop them first
std::vector<Ptr<data::Batch>> batchesToSplit;
while (pendingBatches_.size() > fullBatches) {
batchesToSplit.push_back(pendingBatches_.back());
pendingBatches_.pop_back();
}
// now split them
for (auto& batchToSplit : batchesToSplit) {
LOG(info, "{}-way splitting batchToSplit with size {}", splitInto, batchToSplit->size());
auto splitBatches = batchToSplit->split(splitInto);
for (auto& splitBatch : splitBatches) {
LOG(info, " -> getting batchToSplit with size {}", splitBatch->size());
pendingBatches_.push_back(splitBatch);
}
}
}
ABORT_IF(pendingBatches_.size() > availableBatches, "somehow split into too many batches??");
}
// hand the accumulated batches to the caller and reset the queue
subBatches = std::move(pendingBatches_);
// @TODO: sort by width, so that in case of delay > 1, each GPU gets about the same size
return true;
}
void SyncGraphGroup::update(Ptr<data::Batch> newBatch) /*override*/ {
validate();
std::vector<Ptr<data::Batch>> subBatches;
bool gotSubBatches = tryGetSubBatches(newBatch, subBatches);
// not enough data yet: return right away
if (!gotSubBatches)
return;
// Helper to access the subBatches array
auto getSubBatch = [&](size_t t, size_t localDeviceIndex, size_t rank) {
auto getSubBatch = [&](size_t t, size_t localDeviceIndex, size_t rank) -> Ptr<data::Batch> {
// 't' (the delay) should be slowest changing dimension. If subBatches are sorted by
// length, then grouping sentences of similar length into the same delay step can
// reduce unnecessary time spent in padding.
return subBatches[(t * mpi_->numMPIProcesses() + rank) * devices_.size() + localDeviceIndex];
auto index = (t * mpi_->numMPIProcesses() + rank) * devices_.size() + localDeviceIndex;
if (index < subBatches.size())
return subBatches[index];
else
return nullptr;
};
// Upon very first execution, reset everything
if(first_) {
LOG(debug, "[{}] Processing first minibatch. Batches are processed as {} processes x {} GPUs/process x {} delay steps",
mpi_->idStr(), mpi_->numMPIProcesses(), devices_.size(), delay_);
LOG(info, "[training] Processing first minibatch. Batches are processed as {} processes x {} GPUs/process",
mpi_->numMPIProcesses(), devices_.size());
initialize(subBatches.front());
if(mvAvg_ && paramsAvg_.empty())
initializeAvg();
@ -133,33 +253,34 @@ void SyncGraphGroup::update(Ptr<data::Batch> batch) /*override*/ {
}
// Compute gradients
// This happens in multiple steps in case of delay_ > 1.
// This happens in multiple steps in case of delay > 1.
std::vector<float> localDeviceCosts(devices_.size(), 0.f); // [local device index] aggregate cost for each local device
for (size_t t = 0; t < delay_; t++) {
for (size_t t = 0; getSubBatch(t, 0, 0); t++) { // @TODO: rename 't' to 'delay'
// Execute single forward/backward step
auto forwardBackward = [&](size_t localDeviceIndex, size_t /*begin*/, size_t /*end*/) {
auto graph = graphs_[localDeviceIndex];
auto subBatch = getSubBatch(t, localDeviceIndex, mpi_->myMPIRank());
if(subBatch) {
timer::Timer timer;
auto costNode = builders_[localDeviceIndex]->build(graph, subBatch);
//LOG(info, timer.format(2, "after build: %ws"));
graph->forward();
//LOG(info, timer.format(2, "after forward (no sync): %ws"));
localDeviceCosts[localDeviceIndex] += costNode->scalar();
graph->backward(/*zero=*/t == 0); // only reset gradients to 0 if t = 0
//LOG(info, timer.format(2, "after backward (no sync): %ws"));
//localDeviceCosts[localDeviceIndex] += costNode->scalar(); // moved here for time measurements; @TODO: move this back
//LOG(info, timer.format(2, "after scalar() (that's a sync): %ws"));
}
else { // empty batch: execute do-nothing fw-bw step for proper inits and resets
#if 1 // @TODO: double-check whether the #else branch is the same; and if so, use it instead
graph->params()->allocateBackward();
if (t == 0) // these have already been sized
graph->params()->set_zero_adjoint();
#else
graph->clear(); // instead of build()
graph->forward();
graph->backward(/*zero=*/t == 0);
#endif
}
};
comm_->foreach(forwardBackward); // compute gradients in parallel on each device. Aggregate if delay_ > 1.
comm_->foreach(forwardBackward); // compute gradients in parallel on each device. Aggregate if delay > 1.
}
// At this point, each device on each MPI process has a gradient aggregated over a subset of the sub-batches.
@ -177,23 +298,25 @@ void SyncGraphGroup::update(Ptr<data::Batch> batch) /*override*/ {
Element(_1 = _1 / (float)div, curGrad);
}
// determine num words for dynamic hyper-parameter adjustment
size_t mbWords = OptimizerBase::mbSizeNotProvided;
if (options_->get<std::string>("cost-type") == "ce-sum") { // presently only supported for ce-sum
mbWords = 0;
for (const auto& batch : subBatches)
mbWords += batch->words(-1); // @TODO: use wordsTrg (it's the same)
}
// actual model update
shardOpt_[idx]->update(curParam, curGrad);
shardOpt_[idx]->update(curParam, curGrad, mbWords);
if(mvAvg_)
updateAvgParams(
paramsAvg_[idx], curParam, scheduler_->numberOfBatches());
paramsAvg_[idx], curParam, scheduler_->numberOfBatches(), mbWords);
};
timer::Timer timer;
comm_->scatterReduce(); // reduce gradients across all devices (globally) into shards
//LOG(info, timer.format(2, "after scatterReduce (has sync): %ws"));
comm_->foreach(update); // per-shard model-update
//LOG(info, timer.format(2, "after model update (no sync): %ws"));
//graphs_.front()->getBackend()->synchronize(); // @TODO: This is strictly for time measurement. Make sure it doesn't accidentally stay in here!!
//LOG(info, timer.format(2, "after model update sync (which is unnecessary except for time measurements): %ws"));
comm_->allGather(); // distribute param value shards back
//LOG(info, timer.format(2, "after allGather (has sync): %ws"));
// cost across all local devices (scheduler will aggregate cross-process)
float localCost = 0;
@ -202,7 +325,7 @@ void SyncGraphGroup::update(Ptr<data::Batch> batch) /*override*/ {
// if localCost is average-based, we need to turn the sum over devices into an average as well
if(options_->get<std::string>("cost-type") != "ce-sum")
localCost /= numSubBatches;
localCost /= subBatches.size();
if(scheduler_) {
// track and log localCost
@ -224,6 +347,7 @@ void SyncGraphGroup::update(Ptr<data::Batch> batch) /*override*/ {
}
void SyncGraphGroup::load() /*override*/ {
validate();
// This function loads the main parameters in the graphs.
// In case of exponential smoothing, we also need to restore paramsAvg_.
@ -253,10 +377,11 @@ void SyncGraphGroup::load() /*override*/ {
[&](const std::vector<float>& optimizerStateVector, const OptimizerBase::ScatterStateSetFunc& setShardFn) {
comm_->scatterState(optimizerStateVector, setShardFn);
});
LOG(info, "[training] Model reloaded from {}", name);
} else if(options_->has("pretrained-model")) {
std::string nameInit = options_->get<std::string>("pretrained-model");
LOG(info,
"Initialize model weights with the pre-trained model {}",
"[training] Initializing model weights with the pre-trained model {}",
nameInit);
size_t i = 0;
@ -267,44 +392,34 @@ void SyncGraphGroup::load() /*override*/ {
}
void SyncGraphGroup::save(bool final) /*override*/ {
validate();
barrier(); // (for better grouping of log messages)
//LOG(info, "[{}] save() line {}!", this->mpi_->idStr(), __LINE__);
// do final validation
if(final && scheduler_) {
// bring the smoothed model in
// Note that it is sharded. For multi-node, it is sharded over multiple machines, so this is a network access.
// Also note that the swap must run on all MPI processes concurrently, although only one actually validates.
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
swapParamsAvg();
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
if (isMainProcess()) // in multi-node, only first MPI process saves the model (they are all identical)
scheduler_->validate(graphs_, true);
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
swapParamsAvg();
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
}
std::string name = options_->get<std::string>("model");
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
barrier(); // (for better grouping of log messages)
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
// if smoothing then save original (unsmoothed) parameters as well
// @TODO: Check whether we are reloading the correct file (the unsmoothed one).
if(mvAvg_ && paramsAvg_.size() > 0 && isMainProcess()) // only save from one MPI process
// Save the original parameters in model.npz.orig.npz
builders_[0]->save(graphs_[0], name + ".orig.npz", true);
// Temporarily switch to the averaged parameters
// Note: the smoothed model is sharded across GPUs, and across MPI processes if applicable. This brings it into MPI process[*].device[*]
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
swapParamsAvg();
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
// save main model file
if (isMainProcess()) { // only save from one MPI process
// if not overwrite then save a copy with number of updates in the model pathname
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
if(!options_->get<bool>("overwrite") && !final) {
std::string numberOfBatches
= scheduler_ ? std::to_string(scheduler_->numberOfBatches())
@ -313,40 +428,34 @@ void SyncGraphGroup::save(bool final) /*override*/ {
nameOverwrite.replace(name.size() - 4, 4, ".iter" + numberOfBatches + ".npz"); // @TODO: use insert?
builders_[0]->save(graphs_[0], nameOverwrite);
}
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
// save main model file
builders_[0]->save(graphs_[0], name, true);
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
// save scheduler-related state
if (scheduler_)
scheduler_->save(name);
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
}
// Switch back to the original parameters
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
swapParamsAvg();
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
#if 0 // temporary, for testing of saving distributed models; must be identical to .orig.npz
if(mvAvg_ && paramsAvg_.size() > 0 && isMainProcess())
builders_[0]->save(graphs_[0], name + ".orig_after_swapping.npz", true);
#endif
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
barrier(); // (for better grouping of log messages)
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
// persist optimizer state
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
shardOpt_[0]->save(name + ".optimizer.npz", shardOpt_,
[&](const OptimizerBase::GatherStateGetFunc& getShardFn) {
return comm_->gatherState(getShardFn);
},
isMainProcess());
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
barrier(); // (for better grouping of log messages)
//LOG(info, "[{}] save() line {}", this->mpi_->idStr(), __LINE__);
}
// Shuts down this graph group at the end of training.
// Calls validate() first (internal sanity check), then hands the MPI handle
// over to finalizeMPI() -- note the std::move: mpi_ is relinquished here and
// must not be used afterwards -- and finally chains to the base-class
// finalize(). NOTE(review): finalizeMPI() presumably performs a collective
// shutdown and must run on all MPI ranks concurrently -- confirm against its
// definition.
void SyncGraphGroup::finalize() /*override*/ {
validate();
finalizeMPI(std::move(mpi_));
Base::finalize();
}
} // namespace marian

View File

@ -7,6 +7,7 @@
namespace marian {
class SyncGraphGroup : public GraphGroup, public ExponentialSmoothing {
using Base = GraphGroup;
const size_t delay_{ 1 }; // optimizer-delay parameter
Ptr<ICommunicator> comm_; // [not null] communicator, e.g. NCCLCommunicator
@ -23,7 +24,10 @@ class SyncGraphGroup : public GraphGroup, public ExponentialSmoothing {
std::vector<Ptr<TensorAllocator>> paramsAllocs_; // [deviceIndex] we must hold a reference to the memory until this class dies
// @TODO: move this nto ExponentialSmoothing, together with paramsAvg_?
bool first_{ true }; // gets interpreted and cleared by update()
// state for update()
bool first_{ true }; // gets interpreted and cleared by update()
std::vector<Ptr<data::Batch>> pendingBatches_; // in case of delay, multi-worker, and/or multi-GPU, we buffer up batches
size_t typicalTrgWords_{}; // typical batch size in words (labels); remembered from collectStats()
void initialize(const Ptr<data::Batch>& exampleBatch);
void initializeAvg();
@ -32,6 +36,8 @@ class SyncGraphGroup : public GraphGroup, public ExponentialSmoothing {
void barrier() const { mpi_->barrier(); } // (we need this several times)
void swapParamsAvg() { if (mvAvg_ && paramsAvg_.size() > 0) comm_->swapParams(paramsAvg_); } // note: must call this on all MPI ranks in parallel
bool tryGetSubBatches(Ptr<data::Batch> newBatch, std::vector<Ptr<data::Batch>>& subBatches);
public:
SyncGraphGroup(Ptr<Options> config);
@ -42,6 +48,8 @@ public:
void load() override;
void save(bool final = false) override;
void finalize() override;
Ptr<data::BatchStats> collectStats();
// @TODO: consider to make this a virtual as well? Currently it is a template dispatch
};

View File

@ -13,12 +13,34 @@ private:
std::vector<Ptr<ValidatorBase>> validators_;
bool first_{true};
size_t typicalTrgBatchWords_{0}; // for dynamic batch sizing
Ptr<TrainingState> state_;
timer::Timer timer_, heartBeatTimer_;
float getLearningRate(TrainingState& state) {
// determine LR decay factor from --lr-decay-inv-sqrt option
// Computes the multiplicative learning-rate decay factor from the
// --lr-decay-inv-sqrt option (inverse-square-root decay). The option takes
// one or two numbers with scheduling units: the first is the decay constant,
// the optional second shifts the point at which decay begins. While decay is
// disabled or progress has not yet passed the start point, the factor is 1.
float getLearningRateDecayFactor(const TrainingState& state) const {
  auto decayArgs = options_->get<std::vector<std::string>>("lr-decay-inv-sqrt");
  ABORT_IF(decayArgs.empty() || decayArgs.size() > 2, "--lr-decay-inv-sqrt argument must be one or two numbers with units");
  auto decaySpec = SchedulingParameter::parse(decayArgs[0]);
  size_t progress = state.getProgressIn(decaySpec.unit);
  size_t decayStartsAt = decaySpec.n; // by default, decay starts after 'n' units
  if (decayArgs.size() > 1) { // optional second argument overrides the start point
    auto startSpec = SchedulingParameter::parse(decayArgs[1]);
    ABORT_IF(startSpec && startSpec.unit != decaySpec.unit, "both --lr-decay-inv-sqrt arguments must have the same unit");
    decayStartsAt = startSpec.n;
  }
  if (!decaySpec || progress <= decayStartsAt)
    return 1.f; // decay disabled, or not yet reached the start point
  // shift progress so that the factor is exactly 1 at progress == decayStartsAt
  auto shiftedProgress = progress - decayStartsAt + decaySpec.n;
  return (float)std::sqrt((double)decaySpec.n / (double)shiftedProgress);
}
// determine the dynamically adjusted learning rate, incl. warm-up and decay
float getLearningRate(const TrainingState& state) const {
float baselr = options_->get<float>("learn-rate");
float mult1 = 1.f;
@ -29,11 +51,7 @@ private:
mult1 = std::min(1.f, (float)bno / (float)warmup.n);
}
float mult2 = 1.f;
auto decayGoogle = SchedulingParameter::parse(options_->get<std::string>("lr-decay-inv-sqrt"));
if(decayGoogle) {
mult2 = std::min(1.f, (float)(std::sqrt(decayGoogle.n) / std::sqrt(state.getProgressIn(decayGoogle.unit))));
}
float mult2 = getLearningRateDecayFactor(state);
baselr = baselr * mult1 * mult2;
@ -45,6 +63,54 @@ private:
}
public:
// Records the typical number of target words (labels) per batch, as estimated
// externally (e.g. by the batch generator); tryGetDynamicMBSizeMultiplier()
// uses this value as the reference actual batch size when scaling towards a
// user-given reference label count. Logs the estimate on every call.
void setTypicalTrgBatchWords(size_t typicalTrgBatchWords) { // needed for tryGetDynamicMBSizeMultiplier()
typicalTrgBatchWords_ = typicalTrgBatchWords;
LOG(info, "batch size estimate is {} target words", typicalTrgBatchWords_);
}
// determine dynamic MB size, if respective parameters are given (return false if not)
// Determines the dynamic mini-batch size multiplier 'ratio' (out-parameter),
// if dynamic mini-batch scaling is enabled via --mini-batch-warmup.
// Returns false if the option is not given (caller then uses the static size).
// The resulting ratio combines up to three factors:
//  1. warm-up progress within the --mini-batch-warmup period,
//  2. optional rescaling to a reference label count (second option argument),
//  3. optional growth inverse to the LR decay factor (--mini-batch-track-lr).
// NOTE(review): once progress exceeds the warm-up period, progressRatio > 1,
// so the returned ratio can exceed 1 -- presumably intended (batches keep
// growing with LR tracking), but confirm against the caller's clamping.
bool tryGetDynamicMBSizeMultiplier(double /*out*/ &ratio) const {
auto mbWarmupOpts = options_->get<std::vector<std::string>>("mini-batch-warmup");
ABORT_IF(mbWarmupOpts.empty() || mbWarmupOpts.size() > 2, "--mini-batch-warmup argument must be one or two numbers with units");
auto mbWarmup = SchedulingParameter::parse(mbWarmupOpts[0]);
if (!mbWarmup)
return false;
ratio = 1.0;
// mini-batch-warmup
LOG_ONCE(info, "[scheduler] Mini-batch size warmup {}", std::string(mbWarmup));
// This scales MB size up from the start.
// now scale batch size relative to progress within warm-up period
size_t progress = state_->getProgressIn(mbWarmup.unit); // number of updates/labels processed
auto progressRatio = (double)progress / (double)mbWarmup.n; // where are we relatively within target warm-up period
// if the warm-up target is given in labels, grow the batch with the sqrt of
// progress instead of linearly (total labels grow quadratically in updates)
if (mbWarmup.unit == SchedulingUnit::trgLabels)
progressRatio = std::sqrt(progressRatio);
// apply ratio to actual batch size
ratio *= progressRatio;
// adjust for reference batch size if given
// At progress == mbWarmup.n (ratio=1), we would like to have refBatchLabels instead of whichever
// the actual batch size is. We approximate the latter as typicalTrgBatchWords_, and scale ratio accordingly.
if (mbWarmupOpts.size() > 1) {
ABORT_IF(typicalTrgBatchWords_ == 0, "dynamic scaling with words target requires MB size to be known in words"); // happens if MB size is specified in sentences
auto refBatchLabels = (size_t)std::stoull(mbWarmupOpts[1]);
LOG_ONCE(info, "[scheduler] Scaling to {} reference labels. Typical actual batch words is {}", refBatchLabels, typicalTrgBatchWords_);
ratio *= (double)refBatchLabels / (double)typicalTrgBatchWords_;
}
// dynamic MB-size tracking with learning rate
// As LR goes down, MB gets ramped up by the same ratio, which has been found to be safe.
auto mbTracking = options_->get<bool>("mini-batch-track-lr");
if (mbTracking) {
auto lrFactor = getLearningRateDecayFactor(*state_);
if (lrFactor != 1)
LOG_ONCE(info, "[scheduler] Dynamic mini-batch size adjustment enabled and kicking in");
ratio /= lrFactor; // lrFactor <= 1, so this grows the batch as LR decays
}
return true;
}
Scheduler(Ptr<Options> options, Ptr<TrainingState> state)
: options_(options), state_(state) {
state_->eta = getLearningRate(*state);
@ -120,12 +186,13 @@ public:
float value = validator->validate(graphs);
if(validator->stalled() > 0) {
LOG_VALID(info,
"Ep. {} : Up. {} : {} : {} : stalled {} times",
"Ep. {} : Up. {} : {} : {} : stalled {} times (last best: {})",
state_->epochs,
state_->batches,
validator->type(),
value,
validator->stalled());
validator->stalled(),
validator->lastBest());
} else {
LOG_VALID(info,
"Ep. {} : Up. {} : {} : {} : new best",
@ -170,13 +237,12 @@ public:
size_t batchLabels = 0; // number of target words in batch
for(const auto& batch : batches) {
if (batch) { // (nullptr is allowed as result of split)
batchSize += batch->size();
batchLabels += batch->words(-1);
}
batchSize += batch->size();
batchLabels += batch->words(-1);
}
// extrapolate cost across MPI processes, so that we have numbers in the right range
// Since batchLabels is counted across all MPI processes, we also should temporarily
// extrapolate cost across MPI processes, to have numbers in the right range.
// When doing the actual log, we then aggregate across MPI processes to get the accurate number.
if (mpi)
cost *= mpi->numMPIProcesses(); // @BUGBUG: this is presently correct for ce-sum, but possibly not the av-based losses
@ -203,42 +269,42 @@ public:
state_->samplesEpoch += batchSize; // sentences processed in this epoch
state_->labelsTotal += batchLabels; // total labels processed
state_->newBatch();
state_->newUpdate(batches.size());
if(state_->enteredNewPeriodOf(options_->get<std::string>("disp-freq")) ||
state_->batches <= options_->get<size_t>("disp-first")) {
// if MPI then aggregate precise cost across workers
if (mpi) {
//LOG(info, "all-reducing cost from {}", state_->costSum);
state_->costSum /= mpi->numMPIProcesses(); // undo the extra scaling
mpi->allReduce(&state_->costSum, &state_->costSum, 1, MPI_FLOAT, MPI_SUM);
//LOG(info, "all-reduced cost to {}", state_->costSum);
}
if (mpi && mpi->myMPIRank() != 0)
; // skip the report on alternate worker processes
else if(dispLabelCounts) {
if(options_->get<bool>("lr-report")) { // if true then show the learning rate
LOG(info,
"Ep. {} : Up. {} : Sen. {} : Cost {:.8f} * {} after {} : Time {:.2f}s : {:.2f} "
"Ep. {} : Up. {} : Sen. {} : Cost {:.8f} * {} @ {} after {} : Time {:.2f}s : {:.2f} "
"words/s : L.r. {:.4e}",
state_->epochs,
state_->batches,
utils::withCommas(state_->samplesEpoch),
state_->costSum / state_->costCount,
utils::withCommas(state_->costCount), // show cost as "av * count"
batchLabels,
utils::withCommas(state_->labelsTotal),
timer_.elapsed(),
state_->wordsDisp / timer_.elapsed(),
state_->eta);
} else {
LOG(info,
"Ep. {} : Up. {} : Sen. {} : Cost {:.8f} * {} after {} : Time {:.2f}s : {:.2f} "
"Ep. {} : Up. {} : Sen. {} : Cost {:.8f} * {} @ {} after {} : Time {:.2f}s : {:.2f} "
"words/s",
state_->epochs,
state_->batches,
utils::withCommas(state_->samplesEpoch),
state_->costSum / state_->costCount,
utils::withCommas(state_->costCount),
batchLabels,
utils::withCommas(state_->labelsTotal),
timer_.elapsed(),
state_->wordsDisp / timer_.elapsed());
@ -272,12 +338,14 @@ public:
}
// progress heartbeat for MS-internal Philly compute cluster
// This environment variable exists when running on the cluster.
using namespace std::chrono;
if((!mpi || mpi->myMPIRank() == 0) && getenv("PHILLY_JOB_ID")
&& heartBeatTimer_.elapsed<std::chrono::minutes>() >= 10) {
printf("PROGRESS: %.2f%%\nEVALERR: %.7f\n", (double)state_->epochs, state_->costSum / state_->costCount), fflush(stdout);
#if 0
LOG(info, "heart beat after {} updates", state_->batches);
#endif
printf("PROGRESS: %.2f%%\nEVALERR: %.7f%%\n",
(double)state_->epochs,
state_->costSum / state_->costCount / (mpi ? mpi->numMPIProcesses() : 1));
fflush(stdout);
std::cout << "MBSIZE: " << batchLabels << " after " << state_->batches << " updates = " << state_->labelsTotal << " labels" << std::endl << std::flush;
heartBeatTimer_.start();
}
}

View File

@ -56,7 +56,9 @@ public:
}
auto batchGenerator = New<CorpusBatchGenerator>(dataset, options_, stats);
scheduler->registerTrainingObserver(batchGenerator);
scheduler->setTypicalTrgBatchWords(batchGenerator->estimateTypicalTrgBatchWords()); // needed for dynamic MB scaling
auto model = New<ModelWrapper>(options_);
model->setScheduler(scheduler);
@ -85,12 +87,14 @@ public:
}
scheduler->finished();
model->finalize();
// Avoid saving the model twice if it has been loaded and training did not
// progress
if(!trainState->loaded)
model->save(true);
// finalize, including communicating successful completion to MPI
// @BUGBUG: This is wrong for async, but needed for sync. How to solve it?
model->finalize();
}
};
} // namespace marian

View File

@ -2,6 +2,7 @@
#include "common/definitions.h"
#include "common/filesystem.h"
#include "common/utils.h"
#include <fstream>
#include <vector>
@ -42,7 +43,9 @@ struct SchedulingParameter {
}
param.pop_back();
}
res.n = (size_t)std::stoull(param);
double number = utils::parseNumber(param);
res.n = (size_t)number;
ABORT_IF(number != (double)res.n, "Scheduling parameters must be whole numbers");
return res;
}
@ -62,9 +65,9 @@ class TrainingState {
public:
// Current epoch
size_t epochs{1};
// The total number of batches (=updates) processed since beginning of training --@TODO: rename to 'updates'
// The total number of updates since beginning of training --@TODO: rename to 'updates'
size_t batches{0};
// The number of batches seen in this epoch --@TODO: rename to 'updatesEpoch' or 'updatesInCurrentEpoch'
// The number of batches seen in this epoch --note: not updates; an update can consist of multiple batches
size_t batchesEpoch{0};
// The number of sentences seen in this epoch --@TODO: rename to 'sentencesEpoch'
size_t samplesEpoch{0};
@ -172,9 +175,9 @@ public:
batchesEpoch = 0;
}
void newBatch() {
void newUpdate(size_t batchesInUpdate) {
++batches;
++batchesEpoch;
batchesEpoch += batchesInUpdate;
loaded = false;
validated = false;
for(auto observer : observers_)

View File

@ -137,7 +137,7 @@ protected:
lastBest_ = val;
if(options_->get<bool>("keep-best"))
keepBest(graphs);
} else {
} else if (lastBest_ != val) { // (special case 0 at start) @TODO: needed? Seems stall count gets reset each time it does improve. If not needed, remove "if(...)" again.
stalled_++;
}
}
@ -166,7 +166,6 @@ public:
protected:
virtual float validateBG(const std::vector<Ptr<ExpressionGraph>>& graphs) override {
auto ctype = options_->get<std::string>("cost-type");
options_->set("cost-type", "ce-sum");

View File

@ -1,377 +1,25 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ALL_BUILD", "ALL_BUILD.vcxproj", "{5216F769-E887-369E-AD1E-D6A1F69E834E}"
ProjectSection(ProjectDependencies) = postProject
{17E8F84B-76CD-326B-B50A-C4F3C3A8CE33} = {17E8F84B-76CD-326B-B50A-C4F3C3A8CE33}
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
{5AF43E07-5917-3D8F-9BF0-B41F698242EA} = {5AF43E07-5917-3D8F-9BF0-B41F698242EA}
{885D3D2B-7278-30EF-BB1B-50E83D1635C4} = {885D3D2B-7278-30EF-BB1B-50E83D1635C4}
{3CD61EAE-244E-33AB-8C7D-F5182481E033} = {3CD61EAE-244E-33AB-8C7D-F5182481E033}
{97131187-E592-3981-886F-222EE20FB669} = {97131187-E592-3981-886F-222EE20FB669}
{25A05D30-AFC2-3F0E-B475-0B2B81530151} = {25A05D30-AFC2-3F0E-B475-0B2B81530151}
{8A6B1F60-8E2D-3171-828B-07E732C8E7D7} = {8A6B1F60-8E2D-3171-828B-07E732C8E7D7}
{3784D69C-33A9-33A7-A557-F809EF2F4D34} = {3784D69C-33A9-33A7-A557-F809EF2F4D34}
{EA3973A2-F92E-3124-9817-81B2458EC8DC} = {EA3973A2-F92E-3124-9817-81B2458EC8DC}
{36953645-6D01-37E4-ACF7-D3F9BFFCA49D} = {36953645-6D01-37E4-ACF7-D3F9BFFCA49D}
{F4AD2C38-E6B9-3C4A-A281-4AB7440D6162} = {F4AD2C38-E6B9-3C4A-A281-4AB7440D6162}
{D9D20410-4011-370C-8E15-A6F5C311F337} = {D9D20410-4011-370C-8E15-A6F5C311F337}
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678} = {4A20AD5F-7334-31D3-B31D-9AAF53CC6678}
{5857EF98-C87F-3197-A399-F0F9A20913FC} = {5857EF98-C87F-3197-A399-F0F9A20913FC}
{F6E7B14E-D9E6-343C-B58D-CA0381A3BB8F} = {F6E7B14E-D9E6-343C-B58D-CA0381A3BB8F}
{FBB107B9-523B-3094-95CF-A103E2388006} = {FBB107B9-523B-3094-95CF-A103E2388006}
{5B4A6D26-C638-3350-9E1A-0F987C448DEC} = {5B4A6D26-C638-3350-9E1A-0F987C448DEC}
{11AB9AE9-CF65-341B-B425-9EDFC4E2F22F} = {11AB9AE9-CF65-341B-B425-9EDFC4E2F22F}
{1134F859-3DE4-34B1-924F-82CA38D4D4F3} = {1134F859-3DE4-34B1-924F-82CA38D4D4F3}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "INSTALL", "INSTALL.vcxproj", "{9DAF8CA3-052E-3480-A332-34676CAE852B}"
ProjectSection(ProjectDependencies) = postProject
{5216F769-E887-369E-AD1E-D6A1F69E834E} = {5216F769-E887-369E-AD1E-D6A1F69E834E}
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PACKAGE", "PACKAGE.vcxproj", "{3A3C6EA5-65CD-324E-90F4-6B4D70DD5A37}"
ProjectSection(ProjectDependencies) = postProject
{5216F769-E887-369E-AD1E-D6A1F69E834E} = {5216F769-E887-369E-AD1E-D6A1F69E834E}
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SQLiteCpp", "src\3rd_party\SQLiteCpp\SQLiteCpp.vcxproj", "{17E8F84B-76CD-326B-B50A-C4F3C3A8CE33}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ZERO_CHECK", "ZERO_CHECK.vcxproj", "{806A44E1-15D4-3368-B0B9-2A6CC352D505}"
ProjectSection(ProjectDependencies) = postProject
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libyaml-cpp", "src\3rd_party\yaml-cpp\libyaml-cpp.vcxproj", "{5AF43E07-5917-3D8F-9BF0-B41F698242EA}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "marian", "src\marian.vcxproj", "{885D3D2B-7278-30EF-BB1B-50E83D1635C4}"
ProjectSection(ProjectDependencies) = postProject
{17E8F84B-76CD-326B-B50A-C4F3C3A8CE33} = {17E8F84B-76CD-326B-B50A-C4F3C3A8CE33}
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
{5AF43E07-5917-3D8F-9BF0-B41F698242EA} = {5AF43E07-5917-3D8F-9BF0-B41F698242EA}
{55A27783-64A4-3AA7-A4B1-49C4B628F18C} = {55A27783-64A4-3AA7-A4B1-49C4B628F18C}
{F4AD2C38-E6B9-3C4A-A281-4AB7440D6162} = {F4AD2C38-E6B9-3C4A-A281-4AB7440D6162}
{1134F859-3DE4-34B1-924F-82CA38D4D4F3} = {1134F859-3DE4-34B1-924F-82CA38D4D4F3}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "marian_conv", "src\marian_conv.vcxproj", "{3CD61EAE-244E-33AB-8C7D-F5182481E033}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
{885D3D2B-7278-30EF-BB1B-50E83D1635C4} = {885D3D2B-7278-30EF-BB1B-50E83D1635C4}
{97131187-E592-3981-886F-222EE20FB669} = {97131187-E592-3981-886F-222EE20FB669}
{D9D20410-4011-370C-8E15-A6F5C311F337} = {D9D20410-4011-370C-8E15-A6F5C311F337}
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678} = {4A20AD5F-7334-31D3-B31D-9AAF53CC6678}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "marian_cuda", "src\marian_cuda.vcxproj", "{97131187-E592-3981-886F-222EE20FB669}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "marian_decoder", "src\marian_decoder.vcxproj", "{25A05D30-AFC2-3F0E-B475-0B2B81530151}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
{885D3D2B-7278-30EF-BB1B-50E83D1635C4} = {885D3D2B-7278-30EF-BB1B-50E83D1635C4}
{97131187-E592-3981-886F-222EE20FB669} = {97131187-E592-3981-886F-222EE20FB669}
{D9D20410-4011-370C-8E15-A6F5C311F337} = {D9D20410-4011-370C-8E15-A6F5C311F337}
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678} = {4A20AD5F-7334-31D3-B31D-9AAF53CC6678}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "marian_scorer", "src\marian_scorer.vcxproj", "{8A6B1F60-8E2D-3171-828B-07E732C8E7D7}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
{885D3D2B-7278-30EF-BB1B-50E83D1635C4} = {885D3D2B-7278-30EF-BB1B-50E83D1635C4}
{97131187-E592-3981-886F-222EE20FB669} = {97131187-E592-3981-886F-222EE20FB669}
{D9D20410-4011-370C-8E15-A6F5C311F337} = {D9D20410-4011-370C-8E15-A6F5C311F337}
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678} = {4A20AD5F-7334-31D3-B31D-9AAF53CC6678}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "marian_server", "src\marian_server.vcxproj", "{3784D69C-33A9-33A7-A557-F809EF2F4D34}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
{885D3D2B-7278-30EF-BB1B-50E83D1635C4} = {885D3D2B-7278-30EF-BB1B-50E83D1635C4}
{97131187-E592-3981-886F-222EE20FB669} = {97131187-E592-3981-886F-222EE20FB669}
{D9D20410-4011-370C-8E15-A6F5C311F337} = {D9D20410-4011-370C-8E15-A6F5C311F337}
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678} = {4A20AD5F-7334-31D3-B31D-9AAF53CC6678}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "marian_train", "src\marian_train.vcxproj", "{EA3973A2-F92E-3124-9817-81B2458EC8DC}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
{885D3D2B-7278-30EF-BB1B-50E83D1635C4} = {885D3D2B-7278-30EF-BB1B-50E83D1635C4}
{97131187-E592-3981-886F-222EE20FB669} = {97131187-E592-3981-886F-222EE20FB669}
{D9D20410-4011-370C-8E15-A6F5C311F337} = {D9D20410-4011-370C-8E15-A6F5C311F337}
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678} = {4A20AD5F-7334-31D3-B31D-9AAF53CC6678}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "marian_version", "src\marian_version.vcxproj", "{55A27783-64A4-3AA7-A4B1-49C4B628F18C}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "marian_vocab", "src\marian_vocab.vcxproj", "{36953645-6D01-37E4-ACF7-D3F9BFFCA49D}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
{885D3D2B-7278-30EF-BB1B-50E83D1635C4} = {885D3D2B-7278-30EF-BB1B-50E83D1635C4}
{97131187-E592-3981-886F-222EE20FB669} = {97131187-E592-3981-886F-222EE20FB669}
{D9D20410-4011-370C-8E15-A6F5C311F337} = {D9D20410-4011-370C-8E15-A6F5C311F337}
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678} = {4A20AD5F-7334-31D3-B31D-9AAF53CC6678}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pathie-cpp", "src\3rd_party\pathie-cpp\pathie-cpp.vcxproj", "{F4AD2C38-E6B9-3C4A-A281-4AB7440D6162}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "sentencepiece-static", "src\3rd_party\sentencepiece\src\sentencepiece-static.vcxproj", "{D9D20410-4011-370C-8E15-A6F5C311F337}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "sentencepiece_train-static", "src\3rd_party\sentencepiece\src\sentencepiece_train-static.vcxproj", "{4A20AD5F-7334-31D3-B31D-9AAF53CC6678}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spm_decode", "src\3rd_party\sentencepiece\src\spm_decode.vcxproj", "{5857EF98-C87F-3197-A399-F0F9A20913FC}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
{D9D20410-4011-370C-8E15-A6F5C311F337} = {D9D20410-4011-370C-8E15-A6F5C311F337}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spm_encode", "src\3rd_party\sentencepiece\src\spm_encode.vcxproj", "{F6E7B14E-D9E6-343C-B58D-CA0381A3BB8F}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
{D9D20410-4011-370C-8E15-A6F5C311F337} = {D9D20410-4011-370C-8E15-A6F5C311F337}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spm_export_vocab", "src\3rd_party\sentencepiece\src\spm_export_vocab.vcxproj", "{FBB107B9-523B-3094-95CF-A103E2388006}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
{D9D20410-4011-370C-8E15-A6F5C311F337} = {D9D20410-4011-370C-8E15-A6F5C311F337}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spm_normalize", "src\3rd_party\sentencepiece\src\spm_normalize.vcxproj", "{5B4A6D26-C638-3350-9E1A-0F987C448DEC}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
{D9D20410-4011-370C-8E15-A6F5C311F337} = {D9D20410-4011-370C-8E15-A6F5C311F337}
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678} = {4A20AD5F-7334-31D3-B31D-9AAF53CC6678}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spm_train", "src\3rd_party\sentencepiece\src\spm_train.vcxproj", "{11AB9AE9-CF65-341B-B425-9EDFC4E2F22F}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
{D9D20410-4011-370C-8E15-A6F5C311F337} = {D9D20410-4011-370C-8E15-A6F5C311F337}
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678} = {4A20AD5F-7334-31D3-B31D-9AAF53CC6678}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zlib", "src\3rd_party\zlib\zlib.vcxproj", "{1134F859-3DE4-34B1-924F-82CA38D4D4F3}"
ProjectSection(ProjectDependencies) = postProject
{806A44E1-15D4-3368-B0B9-2A6CC352D505} = {806A44E1-15D4-3368-B0B9-2A6CC352D505}
EndProjectSection
VisualStudioVersion = 15.0.27703.2047
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Marian", "Marian.vcxproj", "{E2F320FE-0C01-4C80-810C-3A92205A29DC}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
MinSizeRel|x64 = MinSizeRel|x64
RelWithDebInfo|x64 = RelWithDebInfo|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{5216F769-E887-369E-AD1E-D6A1F69E834E}.Debug|x64.ActiveCfg = Debug|x64
{5216F769-E887-369E-AD1E-D6A1F69E834E}.Debug|x64.Build.0 = Debug|x64
{5216F769-E887-369E-AD1E-D6A1F69E834E}.Release|x64.ActiveCfg = Release|x64
{5216F769-E887-369E-AD1E-D6A1F69E834E}.Release|x64.Build.0 = Release|x64
{5216F769-E887-369E-AD1E-D6A1F69E834E}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{5216F769-E887-369E-AD1E-D6A1F69E834E}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{5216F769-E887-369E-AD1E-D6A1F69E834E}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{5216F769-E887-369E-AD1E-D6A1F69E834E}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{9DAF8CA3-052E-3480-A332-34676CAE852B}.Debug|x64.ActiveCfg = Debug|x64
{9DAF8CA3-052E-3480-A332-34676CAE852B}.Release|x64.ActiveCfg = Release|x64
{9DAF8CA3-052E-3480-A332-34676CAE852B}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{9DAF8CA3-052E-3480-A332-34676CAE852B}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{3A3C6EA5-65CD-324E-90F4-6B4D70DD5A37}.Debug|x64.ActiveCfg = Debug|x64
{3A3C6EA5-65CD-324E-90F4-6B4D70DD5A37}.Release|x64.ActiveCfg = Release|x64
{3A3C6EA5-65CD-324E-90F4-6B4D70DD5A37}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{3A3C6EA5-65CD-324E-90F4-6B4D70DD5A37}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{17E8F84B-76CD-326B-B50A-C4F3C3A8CE33}.Debug|x64.ActiveCfg = Debug|x64
{17E8F84B-76CD-326B-B50A-C4F3C3A8CE33}.Debug|x64.Build.0 = Debug|x64
{17E8F84B-76CD-326B-B50A-C4F3C3A8CE33}.Release|x64.ActiveCfg = Release|x64
{17E8F84B-76CD-326B-B50A-C4F3C3A8CE33}.Release|x64.Build.0 = Release|x64
{17E8F84B-76CD-326B-B50A-C4F3C3A8CE33}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{17E8F84B-76CD-326B-B50A-C4F3C3A8CE33}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{17E8F84B-76CD-326B-B50A-C4F3C3A8CE33}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{17E8F84B-76CD-326B-B50A-C4F3C3A8CE33}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{806A44E1-15D4-3368-B0B9-2A6CC352D505}.Debug|x64.ActiveCfg = Debug|x64
{806A44E1-15D4-3368-B0B9-2A6CC352D505}.Debug|x64.Build.0 = Debug|x64
{806A44E1-15D4-3368-B0B9-2A6CC352D505}.Release|x64.ActiveCfg = Release|x64
{806A44E1-15D4-3368-B0B9-2A6CC352D505}.Release|x64.Build.0 = Release|x64
{806A44E1-15D4-3368-B0B9-2A6CC352D505}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{806A44E1-15D4-3368-B0B9-2A6CC352D505}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{806A44E1-15D4-3368-B0B9-2A6CC352D505}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{806A44E1-15D4-3368-B0B9-2A6CC352D505}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{5AF43E07-5917-3D8F-9BF0-B41F698242EA}.Debug|x64.ActiveCfg = Debug|x64
{5AF43E07-5917-3D8F-9BF0-B41F698242EA}.Debug|x64.Build.0 = Debug|x64
{5AF43E07-5917-3D8F-9BF0-B41F698242EA}.Release|x64.ActiveCfg = Release|x64
{5AF43E07-5917-3D8F-9BF0-B41F698242EA}.Release|x64.Build.0 = Release|x64
{5AF43E07-5917-3D8F-9BF0-B41F698242EA}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{5AF43E07-5917-3D8F-9BF0-B41F698242EA}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{5AF43E07-5917-3D8F-9BF0-B41F698242EA}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{5AF43E07-5917-3D8F-9BF0-B41F698242EA}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{885D3D2B-7278-30EF-BB1B-50E83D1635C4}.Debug|x64.ActiveCfg = Debug|x64
{885D3D2B-7278-30EF-BB1B-50E83D1635C4}.Debug|x64.Build.0 = Debug|x64
{885D3D2B-7278-30EF-BB1B-50E83D1635C4}.Release|x64.ActiveCfg = Release|x64
{885D3D2B-7278-30EF-BB1B-50E83D1635C4}.Release|x64.Build.0 = Release|x64
{885D3D2B-7278-30EF-BB1B-50E83D1635C4}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{885D3D2B-7278-30EF-BB1B-50E83D1635C4}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{885D3D2B-7278-30EF-BB1B-50E83D1635C4}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{885D3D2B-7278-30EF-BB1B-50E83D1635C4}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{3CD61EAE-244E-33AB-8C7D-F5182481E033}.Debug|x64.ActiveCfg = Debug|x64
{3CD61EAE-244E-33AB-8C7D-F5182481E033}.Debug|x64.Build.0 = Debug|x64
{3CD61EAE-244E-33AB-8C7D-F5182481E033}.Release|x64.ActiveCfg = Release|x64
{3CD61EAE-244E-33AB-8C7D-F5182481E033}.Release|x64.Build.0 = Release|x64
{3CD61EAE-244E-33AB-8C7D-F5182481E033}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{3CD61EAE-244E-33AB-8C7D-F5182481E033}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{3CD61EAE-244E-33AB-8C7D-F5182481E033}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{3CD61EAE-244E-33AB-8C7D-F5182481E033}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{97131187-E592-3981-886F-222EE20FB669}.Debug|x64.ActiveCfg = Debug|x64
{97131187-E592-3981-886F-222EE20FB669}.Debug|x64.Build.0 = Debug|x64
{97131187-E592-3981-886F-222EE20FB669}.Release|x64.ActiveCfg = Release|x64
{97131187-E592-3981-886F-222EE20FB669}.Release|x64.Build.0 = Release|x64
{97131187-E592-3981-886F-222EE20FB669}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{97131187-E592-3981-886F-222EE20FB669}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{97131187-E592-3981-886F-222EE20FB669}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{97131187-E592-3981-886F-222EE20FB669}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{25A05D30-AFC2-3F0E-B475-0B2B81530151}.Debug|x64.ActiveCfg = Debug|x64
{25A05D30-AFC2-3F0E-B475-0B2B81530151}.Debug|x64.Build.0 = Debug|x64
{25A05D30-AFC2-3F0E-B475-0B2B81530151}.Release|x64.ActiveCfg = Release|x64
{25A05D30-AFC2-3F0E-B475-0B2B81530151}.Release|x64.Build.0 = Release|x64
{25A05D30-AFC2-3F0E-B475-0B2B81530151}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{25A05D30-AFC2-3F0E-B475-0B2B81530151}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{25A05D30-AFC2-3F0E-B475-0B2B81530151}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{25A05D30-AFC2-3F0E-B475-0B2B81530151}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{8A6B1F60-8E2D-3171-828B-07E732C8E7D7}.Debug|x64.ActiveCfg = Debug|x64
{8A6B1F60-8E2D-3171-828B-07E732C8E7D7}.Debug|x64.Build.0 = Debug|x64
{8A6B1F60-8E2D-3171-828B-07E732C8E7D7}.Release|x64.ActiveCfg = Release|x64
{8A6B1F60-8E2D-3171-828B-07E732C8E7D7}.Release|x64.Build.0 = Release|x64
{8A6B1F60-8E2D-3171-828B-07E732C8E7D7}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{8A6B1F60-8E2D-3171-828B-07E732C8E7D7}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{8A6B1F60-8E2D-3171-828B-07E732C8E7D7}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{8A6B1F60-8E2D-3171-828B-07E732C8E7D7}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{3784D69C-33A9-33A7-A557-F809EF2F4D34}.Debug|x64.ActiveCfg = Debug|x64
{3784D69C-33A9-33A7-A557-F809EF2F4D34}.Debug|x64.Build.0 = Debug|x64
{3784D69C-33A9-33A7-A557-F809EF2F4D34}.Release|x64.ActiveCfg = Release|x64
{3784D69C-33A9-33A7-A557-F809EF2F4D34}.Release|x64.Build.0 = Release|x64
{3784D69C-33A9-33A7-A557-F809EF2F4D34}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{3784D69C-33A9-33A7-A557-F809EF2F4D34}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{3784D69C-33A9-33A7-A557-F809EF2F4D34}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{3784D69C-33A9-33A7-A557-F809EF2F4D34}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{EA3973A2-F92E-3124-9817-81B2458EC8DC}.Debug|x64.ActiveCfg = Debug|x64
{EA3973A2-F92E-3124-9817-81B2458EC8DC}.Debug|x64.Build.0 = Debug|x64
{EA3973A2-F92E-3124-9817-81B2458EC8DC}.Release|x64.ActiveCfg = Release|x64
{EA3973A2-F92E-3124-9817-81B2458EC8DC}.Release|x64.Build.0 = Release|x64
{EA3973A2-F92E-3124-9817-81B2458EC8DC}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{EA3973A2-F92E-3124-9817-81B2458EC8DC}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{EA3973A2-F92E-3124-9817-81B2458EC8DC}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{EA3973A2-F92E-3124-9817-81B2458EC8DC}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{55A27783-64A4-3AA7-A4B1-49C4B628F18C}.Debug|x64.ActiveCfg = Debug|x64
{55A27783-64A4-3AA7-A4B1-49C4B628F18C}.Debug|x64.Build.0 = Debug|x64
{55A27783-64A4-3AA7-A4B1-49C4B628F18C}.Release|x64.ActiveCfg = Release|x64
{55A27783-64A4-3AA7-A4B1-49C4B628F18C}.Release|x64.Build.0 = Release|x64
{55A27783-64A4-3AA7-A4B1-49C4B628F18C}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{55A27783-64A4-3AA7-A4B1-49C4B628F18C}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{55A27783-64A4-3AA7-A4B1-49C4B628F18C}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{55A27783-64A4-3AA7-A4B1-49C4B628F18C}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{36953645-6D01-37E4-ACF7-D3F9BFFCA49D}.Debug|x64.ActiveCfg = Debug|x64
{36953645-6D01-37E4-ACF7-D3F9BFFCA49D}.Debug|x64.Build.0 = Debug|x64
{36953645-6D01-37E4-ACF7-D3F9BFFCA49D}.Release|x64.ActiveCfg = Release|x64
{36953645-6D01-37E4-ACF7-D3F9BFFCA49D}.Release|x64.Build.0 = Release|x64
{36953645-6D01-37E4-ACF7-D3F9BFFCA49D}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{36953645-6D01-37E4-ACF7-D3F9BFFCA49D}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{36953645-6D01-37E4-ACF7-D3F9BFFCA49D}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{36953645-6D01-37E4-ACF7-D3F9BFFCA49D}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{F4AD2C38-E6B9-3C4A-A281-4AB7440D6162}.Debug|x64.ActiveCfg = Debug|x64
{F4AD2C38-E6B9-3C4A-A281-4AB7440D6162}.Debug|x64.Build.0 = Debug|x64
{F4AD2C38-E6B9-3C4A-A281-4AB7440D6162}.Release|x64.ActiveCfg = Release|x64
{F4AD2C38-E6B9-3C4A-A281-4AB7440D6162}.Release|x64.Build.0 = Release|x64
{F4AD2C38-E6B9-3C4A-A281-4AB7440D6162}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{F4AD2C38-E6B9-3C4A-A281-4AB7440D6162}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{F4AD2C38-E6B9-3C4A-A281-4AB7440D6162}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{F4AD2C38-E6B9-3C4A-A281-4AB7440D6162}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{D9D20410-4011-370C-8E15-A6F5C311F337}.Debug|x64.ActiveCfg = Debug|x64
{D9D20410-4011-370C-8E15-A6F5C311F337}.Debug|x64.Build.0 = Debug|x64
{D9D20410-4011-370C-8E15-A6F5C311F337}.Release|x64.ActiveCfg = Release|x64
{D9D20410-4011-370C-8E15-A6F5C311F337}.Release|x64.Build.0 = Release|x64
{D9D20410-4011-370C-8E15-A6F5C311F337}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{D9D20410-4011-370C-8E15-A6F5C311F337}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{D9D20410-4011-370C-8E15-A6F5C311F337}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{D9D20410-4011-370C-8E15-A6F5C311F337}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678}.Debug|x64.ActiveCfg = Debug|x64
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678}.Debug|x64.Build.0 = Debug|x64
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678}.Release|x64.ActiveCfg = Release|x64
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678}.Release|x64.Build.0 = Release|x64
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{4A20AD5F-7334-31D3-B31D-9AAF53CC6678}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{5857EF98-C87F-3197-A399-F0F9A20913FC}.Debug|x64.ActiveCfg = Debug|x64
{5857EF98-C87F-3197-A399-F0F9A20913FC}.Debug|x64.Build.0 = Debug|x64
{5857EF98-C87F-3197-A399-F0F9A20913FC}.Release|x64.ActiveCfg = Release|x64
{5857EF98-C87F-3197-A399-F0F9A20913FC}.Release|x64.Build.0 = Release|x64
{5857EF98-C87F-3197-A399-F0F9A20913FC}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{5857EF98-C87F-3197-A399-F0F9A20913FC}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{5857EF98-C87F-3197-A399-F0F9A20913FC}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{5857EF98-C87F-3197-A399-F0F9A20913FC}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{F6E7B14E-D9E6-343C-B58D-CA0381A3BB8F}.Debug|x64.ActiveCfg = Debug|x64
{F6E7B14E-D9E6-343C-B58D-CA0381A3BB8F}.Debug|x64.Build.0 = Debug|x64
{F6E7B14E-D9E6-343C-B58D-CA0381A3BB8F}.Release|x64.ActiveCfg = Release|x64
{F6E7B14E-D9E6-343C-B58D-CA0381A3BB8F}.Release|x64.Build.0 = Release|x64
{F6E7B14E-D9E6-343C-B58D-CA0381A3BB8F}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{F6E7B14E-D9E6-343C-B58D-CA0381A3BB8F}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{F6E7B14E-D9E6-343C-B58D-CA0381A3BB8F}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{F6E7B14E-D9E6-343C-B58D-CA0381A3BB8F}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{FBB107B9-523B-3094-95CF-A103E2388006}.Debug|x64.ActiveCfg = Debug|x64
{FBB107B9-523B-3094-95CF-A103E2388006}.Debug|x64.Build.0 = Debug|x64
{FBB107B9-523B-3094-95CF-A103E2388006}.Release|x64.ActiveCfg = Release|x64
{FBB107B9-523B-3094-95CF-A103E2388006}.Release|x64.Build.0 = Release|x64
{FBB107B9-523B-3094-95CF-A103E2388006}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{FBB107B9-523B-3094-95CF-A103E2388006}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{FBB107B9-523B-3094-95CF-A103E2388006}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{FBB107B9-523B-3094-95CF-A103E2388006}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{5B4A6D26-C638-3350-9E1A-0F987C448DEC}.Debug|x64.ActiveCfg = Debug|x64
{5B4A6D26-C638-3350-9E1A-0F987C448DEC}.Debug|x64.Build.0 = Debug|x64
{5B4A6D26-C638-3350-9E1A-0F987C448DEC}.Release|x64.ActiveCfg = Release|x64
{5B4A6D26-C638-3350-9E1A-0F987C448DEC}.Release|x64.Build.0 = Release|x64
{5B4A6D26-C638-3350-9E1A-0F987C448DEC}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{5B4A6D26-C638-3350-9E1A-0F987C448DEC}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{5B4A6D26-C638-3350-9E1A-0F987C448DEC}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{5B4A6D26-C638-3350-9E1A-0F987C448DEC}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{11AB9AE9-CF65-341B-B425-9EDFC4E2F22F}.Debug|x64.ActiveCfg = Debug|x64
{11AB9AE9-CF65-341B-B425-9EDFC4E2F22F}.Debug|x64.Build.0 = Debug|x64
{11AB9AE9-CF65-341B-B425-9EDFC4E2F22F}.Release|x64.ActiveCfg = Release|x64
{11AB9AE9-CF65-341B-B425-9EDFC4E2F22F}.Release|x64.Build.0 = Release|x64
{11AB9AE9-CF65-341B-B425-9EDFC4E2F22F}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{11AB9AE9-CF65-341B-B425-9EDFC4E2F22F}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{11AB9AE9-CF65-341B-B425-9EDFC4E2F22F}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{11AB9AE9-CF65-341B-B425-9EDFC4E2F22F}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{1134F859-3DE4-34B1-924F-82CA38D4D4F3}.Debug|x64.ActiveCfg = Debug|x64
{1134F859-3DE4-34B1-924F-82CA38D4D4F3}.Debug|x64.Build.0 = Debug|x64
{1134F859-3DE4-34B1-924F-82CA38D4D4F3}.Release|x64.ActiveCfg = Release|x64
{1134F859-3DE4-34B1-924F-82CA38D4D4F3}.Release|x64.Build.0 = Release|x64
{1134F859-3DE4-34B1-924F-82CA38D4D4F3}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64
{1134F859-3DE4-34B1-924F-82CA38D4D4F3}.MinSizeRel|x64.Build.0 = MinSizeRel|x64
{1134F859-3DE4-34B1-924F-82CA38D4D4F3}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
{1134F859-3DE4-34B1-924F-82CA38D4D4F3}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
{E2F320FE-0C01-4C80-810C-3A92205A29DC}.Debug|x64.ActiveCfg = Debug|x64
{E2F320FE-0C01-4C80-810C-3A92205A29DC}.Debug|x64.Build.0 = Debug|x64
{E2F320FE-0C01-4C80-810C-3A92205A29DC}.Release|x64.ActiveCfg = Release|x64
{E2F320FE-0C01-4C80-810C-3A92205A29DC}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {A73289FB-DB51-3D6F-802E-B474CC102EDA}
EndGlobalSection
GlobalSection(ExtensibilityAddIns) = postSolution
SolutionGuid = {8CA1BE8F-87A9-4094-B549-E8C790F79D8C}
EndGlobalSection
EndGlobal

View File

@ -76,7 +76,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>zlib.lib; msmpi.lib; mkl_intel_ilp64.lib; mkl_sequential.lib; mkl_core.lib; kernel32.lib; user32.lib; gdi32.lib; winspool.lib; comdlg32.lib; advapi32.lib; shell32.lib; ole32.lib; oleaut32.lib; uuid.lib; odbc32.lib; odbccp32.lib; %(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>zlib.lib;msmpi.lib;mkl_intel_ilp64.lib;mkl_sequential.lib;mkl_core.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<StackReserveSize>100000000</StackReserveSize>
<TreatLinkerWarningAsErrors>true</TreatLinkerWarningAsErrors>
</Link>
@ -106,13 +106,20 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>zlib.lib; msmpi.lib; mkl_intel_ilp64.lib; mkl_sequential.lib; mkl_core.lib; kernel32.lib; user32.lib; gdi32.lib; winspool.lib; comdlg32.lib; advapi32.lib; shell32.lib; ole32.lib; oleaut32.lib; uuid.lib; odbc32.lib; odbccp32.lib; %(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>zlib.lib;msmpi.lib;mkl_intel_ilp64.lib;mkl_sequential.lib;mkl_core.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<StackReserveSize>100000000</StackReserveSize>
<TreatLinkerWarningAsErrors>true</TreatLinkerWarningAsErrors>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\src\3rd_party\ExceptionWithCallStack.cpp" />
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\entry_iterator.cpp" />
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\errors.cpp" />
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\path.cpp" />
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\pathie.cpp" />
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\pathie_ifstream.cpp" />
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\pathie_ofstream.cpp" />
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\temp.cpp" />
<ClCompile Include="..\src\3rd_party\sentencepiece\src\bpe_model.cc">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
@ -316,6 +323,103 @@
<ClCompile Include="..\src\3rd_party\yaml-cpp\binary_renamed.cpp" />
<ClCompile Include="..\src\3rd_party\yaml-cpp\yaml-node.cpp" />
<ClInclude Include="..\src\3rd_party\ExceptionWithCallStack.h" />
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\collectives.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\all_gather.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\all_reduce.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\broadcast.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\common.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\common_kernel.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\ll_kernel.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\primitives.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\reduce.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\reduce_kernel.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\reduce_scatter.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\bootstrap.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\common_coll.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\core.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\debug.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\enqueue.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\group.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\ibvwrap.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\nccl_net.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\net.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\nvlink.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\nvmlwrap.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\param.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\ring.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\rings.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\shm.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\socket.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\topo.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\transport.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\utils.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\pathie-cpp\include\entry_iterator.hpp" />
<ClInclude Include="..\src\3rd_party\pathie-cpp\include\errors.hpp" />
<ClInclude Include="..\src\3rd_party\pathie-cpp\include\path.hpp" />
<ClInclude Include="..\src\3rd_party\pathie-cpp\include\pathie.hpp" />
<ClInclude Include="..\src\3rd_party\pathie-cpp\include\pathie_ifstream.hpp" />
<ClInclude Include="..\src\3rd_party\pathie-cpp\include\pathie_ofstream.hpp" />
<ClInclude Include="..\src\3rd_party\pathie-cpp\include\temp.hpp" />
<ClInclude Include="..\src\3rd_party\sentencepiece\src\bpe_model.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
@ -459,7 +563,6 @@
<ClCompile Include="..\src\data\corpus_nbest.cpp" />
<ClCompile Include="..\src\data\text_input.cpp" />
<ClCompile Include="..\src\3rd_party\cnpy\cnpy.cpp" />
<ClCompile Include="..\src\3rd_party\svd\svd.cpp" />
<ClCompile Include="..\src\layers\loss.cpp" />
<ClCompile Include="..\src\layers\weight.cpp" />
<ClCompile Include="..\src\microsoft\quicksand.cpp">
@ -620,8 +723,6 @@
<ClInclude Include="..\src\3rd_party\spdlog\tests\catch.hpp" />
<ClInclude Include="..\src\3rd_party\spdlog\tests\includes.h" />
<ClInclude Include="..\src\3rd_party\spdlog\tests\utils.h" />
<ClInclude Include="..\src\3rd_party\svd\defs_and_types.h" />
<ClInclude Include="..\src\3rd_party\svd\svd.h" />
<ClInclude Include="..\src\3rd_party\yaml-cpp\anchor.h" />
<ClInclude Include="..\src\3rd_party\yaml-cpp\binary.h" />
<ClInclude Include="..\src\3rd_party\yaml-cpp\collectionstack.h" />
@ -738,7 +839,6 @@
<ClInclude Include="..\src\models\decoder.h" />
<ClInclude Include="..\src\models\encoder.h" />
<ClInclude Include="..\src\models\encoder_decoder.h" />
<ClInclude Include="..\src\models\hardatt.h" />
<ClInclude Include="..\src\models\model_base.h" />
<ClInclude Include="..\src\models\model_factory.h" />
<ClInclude Include="..\src\models\model_task.h" />
@ -808,6 +908,96 @@
<ClInclude Include="..\src\translator\translator.h" />
</ItemGroup>
<ItemGroup>
<None Include="..\src\3rd_party\nccl\src\bootstrap.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\all_gather.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\all_reduce.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\broadcast.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\device\all_gather.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\device\all_reduce.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\device\broadcast.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\device\functions.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\device\Makefile">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\device\reduce.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\device\reduce_scatter.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\reduce.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\reduce_scatter.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\init.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\Makefile">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\misc\enqueue.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\misc\group.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\misc\ibvwrap.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\misc\nvmlwrap.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\misc\rings.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\misc\utils.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\nccl.h.in">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\ring.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\transport.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\transport\net.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\transport\net_ib.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\transport\net_socket.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\transport\p2p.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\nccl\src\transport\shm.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>
<None Include="..\src\3rd_party\pathie-cpp\CHANGELOG" />
<None Include="..\src\3rd_party\pathie-cpp\LICENSE" />
<None Include="..\src\3rd_party\pathie-cpp\README.md" />
<None Include="..\src\3rd_party\sentencepiece\src\sentencepiece.proto">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
@ -846,6 +1036,7 @@
<None Include=".editorConfig" />
</ItemGroup>
<ItemGroup>
<Text Include="..\src\3rd_party\pathie-cpp\CMakeLists.txt" />
<Text Include="..\src\3rd_party\sentencepiece\src\CMakeLists.txt">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>

View File

@ -31,9 +31,6 @@
<ClCompile Include="..\src\3rd_party\cnpy\cnpy.cpp">
<Filter>3rd_party\cnpy</Filter>
</ClCompile>
<ClCompile Include="..\src\3rd_party\svd\svd.cpp">
<Filter>3rd_party\svd</Filter>
</ClCompile>
<ClCompile Include="..\src\tensors\backend.cpp">
<Filter>tensors</Filter>
</ClCompile>
@ -421,8 +418,26 @@
<ClCompile Include="..\src\rescorer\score_collector.cpp">
<Filter>rescorer</Filter>
</ClCompile>
<ClCompile Include="..\src\command\marian_train.cpp">
<Filter>command</Filter>
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\entry_iterator.cpp">
<Filter>3rd_party\pathie-cpp\src</Filter>
</ClCompile>
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\errors.cpp">
<Filter>3rd_party\pathie-cpp\src</Filter>
</ClCompile>
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\path.cpp">
<Filter>3rd_party\pathie-cpp\src</Filter>
</ClCompile>
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\pathie.cpp">
<Filter>3rd_party\pathie-cpp\src</Filter>
</ClCompile>
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\pathie_ifstream.cpp">
<Filter>3rd_party\pathie-cpp\src</Filter>
</ClCompile>
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\pathie_ofstream.cpp">
<Filter>3rd_party\pathie-cpp\src</Filter>
</ClCompile>
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\temp.cpp">
<Filter>3rd_party\pathie-cpp\src</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
@ -640,12 +655,6 @@
<ClInclude Include="..\src\3rd_party\spdlog\tests\utils.h">
<Filter>3rd_party\spdlog\tests</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\svd\defs_and_types.h">
<Filter>3rd_party\svd</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\svd\svd.h">
<Filter>3rd_party\svd</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\yaml-cpp\anchor.h">
<Filter>3rd_party\yaml-cpp</Filter>
</ClInclude>
@ -988,9 +997,6 @@
<ClInclude Include="..\src\models\encoder_decoder.h">
<Filter>models</Filter>
</ClInclude>
<ClInclude Include="..\src\models\hardatt.h">
<Filter>models</Filter>
</ClInclude>
<ClInclude Include="..\src\models\model_base.h">
<Filter>models</Filter>
</ClInclude>
@ -1345,6 +1351,118 @@
<ClInclude Include="..\src\3rd_party\sentencepiece\src\word_model_trainer.h">
<Filter>3rd_party\sentencepiece\src</Filter>
</ClInclude>
    <ClCompile Include="..\src\command\marian_train.cpp">
      <Filter>command</Filter>
    </ClCompile>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\collectives.h">
<Filter>3rd_party\nccl\src\collectives</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\all_gather.h">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\all_reduce.h">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\broadcast.h">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\common.h">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\common_kernel.h">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\ll_kernel.h">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\primitives.h">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\reduce.h">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\reduce_kernel.h">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\device\reduce_scatter.h">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\bootstrap.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\common_coll.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\core.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\debug.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\enqueue.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\group.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\ibvwrap.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\nccl_net.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\net.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\nvlink.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\nvmlwrap.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\param.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\ring.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\rings.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\shm.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\socket.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\topo.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\transport.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\nccl\src\include\utils.h">
<Filter>3rd_party\nccl\src\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\pathie-cpp\include\entry_iterator.hpp">
<Filter>3rd_party\pathie-cpp\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\pathie-cpp\include\errors.hpp">
<Filter>3rd_party\pathie-cpp\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\pathie-cpp\include\path.hpp">
<Filter>3rd_party\pathie-cpp\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\pathie-cpp\include\pathie.hpp">
<Filter>3rd_party\pathie-cpp\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\pathie-cpp\include\pathie_ifstream.hpp">
<Filter>3rd_party\pathie-cpp\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\pathie-cpp\include\pathie_ofstream.hpp">
<Filter>3rd_party\pathie-cpp\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\pathie-cpp\include\temp.hpp">
<Filter>3rd_party\pathie-cpp\include</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="3rd_party">
@ -1386,9 +1504,6 @@
<Filter Include="3rd_party\spdlog\tests">
<UniqueIdentifier>{880c8f51-3306-4d80-a682-7242341b0041}</UniqueIdentifier>
</Filter>
<Filter Include="3rd_party\svd">
<UniqueIdentifier>{880c8f51-3306-4d80-a682-7242341b0044}</UniqueIdentifier>
</Filter>
<Filter Include="3rd_party\yaml-cpp">
<UniqueIdentifier>{880c8f51-3306-4d80-a682-7242341b0047}</UniqueIdentifier>
</Filter>
@ -1482,6 +1597,36 @@
<Filter Include="3rd_party\sentencepiece\src">
<UniqueIdentifier>{638bf0e1-4f83-4b37-9077-2be549d75909}</UniqueIdentifier>
</Filter>
<Filter Include="3rd_party\nccl">
<UniqueIdentifier>{0ba105eb-79fb-4e2a-8940-f1ecebbcd4fe}</UniqueIdentifier>
</Filter>
<Filter Include="3rd_party\nccl\src">
<UniqueIdentifier>{fbc17f5e-3f10-44a9-b3ad-66ce12573174}</UniqueIdentifier>
</Filter>
<Filter Include="3rd_party\nccl\src\collectives">
<UniqueIdentifier>{c6036c35-5848-4fd5-b1a0-59e2042cbb69}</UniqueIdentifier>
</Filter>
<Filter Include="3rd_party\nccl\src\misc">
<UniqueIdentifier>{7b9a131d-9e0a-4c28-8a51-08232ff2e35e}</UniqueIdentifier>
</Filter>
<Filter Include="3rd_party\nccl\src\transport">
<UniqueIdentifier>{0bd9cca8-660b-46f6-aac6-691fb50245f0}</UniqueIdentifier>
</Filter>
<Filter Include="3rd_party\nccl\src\include">
<UniqueIdentifier>{2beba56f-5dda-4994-bef0-16170b6552b4}</UniqueIdentifier>
</Filter>
<Filter Include="3rd_party\nccl\src\collectives\device">
<UniqueIdentifier>{ac585624-4e66-42cd-8e4e-62cb90029610}</UniqueIdentifier>
</Filter>
<Filter Include="3rd_party\pathie-cpp">
<UniqueIdentifier>{825beb7c-2997-408b-af81-34ab5f14593a}</UniqueIdentifier>
</Filter>
<Filter Include="3rd_party\pathie-cpp\include">
<UniqueIdentifier>{db1dd5a2-f331-495d-9e3b-6dc1c01528ab}</UniqueIdentifier>
</Filter>
<Filter Include="3rd_party\pathie-cpp\src">
<UniqueIdentifier>{5d5ee615-192f-4b7f-bdfd-fb8316ceabc8}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<None Include=".editorConfig" />
@ -1524,10 +1669,109 @@
<None Include="..\src\3rd_party\sentencepiece\src\sentencepiece_model.proto">
<Filter>3rd_party\sentencepiece\src</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\bootstrap.cu">
<Filter>3rd_party\nccl\src</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\init.cu">
<Filter>3rd_party\nccl\src</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\Makefile">
<Filter>3rd_party\nccl\src</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\nccl.h.in">
<Filter>3rd_party\nccl\src</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\ring.cu">
<Filter>3rd_party\nccl\src</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\transport.cu">
<Filter>3rd_party\nccl\src</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\all_gather.cu">
<Filter>3rd_party\nccl\src\collectives</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\all_reduce.cu">
<Filter>3rd_party\nccl\src\collectives</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\broadcast.cu">
<Filter>3rd_party\nccl\src\collectives</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\reduce.cu">
<Filter>3rd_party\nccl\src\collectives</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\reduce_scatter.cu">
<Filter>3rd_party\nccl\src\collectives</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\device\all_gather.cu">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\device\all_reduce.cu">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\device\broadcast.cu">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\device\functions.cu">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\device\Makefile">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\device\reduce.cu">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\collectives\device\reduce_scatter.cu">
<Filter>3rd_party\nccl\src\collectives\device</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\misc\enqueue.cu">
<Filter>3rd_party\nccl\src\misc</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\misc\group.cu">
<Filter>3rd_party\nccl\src\misc</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\misc\ibvwrap.cu">
<Filter>3rd_party\nccl\src\misc</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\misc\nvmlwrap.cu">
<Filter>3rd_party\nccl\src\misc</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\misc\rings.cu">
<Filter>3rd_party\nccl\src\misc</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\misc\utils.cu">
<Filter>3rd_party\nccl\src\misc</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\transport\net.cu">
<Filter>3rd_party\nccl\src\transport</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\transport\net_ib.cu">
<Filter>3rd_party\nccl\src\transport</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\transport\net_socket.cu">
<Filter>3rd_party\nccl\src\transport</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\transport\p2p.cu">
<Filter>3rd_party\nccl\src\transport</Filter>
</None>
<None Include="..\src\3rd_party\nccl\src\transport\shm.cu">
<Filter>3rd_party\nccl\src\transport</Filter>
</None>
<None Include="..\src\3rd_party\pathie-cpp\CHANGELOG">
<Filter>3rd_party\pathie-cpp</Filter>
</None>
<None Include="..\src\3rd_party\pathie-cpp\LICENSE">
<Filter>3rd_party\pathie-cpp</Filter>
</None>
<None Include="..\src\3rd_party\pathie-cpp\README.md">
<Filter>3rd_party\pathie-cpp</Filter>
</None>
</ItemGroup>
<ItemGroup>
<Text Include="..\src\3rd_party\sentencepiece\src\CMakeLists.txt">
<Filter>3rd_party\sentencepiece\src</Filter>
</Text>
<Text Include="..\src\3rd_party\pathie-cpp\CMakeLists.txt">
<Filter>3rd_party\pathie-cpp</Filter>
</Text>
</ItemGroup>
</Project>