Patch summary
=============
Moves the SGD trainer's method bodies out of src/sgd.h into a new src/sgd.cu,
adds example shuffling to batch preparation, replaces the std::shared_ptr
members (which were initialised from addresses of caller-owned objects) with
plain non-owning pointers, renames batchSize_ to maxBatchSize_, points the
MNIST validators at ../scripts/test_model_single/model.npz, and changes the
batch validator's model to a tanh layer of width 100 followed by softmax.

Review fixes folded into the added src/sgd.cu:
  * SGD::Run allocated the feature tensor as {maxBatchSize_, numExamples};
    PrepareBatch writes maxBatchSize_ * numFeatures_ values, so the second
    dimension is now numFeatures_.
  * SGD::Run looped while endId < numExamples, which skipped the last full
    batch whenever numExamples is a multiple of maxBatchSize_; it now uses <=.
  * Explanatory comments were added, so that hunk is 154 lines instead of 143.

NOTE(review): this copy of the patch has lost text: the system header names
after `#include`, template arguments (e.g. `std::vector` with no element type),
the XML content of the marian/.cproject hunk, and the original indentation and
blank context lines. Indentation and blank context lines below are a
best-effort reconstruction; regenerate the patch from the repository before
applying it.

diff --git a/README.md b/README.md
index 6bee418b..67685375 100644
--- a/README.md
+++ b/README.md
@@ -11,14 +11,15 @@ Installation
 
 Requirements:
 
-* g++ with C++14
+* g++ with c++11
 * CUDA and CuDNN
+* Boost (>= 1.56)
 
 Exporting some paths for CuDNN may be required (put it, for example, in your `.bashrc` file):
 
     export PATH=$PATH:$HOME/.local/bin:/usr/local/cuda/bin
-    export LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cudnn-5/lib64
-    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cudnn-5/lib64
+    export LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/local/cudnn-5/lib64
+    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/local/cudnn-5/lib64
     export CPATH=$CPATH:/usr/local/cudnn-5/include
 
 Compilation with `cmake > 3.5`:
diff --git a/marian/.cproject b/marian/.cproject
index 2d8c666b..48ccc0b2 100644
--- a/marian/.cproject
+++ b/marian/.cproject
@@ -56,11 +56,11 @@
-
-
+
+
-
+
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index cb121111..6dc37391 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -5,6 +5,7 @@ cuda_add_library(marian_lib
   cnpy/cnpy.cpp
   exception.cpp
   expressions.cu
+  sgd.cu
   tensor.cu
   tensor_operators.cu
 )
diff --git a/src/sgd.cu b/src/sgd.cu
new file mode 100644
index 00000000..26121f2f
--- /dev/null
+++ b/src/sgd.cu
@@ -0,0 +1,154 @@
+#include
+#include
+#include
+#include "sgd.h"
+#include "thrust_functions.h"
+
+using namespace std;
+
+namespace marian {
+// Keeps non-owning pointers to the three expressions and references to the
+// caller's xData/yData vectors; only `params` and the scalars are copied.
+SGD::SGD(Expr& cost_func, Expr& inX, Expr& inY,
+         const std::vector params, float eta,
+         std::vector& xData, size_t numFeatures,
+         std::vector& yData, size_t numClasses,
+         size_t epochs, size_t batchSize)
+: cost_function_(&cost_func),
+  inX_(&inX),
+  inY_(&inY),
+  params_(params),
+  eta_(eta),
+  xData_(xData),
+  numFeatures_(numFeatures),
+  yData_(yData),
+  numClasses_(numClasses),
+  epochs_(epochs),
+  maxBatchSize_(batchSize)
+{}
+
+// Trains for epochs_ passes over the data. Each pass walks the shuffled
+// example order in consecutive batches of exactly maxBatchSize_ examples and
+// runs forward, backward and a parameter update per batch. Examples left
+// over after the last full batch are skipped.
+void SGD::Run()
+{
+  std::srand ( unsigned ( std::time(0) ) ); // time-based seed for the C random generator
+
+  size_t numExamples = xData_.size()/ numFeatures_;
+  Tensor xt({(int)maxBatchSize_, (int)numFeatures_}, 0.0f); // batch x features, the amount PrepareBatch() fills
+  Tensor yt({(int)maxBatchSize_, (int)numClasses_}, 0.0f);
+
+  vector shuffle = CreateShuffle(numExamples); // one permutation, reused by every epoch
+  //vector shuffle;
+
+  for (size_t numEpoch = 0; numEpoch < epochs_; ++numEpoch) {
+    std::cerr << "Starting epoch #" << numEpoch << std::endl;
+    size_t startId = 0;
+    size_t endId = startId + maxBatchSize_;
+
+    while (endId <= numExamples) { // <= keeps the batch that ends exactly at numExamples
+      PrepareBatch(startId, endId, maxBatchSize_, shuffle, xt, yt);
+      *inX_ = xt;
+      *inY_ = yt;
+
+      cost_function_->forward(maxBatchSize_);
+      cost_function_->backward();
+
+      UpdateModel();
+
+      startId += maxBatchSize_;
+      endId += maxBatchSize_;
+    }
+  }
+}
+
+// Returns the indices 0..numExamples-1 in a random order.
+std::vector SGD::CreateShuffle(size_t numExamples) const {
+  vector ret(numExamples);
+  std::iota(ret.begin(), ret.end(), 0);
+  std::random_shuffle ( ret.begin(), ret.end() );
+  /*
+  cerr << "shuffled" << endl;
+  for (size_t i = 0; i < ret.size(); ++i) {
+    cerr << ret[i] << " ";
+  }
+  */
+  return ret;
+}
+
+// Gathers the examples shuffle[startId..endId) from xData_/yData_ into
+// contiguous buffers and hands them to xt.set() and yt.set().
+// batchSize sizes the buffers, so endId - startId must not exceed it.
+void SGD::PrepareBatch(
+    size_t startId,
+    size_t endId,
+    size_t batchSize,
+    const std::vector &shuffle,
+    Tensor& xt,
+    Tensor& yt) {
+  /*
+  std::vector x(xData_.begin() + startId * numFeatures_,
+                       xData_.begin() + endId * numFeatures_);
+  std::vector y(yData_.begin() + startId * numClasses_,
+                       yData_.begin() + endId * numClasses_);
+  */
+  std::vector x(batchSize * numFeatures_);
+  std::vector y(batchSize * numClasses_);
+
+  /*
+  cerr << "startId=" << startId
+      << " " << endId
+      << " " << batchSize
+      << endl;
+  cerr << "numExamples=" << shuffle.size() << endl;
+  cerr << "numFeatures_=" << numFeatures_ << " " << numClasses_ << endl;
+  cerr << "sizes=" << x.size()
+      << " " << y.size()
+      << " " << xData_.size()
+      << " " << yData_.size()
+      << endl;
+  */
+  size_t startXId = 0;
+  size_t startYId = 0;
+
+  for (size_t i = startId; i < endId; ++i) {
+    size_t ind = shuffle[i]; // position of the example in the unshuffled data
+    size_t startXDataId = ind * numFeatures_;
+    size_t startYDataId = ind * numClasses_;
+
+    size_t endXDataId = startXDataId + numFeatures_;
+    size_t endYDataId = startYDataId + numClasses_;
+    /*
+    cerr << "i=" << i
+        << " " << ind
+        << " " << startXDataId << "-" << endXDataId
+        << " " << startYDataId << "-" << endYDataId
+        << endl;
+    */
+    std::copy(xData_.begin() + startXDataId,
+              xData_.begin() + endXDataId,
+              x.begin() + startXId);
+
+    std::copy(yData_.begin() + startYDataId,
+              yData_.begin() + endYDataId,
+              y.begin() + startYId);
+
+    startXId += numFeatures_;
+    startYId += numClasses_;
+  }
+
+  xt.set(x);
+  yt.set(y);
+}
+
+// Applies the update val = val - eta_ * grad to each parameter through Element().
+void SGD::UpdateModel() {
+  for (auto& param : params_) {
+    using namespace thrust::placeholders;
+    Element(_1 = _1 - eta_ * _2, param->val(), param->grad());
+  }
+}
+
+} // namespace
+
diff --git a/src/sgd.h b/src/sgd.h
index 0dab8df0..33364049 100644
--- a/src/sgd.h
+++ b/src/sgd.h
@@ -5,6 +5,7 @@
 
 #include "expressions.h"
 #include "thrust_functions.h"
+#include "tensor_operators.h"
 
 namespace marian {
 
@@ -14,67 +15,14 @@ class SGD {
         const std::vector params, float eta,
         std::vector& xData, size_t numFeatures,
         std::vector& yData, size_t numClasses,
-        size_t epochs, size_t batchSize)
-    : cost_function_(&cost_func),
-      inX_(&inX),
-      inY_(&inY),
-      params_(params),
-      eta_(eta),
-      xData_(xData),
-      numFeatures_(numFeatures),
-      yData_(yData),
-      numClasses_(numClasses),
-      epochs_(epochs),
-      batchSize_(batchSize)
-    {}
+        size_t epochs, size_t batchSize);
 
-    void Run() {
-      size_t numExamples = xData_.size()/ numFeatures_;
-      Tensor xt({(int)batchSize_, (int)numExamples}, 0.0f);
-      Tensor yt({(int)batchSize_, (int)numClasses_}, 0.0f);
-
-      for (size_t numEpoch = 0; numEpoch < epochs_; ++numEpoch) {
-        std::cerr << "Starting epoch #" << numEpoch << std::endl;
-        size_t startId = 0;
-        size_t endId = startId + batchSize_;
-
-        while (endId < numExamples) {
-          PrepareBatch(startId, endId, xt, yt);
-          *inX_ = xt;
-          *inY_ = yt;
-
-          cost_function_->forward(batchSize_);
-          cost_function_->backward();
-
-          UpdateModel();
-
-          startId += batchSize_;
-          endId += batchSize_;
-        }
-      }
-    }
-
-    void PrepareBatch(size_t startId, size_t endId, Tensor& xt, Tensor& yt) {
-      std::vector x(xData_.begin() + startId * numFeatures_,
-                           xData_.begin() + endId * numFeatures_);
-      std::vector y(yData_.begin() + startId * numClasses_,
-                           yData_.begin() + endId * numClasses_);
-
-      xt.set(x);
-      yt.set(y);
-    }
-
-    void UpdateModel() {
-      for (auto& param : params_) {
-        using namespace thrust::placeholders;
-        Element(_1 = _1 - eta_ * _2, param->val(), param->grad());
-      }
-    }
+    void Run();
 
   private:
-    std::shared_ptr cost_function_;
-    std::shared_ptr inX_;
-    std::shared_ptr inY_;
+    Expr *cost_function_; // non-owning: address of the Expr passed to the constructor
+    Expr *inX_;
+    Expr *inY_;
     std::vector params_;
     const float eta_;
     std::vector& xData_;
@@ -82,7 +30,18 @@ class SGD {
     std::vector& yData_;
     const size_t numClasses_;
     const size_t epochs_;
-    const size_t batchSize_;
+    const size_t maxBatchSize_; // number of examples in every batch Run() processes
+
+    std::vector CreateShuffle(size_t numExamples) const;
+    void PrepareBatch(
+        size_t startId,
+        size_t endId,
+        size_t batchSize,
+        const std::vector &shuffle,
+        Tensor& xt,
+        Tensor& yt);
+
+    void UpdateModel();
 };
 
 } // namespace marian
diff --git a/src/validate_mnist.cu b/src/validate_mnist.cu
index 9d9cdf8b..43e1fedc 100644
--- a/src/validate_mnist.cu
+++ b/src/validate_mnist.cu
@@ -21,7 +21,7 @@ int main(int argc, char** argv) {
   std::cerr << "Done." << std::endl;
 
   std::cerr << "Loading model params...";
-  NpzConverter converter("../scripts/test_model/model.npz");
+  NpzConverter converter("../scripts/test_model_single/model.npz");
   std::vector wData, bData;
   Shape wShape, bShape;
 
diff --git a/src/validate_mnist_batch.cu b/src/validate_mnist_batch.cu
index ac4e7359..1c66198a 100644
--- a/src/validate_mnist_batch.cu
+++ b/src/validate_mnist_batch.cu
@@ -21,22 +21,39 @@ int main(int argc, char** argv) {
   std::cerr << "\tDone." << std::endl;
 
   std::cerr << "Loading model params...";
-  NpzConverter converter("../scripts/test_model/model.npz");
-  std::vector wData;
-  Shape wShape;
-  converter.Load("weights", wData, wShape);
+  NpzConverter converter("../scripts/test_model_single/model.npz");
 
-  std::vector bData;
-  Shape bShape;
-  converter.Load("bias", bData, bShape);
+  std::vector wData1;
+  Shape wShape1;
+  converter.Load("weights1", wData1, wShape1);
+
+  std::vector bData1;
+  Shape bShape1;
+  converter.Load("bias1", bData1, bShape1);
+
+  std::vector wData2;
+  Shape wShape2;
+  converter.Load("weights2", wData2, wShape2);
+
+  std::vector bData2;
+  Shape bShape2;
+  converter.Load("bias2", bData2, bShape2);
 
 
-  auto initW = [wData](Tensor t) {
-    t.set(wData);
+  auto initW1 = [wData1](Tensor t) {
+    t.set(wData1);
   };
 
-  auto initB = [bData](Tensor t) {
-    t.set(bData);
+  auto initB1 = [bData1](Tensor t) {
+    t.set(bData1);
+  };
+
+  auto initW2 = [wData2](Tensor t) {
+    t.set(wData2);
+  };
+
+  auto initB2 = [bData2](Tensor t) {
+    t.set(bData2);
   };
 
   std::cerr << "\tDone." << std::endl;
@@ -45,11 +62,15 @@ int main(int argc, char** argv) {
   auto x = input(shape={whatevs, IMAGE_SIZE}, name="X");
   auto y = input(shape={whatevs, LABEL_SIZE}, name="Y");
 
-  auto w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=initW);
-  auto b = param(shape={1, LABEL_SIZE}, name="b0", init=initB);
+  auto w1 = param(shape={IMAGE_SIZE, 100}, name="W0", init=initW1);
+  auto b1 = param(shape={1, 100}, name="b0", init=initB1);
+  auto w2 = param(shape={100, LABEL_SIZE}, name="W1", init=initW2);
+  auto b2 = param(shape={1, LABEL_SIZE}, name="b1", init=initB2);
 
   std::cerr << "Building model...";
-  auto predict = softmax(dot(x, w) + b, axis=1, name="pred");
+  auto layer1 = tanh(dot(x, w1) + b1);
+  auto layer2 = softmax(dot(layer1, w2) + b2, axis=1, name="layer2");
+  auto predict = layer2;
 
   std::cerr << "Done." << std::endl;
 
@@ -77,6 +98,7 @@ int main(int argc, char** argv) {
         if (testLabels[startId * LABEL_SIZE + i + j]) correct = j;
         if (results[i + j] > results[i + predicted]) predicted = j;
       }
+      /*std::cerr << "CORRECT: " << correct << " PREDICTED: " << predicted << std::endl;*/
       acc += (correct == predicted);
     }
 