From 2583e0fa42e1eae4ae7e03812d33773fa9469017 Mon Sep 17 00:00:00 2001 From: Marcin Junczys-Dowmunt Date: Thu, 15 Sep 2016 11:05:14 +0200 Subject: [PATCH 1/7] changed numerical range in random init --- src/param_initializers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/param_initializers.h b/src/param_initializers.h index 3e442c6e..5a04a25c 100644 --- a/src/param_initializers.h +++ b/src/param_initializers.h @@ -22,7 +22,7 @@ void ones(Tensor t) { void randreal(Tensor t) { std::random_device device; std::default_random_engine engine(device()); - std::uniform_real_distribution<> dist(0, 0.01); + std::uniform_real_distribution<> dist(0, 0.1); auto gen = std::bind(dist, engine); std::vector vals(t.size()); From 499faceb8ea5e7b9bf4cbe9d143173961dcea3dc Mon Sep 17 00:00:00 2001 From: Marcin Junczys-Dowmunt Date: Thu, 15 Sep 2016 12:23:31 +0200 Subject: [PATCH 2/7] tidied up initializers --- src/param_initializers.h | 31 ++++++++++++++++++++++++------- src/validate_mnist.cu | 7 +++---- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/param_initializers.h b/src/param_initializers.h index 5a04a25c..04c6b48e 100644 --- a/src/param_initializers.h +++ b/src/param_initializers.h @@ -10,25 +10,42 @@ namespace marian { void zeros(Tensor t) { - std::vector vals(t.size(), 0.0f); - thrust::copy(vals.begin(), vals.end(), t.begin()); + t.set(0.f); } void ones(Tensor t) { - std::vector vals(t.size(), 1.0f); - thrust::copy(vals.begin(), vals.end(), t.begin()); + t.set(1.0f); } -void randreal(Tensor t) { +template +void distribution(Tensor t, float a=0.0, float b=0.1) { std::random_device device; std::default_random_engine engine(device()); - std::uniform_real_distribution<> dist(0, 0.1); + Distribution dist(a, b); auto gen = std::bind(dist, engine); std::vector vals(t.size()); std::generate(begin(vals), end(vals), gen); - thrust::copy(vals.begin(), vals.end(), t.begin()); + t << vals; } +std::function normal(float mean = 0.0, float std = 0.1) { + return [mean, std](Tensor t) { + distribution>(t, mean, std); + }; +} + +std::function uniform(float a = 0.0, float b = 0.1) { + return [a, b](Tensor t) { + distribution>(t, a, b); + }; +} + +std::function from_vector(const std::vector& v) { + return [&v](Tensor t) { + t << v; + }; +} + } // namespace marian diff --git a/src/validate_mnist.cu b/src/validate_mnist.cu index e9b5735d..7d812e36 100644 --- a/src/validate_mnist.cu +++ b/src/validate_mnist.cu @@ -9,7 +9,7 @@ using namespace keywords; int main(int argc, char** argv) { - cudaSetDevice(0); + cudaSetDevice(1); const size_t IMAGE_SIZE = 784; const size_t LABEL_SIZE = 10; @@ -20,7 +20,6 @@ int main(int argc, char** argv) { std::vector testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE); std::cerr << "Done." << std::endl; - std::cerr << "Loading model params..."; NpzConverter converter("../scripts/test_model/model.npz"); @@ -36,9 +35,9 @@ int main(int argc, char** argv) { auto y = input(shape={whatevs, LABEL_SIZE}); auto w = param(shape={IMAGE_SIZE, LABEL_SIZE}, - init=[wData](Tensor t) { t.set(wData); }); + init=from_vector(wData)); auto b = param(shape={1, LABEL_SIZE}, - init=[bData](Tensor t) { t.set(bData); }); + init=from_vector(bData)); auto probs = softmax(dot(x, w) + b, axis=1); auto cost = -mean(sum(y * log(probs), axis=1), axis=0); From 6d3f67e9555d76c7926728b8aae07e535faeb028 Mon Sep 17 00:00:00 2001 From: Andre Martins Date: Thu, 15 Sep 2016 13:54:46 +0100 Subject: [PATCH 3/7] Fixed backward for fast softmax. --- src/expression_operators.h | 14 -------------- src/graph_operators.h | 7 +++---- src/test.cu | 2 +- 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/src/expression_operators.h b/src/expression_operators.h index 253047d3..957ceed1 100644 --- a/src/expression_operators.h +++ b/src/expression_operators.h @@ -153,20 +153,6 @@ inline Expr sum(Expr a, Args ...args) { template inline Expr softmax(Expr a, Args ...args) { Expr e = exp(a); -#if 0 - ChainPtr n = a.node(); - auto print_shape = [n]() -> Shape { - std::cerr << "Shape: "; - for (auto val : n->val().shape()) { - std::cerr << val << " "; - } - std::cerr << std::endl; - return {1,1}; - }; - using namespace keywords; - Expr one = ones(shape={1, 1}, lazy_shape=print_shape); -#endif - return e / sum(e, args...); } diff --git a/src/graph_operators.h b/src/graph_operators.h index c7c0a057..a6320201 100644 --- a/src/graph_operators.h +++ b/src/graph_operators.h @@ -162,11 +162,10 @@ struct SoftmaxNodeOp : public UnaryNodeOp { // For each row, the Jacobian times vector is given by: // J * dy = p .* (dy - avg*1) // where avg = p'*dy and p is the softmax output (probabilities). - Tensor result = adj_; + Tensor result(adj_.shape()); + thrust::copy(adj_.begin(), adj_.end(), result.begin()); SubtractMean(&result, val_); - // beta set to 1.0 in gemm, C = alpha * dot(A,B) + beta * C - // to sum gradients from different graph parts. - Prod(a_->grad(), adj_, result, false, false, 1.0); + Element(_1 += _2 * _3, a_->grad(), val_, result); } }; diff --git a/src/test.cu b/src/test.cu index 629c1bc2..85636dd3 100644 --- a/src/test.cu +++ b/src/test.cu @@ -19,7 +19,7 @@ int main(int argc, char** argv) { //Expr b = param(shape={1, 2}, name="b0"); std::cerr << "Building model..."; - auto predict = softmax(dot(x, w), + auto predict = softmax_fast(dot(x, w), axis=1, name="pred"); auto graph = -mean(sum(y * log(predict), axis=1), axis=0, name="cost"); From 61d8b3cb83c0e16ca979df8d3798a89ed3fc42da Mon Sep 17 00:00:00 2001 From: Andre Martins Date: Thu, 15 Sep 2016 15:10:03 +0100 Subject: [PATCH 4/7] Replaced softmax with fast softmax in MNIST. --- src/test.cu | 144 ++++++++++++++++++++++-------------------- src/validate_mnist.cu | 2 +- 2 files changed, 76 insertions(+), 70 deletions(-) diff --git a/src/test.cu b/src/test.cu index 85636dd3..e2ac2d45 100644 --- a/src/test.cu +++ b/src/test.cu @@ -9,9 +9,10 @@ int main(int argc, char** argv) { /*auto images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg);*/ /*auto labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numImg);*/ +#if 1 using namespace marian; using namespace keywords; - + Expr x = input(shape={1, 2}); Expr y = input(shape={1, 2}); @@ -41,75 +42,80 @@ int main(int argc, char** argv) { std::cerr << graph.val().Debug() << std::endl; std::cerr << w.grad().Debug() << std::endl; //std::cerr << b.grad().Debug() << std::endl; +#else + -// using namespace marian; -// using namespace keywords; -// -// const size_t BATCH_SIZE = 500; -// const size_t IMAGE_SIZE = 784; -// const size_t LABEL_SIZE = 10; -// -// Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X"); -// Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y"); -// -// Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0"); -// Expr b = param(shape={1, LABEL_SIZE}, name="b0"); -// -// Expr z = dot(x, w) + b; -// Expr lr = softmax(z, axis=1, name="pred"); -// Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); -// //cerr << "x=" << Debug(lr.val().shape()) << endl; -// -// int numofdata; -// //vector images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE); -// //vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); -// vector images = datasets::mnist::ReadImages("../examples/mnist/train-images-idx3-ubyte", numofdata, IMAGE_SIZE); -// vector labels = datasets::mnist::ReadLabels("../examples/mnist/train-labels-idx1-ubyte", numofdata, LABEL_SIZE); -// cerr << "images=" << images.size() << " labels=" << labels.size() << endl; -// cerr << "numofdata=" << numofdata << endl; -// -// size_t startInd = 0; -// size_t startIndData = 0; -// while (startInd < numofdata) { -// size_t batchSize = (startInd + BATCH_SIZE < numofdata) ? BATCH_SIZE : numofdata - startInd; -// cerr << "startInd=" << startInd -// << " startIndData=" << startIndData -// << " batchSize=" << batchSize << endl; -// -// Tensor tx({numofdata, IMAGE_SIZE}, 1); -// Tensor ty({numofdata, LABEL_SIZE}, 1); -// -// tx.Load(images.begin() + startIndData, images.begin() + startIndData + batchSize * IMAGE_SIZE); -// ty.Load(labels.begin() + startInd, labels.begin() + startInd + batchSize); -// -// //cerr << "tx=" << Debug(tx.shape()) << endl; -// //cerr << "ty=" << Debug(ty.shape()) << endl; -// -// x = tx; -// y = ty; -// -// cerr << "x=" << Debug(x.val().shape()) << endl; -// cerr << "y=" << Debug(y.val().shape()) << endl; -// -// -// graph.forward(batchSize); -// -// cerr << "w=" << Debug(w.val().shape()) << endl; -// cerr << "b=" << Debug(b.val().shape()) << endl; -// std::cerr << "z: " << Debug(z.val().shape()) << endl; -// std::cerr << "lr: " << Debug(lr.val().shape()) << endl; -// std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ; -// -// //std::cerr << "scores=" << scores.val().Debug() << endl; -// //std::cerr << "lr=" << lr.val().Debug() << endl; -// -// //graph.backward(); -// -// //std::cerr << graph["pred"].val()[0] << std::endl; -// -// startInd += batchSize; -// startIndData += batchSize * IMAGE_SIZE; -// } + using namespace marian; + using namespace keywords; + using namespace std; + const size_t BATCH_SIZE = 500; + const size_t IMAGE_SIZE = 784; + const size_t LABEL_SIZE = 10; + + Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X"); + Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y"); + + Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0"); + Expr b = param(shape={1, LABEL_SIZE}, name="b0"); + + Expr z = dot(x, w) + b; + Expr lr = softmax(z, axis=1, name="pred"); + Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); + //cerr << "x=" << Debug(lr.val().shape()) << endl; + + int numofdata; + //vector images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE); + //vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); + vector images = datasets::mnist::ReadImages("../examples/mnist/train-images-idx3-ubyte", numofdata, IMAGE_SIZE); + vector labels = datasets::mnist::ReadLabels("../examples/mnist/train-labels-idx1-ubyte", numofdata, LABEL_SIZE); + cerr << "images=" << images.size() << " labels=" << labels.size() << endl; + cerr << "numofdata=" << numofdata << endl; + + size_t startInd = 0; + size_t startIndData = 0; + while (startInd < numofdata) { + size_t batchSize = (startInd + BATCH_SIZE < numofdata) ? BATCH_SIZE : numofdata - startInd; + cerr << "startInd=" << startInd + << " startIndData=" << startIndData + << " batchSize=" << batchSize << endl; + + Tensor tx({numofdata, IMAGE_SIZE}, 1); + Tensor ty({numofdata, LABEL_SIZE}, 1); + + tx.set(images.begin() + startIndData, images.begin() + startIndData + batchSize * IMAGE_SIZE); + ty.set(labels.begin() + startInd, labels.begin() + startInd + batchSize); + + //cerr << "tx=" << Debug(tx.shape()) << endl; + //cerr << "ty=" << Debug(ty.shape()) << endl; + + x = tx; + y = ty; + + cerr << "x=" << Debug(x.val().shape()) << endl; + cerr << "y=" << Debug(y.val().shape()) << endl; + + + graph.forward(batchSize); + + cerr << "w=" << Debug(w.val().shape()) << endl; + cerr << "b=" << Debug(b.val().shape()) << endl; + std::cerr << "z: " << Debug(z.val().shape()) << endl; + std::cerr << "lr: " << Debug(lr.val().shape()) << endl; + std::cerr << "Log-likelihood: " << graph.val().Debug() << endl ; + + //std::cerr << "scores=" << scores.val().Debug() << endl; + //std::cerr << "lr=" << lr.val().Debug() << endl; + + graph.backward(); + std::cerr << w.grad().Debug() << std::endl; + + //std::cerr << graph["pred"].val()[0] << std::endl; + + startInd += batchSize; + startIndData += batchSize * IMAGE_SIZE; + } +#endif + return 0; } diff --git a/src/validate_mnist.cu b/src/validate_mnist.cu index 7d812e36..9d9cdf8b 100644 --- a/src/validate_mnist.cu +++ b/src/validate_mnist.cu @@ -39,7 +39,7 @@ int main(int argc, char** argv) { auto b = param(shape={1, LABEL_SIZE}, init=from_vector(bData)); - auto probs = softmax(dot(x, w) + b, axis=1); + auto probs = softmax_fast(dot(x, w) + b, axis=1); auto cost = -mean(sum(y * log(probs), axis=1), axis=0); std::cerr << "Done." << std::endl; From 037679f8a3d41e5d71013735563929ab208510a5 Mon Sep 17 00:00:00 2001 From: Roman Grundkiewicz Date: Thu, 15 Sep 2016 17:02:25 +0200 Subject: [PATCH 5/7] update requirements and paths to be exported --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6bee418b..67685375 100644 --- a/README.md +++ b/README.md @@ -11,14 +11,15 @@ Installation Requirements: -* g++ with C++14 +* g++ with c++11 * CUDA and CuDNN +* Boost (>= 1.56) Exporting some paths for CuDNN may be required (put it, for example, in your `.bashrc` file): export PATH=$PATH:$HOME/.local/bin:/usr/local/cuda/bin - export LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cudnn-5/lib64 - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cudnn-5/lib64 + export LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/local/cudnn-5/lib64 + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/local/cudnn-5/lib64 export CPATH=$CPATH:/usr/local/cudnn-5/include Compilation with `cmake > 3.5`: From 4a1ab1f5b458814522c7c2f897999d1e74534124 Mon Sep 17 00:00:00 2001 From: Maximiliana Behnke Date: Thu, 15 Sep 2016 17:41:24 +0200 Subject: [PATCH 6/7] Change path to Keras single layer model --- src/validate_mnist.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/validate_mnist.cu b/src/validate_mnist.cu index 9d9cdf8b..43e1fedc 100644 --- a/src/validate_mnist.cu +++ b/src/validate_mnist.cu @@ -21,7 +21,7 @@ int main(int argc, char** argv) { std::cerr << "Done." << std::endl; std::cerr << "Loading model params..."; - NpzConverter converter("../scripts/test_model/model.npz"); + NpzConverter converter("../scripts/test_model_single/model.npz"); std::vector wData, bData; Shape wShape, bShape; From eba5b462257a9949fd124378c53e9bf7b357b1d3 Mon Sep 17 00:00:00 2001 From: Maximiliana Behnke Date: Thu, 15 Sep 2016 17:41:58 +0200 Subject: [PATCH 7/7] Validation on 2-layer Keras model --- src/validate_mnist_batch.cu | 51 ++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/src/validate_mnist_batch.cu b/src/validate_mnist_batch.cu index ac4e7359..e2e6438c 100644 --- a/src/validate_mnist_batch.cu +++ b/src/validate_mnist_batch.cu @@ -21,22 +21,38 @@ int main(int argc, char** argv) { std::cerr << "\tDone." << std::endl; std::cerr << "Loading model params..."; - NpzConverter converter("../scripts/test_model/model.npz"); + NpzConverter converter("../scripts/test_model_multi/model.npz"); - std::vector wData; - Shape wShape; - converter.Load("weights", wData, wShape); + std::vector wData1; + Shape wShape1; + converter.Load("weights1", wData1, wShape1); + + std::vector bData1; + Shape bShape1; + converter.Load("bias1", bData1, bShape1); + + std::vector wData2; + Shape wShape2; + converter.Load("weights2", wData2, wShape2); + + std::vector bData2; + Shape bShape2; + converter.Load("bias2", bData2, bShape2); - std::vector bData; - Shape bShape; - converter.Load("bias", bData, bShape); - - auto initW = [wData](Tensor t) { - t.set(wData); + auto initW1 = [wData1](Tensor t) { + t.set(wData1); }; - auto initB = [bData](Tensor t) { - t.set(bData); + auto initB1 = [bData1](Tensor t) { + t.set(bData1); + }; + + auto initW2 = [wData2](Tensor t) { + t.set(wData2); + }; + + auto initB2 = [bData2](Tensor t) { + t.set(bData2); }; std::cerr << "\tDone." << std::endl; @@ -45,11 +61,15 @@ int main(int argc, char** argv) { auto x = input(shape={whatevs, IMAGE_SIZE}, name="X"); auto y = input(shape={whatevs, LABEL_SIZE}, name="Y"); - auto w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=initW); - auto b = param(shape={1, LABEL_SIZE}, name="b0", init=initB); + auto w1 = param(shape={IMAGE_SIZE, 100}, name="W0", init=initW1); + auto b1 = param(shape={1, 100}, name="b0", init=initB1); + auto w2 = param(shape={100, LABEL_SIZE}, name="W1", init=initW2); + auto b2 = param(shape={1, LABEL_SIZE}, name="b1", init=initB2); std::cerr << "Building model..."; - auto predict = softmax(dot(x, w) + b, axis=1, name="pred"); + auto layer1 = tanh(dot(x, w1) + b1); + auto layer2 = softmax(dot(layer1, w2) + b2, axis=1, name="layer2"); + auto predict = layer2; std::cerr << "Done." << std::endl; @@ -77,6 +97,7 @@ int main(int argc, char** argv) { if (testLabels[startId * LABEL_SIZE + i + j]) correct = j; if (results[i + j] > results[i + predicted]) predicted = j; } + /*std::cerr << "CORRECT: " << correct << " PREDICTED: " << predicted << std::endl;*/ acc += (correct == predicted); }