From 61d8b3cb83c0e16ca979df8d3798a89ed3fc42da Mon Sep 17 00:00:00 2001 From: Andre Martins Date: Thu, 15 Sep 2016 15:10:03 +0100 Subject: [PATCH] Replaced softmax with fast softmax in MNIST. --- src/test.cu | 144 ++++++++++++++++++++++-------------------- src/validate_mnist.cu | 2 +- 2 files changed, 76 insertions(+), 70 deletions(-) diff --git a/src/test.cu b/src/test.cu index 85636dd3..e2ac2d45 100644 --- a/src/test.cu +++ b/src/test.cu @@ -9,9 +9,10 @@ int main(int argc, char** argv) { /*auto images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg);*/ /*auto labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numImg);*/ +#if 1 using namespace marian; using namespace keywords; - + Expr x = input(shape={1, 2}); Expr y = input(shape={1, 2}); @@ -41,75 +42,80 @@ int main(int argc, char** argv) { std::cerr << graph.val().Debug() << std::endl; std::cerr << w.grad().Debug() << std::endl; //std::cerr << b.grad().Debug() << std::endl; +#else + -// using namespace marian; -// using namespace keywords; -// -// const size_t BATCH_SIZE = 500; -// const size_t IMAGE_SIZE = 784; -// const size_t LABEL_SIZE = 10; -// -// Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X"); -// Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y"); -// -// Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0"); -// Expr b = param(shape={1, LABEL_SIZE}, name="b0"); -// -// Expr z = dot(x, w) + b; -// Expr lr = softmax(z, axis=1, name="pred"); -// Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); -// //cerr << "x=" << Debug(lr.val().shape()) << endl; -// -// int numofdata; -// //vector images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE); -// //vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); -// vector images = datasets::mnist::ReadImages("../examples/mnist/train-images-idx3-ubyte", numofdata, IMAGE_SIZE); -// vector labels = datasets::mnist::ReadLabels("../examples/mnist/train-labels-idx1-ubyte", numofdata, LABEL_SIZE); -// cerr << "images=" << images.size() << " labels=" << labels.size() << endl; -// cerr << "numofdata=" << numofdata << endl; -// -// size_t startInd = 0; -// size_t startIndData = 0; -// while (startInd < numofdata) { -// size_t batchSize = (startInd + BATCH_SIZE < numofdata) ? BATCH_SIZE : numofdata - startInd; -// cerr << "startInd=" << startInd -// << " startIndData=" << startIndData -// << " batchSize=" << batchSize << endl; -// -// Tensor tx({numofdata, IMAGE_SIZE}, 1); -// Tensor ty({numofdata, LABEL_SIZE}, 1); -// -// tx.Load(images.begin() + startIndData, images.begin() + startIndData + batchSize * IMAGE_SIZE); -// ty.Load(labels.begin() + startInd, labels.begin() + startInd + batchSize); -// -// //cerr << "tx=" << Debug(tx.shape()) << endl; -// //cerr << "ty=" << Debug(ty.shape()) << endl; -// -// x = tx; -// y = ty; -// -// cerr << "x=" << Debug(x.val().shape()) << endl; -// cerr << "y=" << Debug(y.val().shape()) << endl; -// -// -// graph.forward(batchSize); -// -// cerr << "w=" << Debug(w.val().shape()) << endl; -// cerr << "b=" << Debug(b.val().shape()) << endl; -// std::cerr << "z: " << Debug(z.val().shape()) << endl; -// std::cerr << "lr: " << Debug(lr.val().shape()) << endl; -// std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ; -// -// //std::cerr << "scores=" << scores.val().Debug() << endl; -// //std::cerr << "lr=" << lr.val().Debug() << endl; -// -// //graph.backward(); -// -// //std::cerr << graph["pred"].val()[0] << std::endl; -// -// startInd += batchSize; -// startIndData += batchSize * IMAGE_SIZE; -// } + using namespace marian; + using namespace keywords; + using namespace std; + const size_t BATCH_SIZE = 500; + const size_t IMAGE_SIZE = 784; + const size_t LABEL_SIZE = 10; + + Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X"); + Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y"); + + Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0"); + Expr b = param(shape={1, LABEL_SIZE}, name="b0"); + + Expr z = dot(x, w) + b; + Expr lr = softmax(z, axis=1, name="pred"); + Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); + //cerr << "x=" << Debug(lr.val().shape()) << endl; + + int numofdata; + //vector images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE); + //vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); + vector images = datasets::mnist::ReadImages("../examples/mnist/train-images-idx3-ubyte", numofdata, IMAGE_SIZE); + vector labels = datasets::mnist::ReadLabels("../examples/mnist/train-labels-idx1-ubyte", numofdata, LABEL_SIZE); + cerr << "images=" << images.size() << " labels=" << labels.size() << endl; + cerr << "numofdata=" << numofdata << endl; + + size_t startInd = 0; + size_t startIndData = 0; + while (startInd < numofdata) { + size_t batchSize = (startInd + BATCH_SIZE < numofdata) ? BATCH_SIZE : numofdata - startInd; + cerr << "startInd=" << startInd + << " startIndData=" << startIndData + << " batchSize=" << batchSize << endl; + + Tensor tx({numofdata, IMAGE_SIZE}, 1); + Tensor ty({numofdata, LABEL_SIZE}, 1); + + tx.set(images.begin() + startIndData, images.begin() + startIndData + batchSize * IMAGE_SIZE); + ty.set(labels.begin() + startInd, labels.begin() + startInd + batchSize); + + //cerr << "tx=" << Debug(tx.shape()) << endl; + //cerr << "ty=" << Debug(ty.shape()) << endl; + + x = tx; + y = ty; + + cerr << "x=" << Debug(x.val().shape()) << endl; + cerr << "y=" << Debug(y.val().shape()) << endl; + + + graph.forward(batchSize); + + cerr << "w=" << Debug(w.val().shape()) << endl; + cerr << "b=" << Debug(b.val().shape()) << endl; + std::cerr << "z: " << Debug(z.val().shape()) << endl; + std::cerr << "lr: " << Debug(lr.val().shape()) << endl; + std::cerr << "Log-likelihood: " << graph.val().Debug() << endl ; + + //std::cerr << "scores=" << scores.val().Debug() << endl; + //std::cerr << "lr=" << lr.val().Debug() << endl; + + graph.backward(); + std::cerr << w.grad().Debug() << std::endl; + + //std::cerr << graph["pred"].val()[0] << std::endl; + + startInd += batchSize; + startIndData += batchSize * IMAGE_SIZE; + } +#endif + return 0; } diff --git a/src/validate_mnist.cu b/src/validate_mnist.cu index 7d812e36..9d9cdf8b 100644 --- a/src/validate_mnist.cu +++ b/src/validate_mnist.cu @@ -39,7 +39,7 @@ int main(int argc, char** argv) { auto b = param(shape={1, LABEL_SIZE}, init=from_vector(bData)); - auto probs = softmax(dot(x, w) + b, axis=1); + auto probs = softmax_fast(dot(x, w) + b, axis=1); auto cost = -mean(sum(y * log(probs), axis=1), axis=0); std::cerr << "Done." << std::endl;