From 6974ceb9d1132bd1ffc3445662468a1a91e5c599 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 14 Sep 2016 15:16:12 +0200 Subject: [PATCH 1/3] return reference --- src/expressions.cu | 2 +- src/expressions.h | 2 +- src/graph.h | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/expressions.cu b/src/expressions.cu index a95b1bef..2d656ce1 100644 --- a/src/expressions.cu +++ b/src/expressions.cu @@ -10,7 +10,7 @@ Expr::Expr(Chainable* chainable) : pimpl_(chainable) {} Expr::Expr(Float v) : pimpl_(new ConstantNode(keywords::value=v, keywords::shape={1,1})) {} -Tensor Expr::val() { +Tensor &Expr::val() { return pimpl_->val(); } diff --git a/src/expressions.h b/src/expressions.h index d7945f07..09d0edfa 100644 --- a/src/expressions.h +++ b/src/expressions.h @@ -15,7 +15,7 @@ class Expr { return *this; } - Tensor val(); + Tensor &val(); Tensor grad(); void forward(size_t batchSize); diff --git a/src/graph.h b/src/graph.h index 15b4721d..33de8a5e 100644 --- a/src/graph.h +++ b/src/graph.h @@ -17,7 +17,7 @@ struct Chainable { virtual void allocate(size_t) = 0; virtual const Shape& shape() = 0; - virtual DataType val() = 0; + virtual DataType &val() = 0; virtual DataType grad() = 0; virtual void setVal(Tensor t) { UTIL_THROW2("Tensors can only be assigned to input nodes"); @@ -82,7 +82,7 @@ class Node : public Chainable, } } - virtual Tensor val() { + virtual Tensor &val() { UTIL_THROW_IF2(!val_, "Tensor has not been allocated"); return val_; }; @@ -104,4 +104,4 @@ class Node : public Chainable, Tensor adj_; }; -} \ No newline at end of file +} From a573eecf5c9fb0eb83b7516ee03294ca6c3c682e Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 14 Sep 2016 14:33:30 +0100 Subject: [PATCH 2/3] debug --- src/test.cu | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/test.cu b/src/test.cu index 9eb9b498..777b4b39 100644 --- a/src/test.cu +++ b/src/test.cu @@ -20,15 +20,19 @@ int main(int argc, char** argv) { Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0"); Expr b = param(shape={1, LABEL_SIZE}, name="b0"); - + Expr z = dot(x, w) + b; Expr lr = softmax(z, axis=1, name="pred"); Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); - //cerr << "lr=" << Debug(lr.val().shape()) << endl; + //cerr << "x=" << Debug(lr.val().shape()) << endl; int numofdata; - vector images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE); - vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); + //vector images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE); + //vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); + vector images = datasets::mnist::ReadImages("../examples/mnist/train-images-idx3-ubyte", numofdata, IMAGE_SIZE); + vector labels = datasets::mnist::ReadLabels("../examples/mnist/train-labels-idx1-ubyte", numofdata, LABEL_SIZE); + + cerr << "images=" << images.size() << " labels=" << labels.size() << endl; cerr << "numofdata=" << numofdata << endl; @@ -38,14 +42,20 @@ int main(int argc, char** argv) { tx.Load(images); ty.Load(labels); - cerr << "tx=" << Debug(tx.shape()) << endl; - cerr << "ty=" << Debug(ty.shape()) << endl; + //cerr << "tx=" << Debug(tx.shape()) << endl; + //cerr << "ty=" << Debug(ty.shape()) << endl; x = tx; y = ty; + cerr << "x=" << Debug(x.val().shape()) << endl; + cerr << "y=" << Debug(y.val().shape()) << endl; + + graph.forward(500); + cerr << "w=" << Debug(w.val().shape()) << endl; + cerr << "b=" << Debug(b.val().shape()) << endl; std::cerr << "z: " << Debug(z.val().shape()) << endl; std::cerr << "lr: " << Debug(lr.val().shape()) << endl; std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ; From f0f0dbe9eee33db69c5efd246a8cd683843a20c7 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 14 Sep 2016 16:25:58 +0200 Subject: [PATCH 3/3] batches --- src/tensor.cu | 6 +++--- src/tensor.h | 10 ++++----- src/test.cu | 57 ++++++++++++++++++++++++++++++--------------------- 3 files changed, 42 insertions(+), 31 deletions(-) diff --git a/src/tensor.cu b/src/tensor.cu index c5619b99..398b696a 100644 --- a/src/tensor.cu +++ b/src/tensor.cu @@ -80,12 +80,12 @@ void Tensor::Load(const std::string &path) } strm.close(); - Load(hostData); + Load(hostData.begin(), hostData.begin()); } -void Tensor::Load(const std::vector &values) +void Tensor::Load(const std::vector::const_iterator &begin, const std::vector::const_iterator &end) { - pimpl_->set(values); + pimpl_->set(begin, end); } } diff --git a/src/tensor.h b/src/tensor.h index 83965508..d6acea11 100644 --- a/src/tensor.h +++ b/src/tensor.h @@ -158,11 +158,11 @@ class TensorImpl { thrust::fill(data_.begin(), data_.end(), value); } - void set(const std::vector &values) { + void set(const std::vector::const_iterator &begin, const std::vector::const_iterator &end) { size_t totSize = GetTotalSize(shape()); - std::cerr << "tensor size=" << totSize << " vector size=" << values.size() << std::endl; - assert(totSize == values.size()); - thrust::copy(values.begin(), values.end(), data_.begin()); + //std::cerr << "tensor size=" << totSize << " vector size=" << values.size() << std::endl; + //assert(totSize == values.size()); + thrust::copy(begin, end, data_.begin()); } std::string Debug() const @@ -275,7 +275,7 @@ class Tensor { } void Load(const std::string &path); - void Load(const std::vector &values); + void Load(const std::vector::const_iterator &begin, const std::vector::const_iterator &end); }; diff --git a/src/test.cu b/src/test.cu index 777b4b39..a78e182f 100644 --- a/src/test.cu +++ b/src/test.cu @@ -12,6 +12,7 @@ int main(int argc, char** argv) { using namespace marian; using namespace keywords; + const size_t BATCH_SIZE = 500; const size_t IMAGE_SIZE = 784; const size_t LABEL_SIZE = 10; @@ -31,41 +32,51 @@ int main(int argc, char** argv) { //vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); vector images = datasets::mnist::ReadImages("../examples/mnist/train-images-idx3-ubyte", numofdata, IMAGE_SIZE); vector labels = datasets::mnist::ReadLabels("../examples/mnist/train-labels-idx1-ubyte", numofdata, LABEL_SIZE); - - cerr << "images=" << images.size() << " labels=" << labels.size() << endl; cerr << "numofdata=" << numofdata << endl; - Tensor tx({numofdata, IMAGE_SIZE}, 1); - Tensor ty({numofdata, LABEL_SIZE}, 1); + size_t startInd = 0; + size_t startIndData = 0; + while (startInd < numofdata) { + size_t batchSize = (startInd + BATCH_SIZE < numofdata) ? BATCH_SIZE : numofdata - startInd; + cerr << "startInd=" << startInd + << " startIndData=" << startIndData + << " batchSize=" << batchSize << endl; - tx.Load(images); - ty.Load(labels); + Tensor tx({numofdata, IMAGE_SIZE}, 1); + Tensor ty({numofdata, LABEL_SIZE}, 1); - //cerr << "tx=" << Debug(tx.shape()) << endl; - //cerr << "ty=" << Debug(ty.shape()) << endl; + tx.Load(images.begin() + startIndData, images.begin() + startIndData + batchSize * IMAGE_SIZE); + ty.Load(labels.begin() + startInd, labels.begin() + startInd + batchSize); - x = tx; - y = ty; + //cerr << "tx=" << Debug(tx.shape()) << endl; + //cerr << "ty=" << Debug(ty.shape()) << endl; - cerr << "x=" << Debug(x.val().shape()) << endl; - cerr << "y=" << Debug(y.val().shape()) << endl; + x = tx; + y = ty; + + cerr << "x=" << Debug(x.val().shape()) << endl; + cerr << "y=" << Debug(y.val().shape()) << endl; - graph.forward(500); + graph.forward(batchSize); - cerr << "w=" << Debug(w.val().shape()) << endl; - cerr << "b=" << Debug(b.val().shape()) << endl; - std::cerr << "z: " << Debug(z.val().shape()) << endl; - std::cerr << "lr: " << Debug(lr.val().shape()) << endl; - std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ; + cerr << "w=" << Debug(w.val().shape()) << endl; + cerr << "b=" << Debug(b.val().shape()) << endl; + std::cerr << "z: " << Debug(z.val().shape()) << endl; + std::cerr << "lr: " << Debug(lr.val().shape()) << endl; + std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ; - //std::cerr << "scores=" << scores.val().Debug() << endl; - std::cerr << "lr=" << lr.val().Debug() << endl; + //std::cerr << "scores=" << scores.val().Debug() << endl; + std::cerr << "lr=" << lr.val().Debug() << endl; - graph.backward(); - - //std::cerr << graph["pred"].val()[0] << std::endl; + graph.backward(); + + //std::cerr << graph["pred"].val()[0] << std::endl; + + startInd += batchSize; + startIndData += batchSize * IMAGE_SIZE; + } // XOR