From 149e789cc34a20086849c4e2b609a021437a0af5 Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Wed, 14 Sep 2016 13:13:32 +0100
Subject: [PATCH 01/10] debug

---
 src/test.cu | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/src/test.cu b/src/test.cu
index 90dffa04..6a4ef4bf 100644
--- a/src/test.cu
+++ b/src/test.cu
@@ -46,21 +46,9 @@ int main(int argc, char** argv) {
 
   graph.forward(500);
 
-  std::cerr << "Result: ";
-  for (auto val : scores.val().shape()) {
-    std::cerr << val << " ";
-  }
-  std::cerr << std::endl;
-  std::cerr << "Result: ";
-  for (auto val : lr.val().shape()) {
-    std::cerr << val << " ";
-  }
-  std::cerr << std::endl;
-  std::cerr << "Log-likelihood: ";
-  for (auto val : graph.val().shape()) {
-    std::cerr << val << " ";
-  }
-  std::cerr << std::endl;
+  std::cerr << "scores: " << Debug(scores.val().shape()) << endl;
+  std::cerr << "lr: " << Debug(lr.val().shape()) << endl;
+  std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ;
 
   graph.backward();
 

From 9643f52aa5dae8f9c1d8435b777ceeaaeb76ea57 Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Wed, 14 Sep 2016 14:30:23 +0200
Subject: [PATCH 02/10] debug

---
 src/tensor.h |  6 ++++--
 src/test.cu  | 14 ++++++++------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/tensor.h b/src/tensor.h
index 02e1645f..499e1ae0 100644
--- a/src/tensor.h
+++ b/src/tensor.h
@@ -172,11 +172,13 @@ class TensorImpl {
     strm << "shape=" << marian::Debug(shape_) << std::endl;
 
     // values
-    /*
     size_t totSize = GetTotalSize(shape());
     std::vector<float> values(totSize);
     thrust::copy(data_.begin(), data_.end(), values.begin());
-    */
+
+    for (size_t i = 0; i < totSize; ++i) {
+      strm << values[i] << " ";
+    }
     return strm.str();
   }
 };
diff --git a/src/test.cu b/src/test.cu
index 6a4ef4bf..dec93919 100644
--- a/src/test.cu
+++ b/src/test.cu
@@ -21,10 +21,10 @@ int main(int argc, char** argv) {
   Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0");
   Expr b = param(shape={1, LABEL_SIZE}, name="b0");
 
-  auto scores = dot(x, w) + b;
-  auto lr = softmax(scores, axis=1, name="pred");
-  auto graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
-  cerr << "lr=" << lr.Debug() << endl;
+  Expr scores = dot(x, w) + b;
+  Expr lr = softmax(scores, axis=1, name="pred");
+  Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
+  cerr << "lr=" << Debug(lr.val().shape()) << endl;
 
   int numofdata;
   vector<float> images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE);
@@ -38,8 +38,8 @@ int main(int argc, char** argv) {
   tx.Load(images);
   ty.Load(labels);
 
-  cerr << "tx=" << tx.Debug() << endl;
-  cerr << "ty=" << ty.Debug() << endl;
+  cerr << "tx=" << Debug(tx.shape()) << endl;
+  cerr << "ty=" << Debug(ty.shape()) << endl;
 
   x = tx;
   y = ty;
@@ -50,6 +50,8 @@ int main(int argc, char** argv) {
   std::cerr << "lr: " << Debug(lr.val().shape()) << endl;
   std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ;
 
+  std::cerr << "scores=" << scores.val().Debug() << endl;
+
   graph.backward();
 
   //std::cerr << graph["pred"].val()[0] << std::endl;
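
[Editor's note: patch 02 re-enables the device-to-host copy in TensorImpl::Debug() and prints every element. This is the standard Thrust idiom: pull the whole device buffer into a host std::vector with one bulk copy, then format it. A minimal standalone sketch of the same idiom (hypothetical helper name, not part of the patch):

  #include <thrust/device_vector.h>
  #include <thrust/copy.h>
  #include <sstream>
  #include <string>
  #include <vector>

  // Copy a device buffer to the host in one transfer and format it,
  // as TensorImpl::Debug() does after this patch.
  std::string DumpDevice(const thrust::device_vector<float>& data) {
    std::vector<float> values(data.size());
    thrust::copy(data.begin(), data.end(), values.begin());
    std::stringstream strm;
    for (float v : values) strm << v << " ";
    return strm.str();
  }

One bulk thrust::copy is far cheaper than streaming elements one at a time, since each host-side access to a device_vector element triggers its own small transfer.]
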
axis=1, name="pred"); Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); - cerr << "lr=" << Debug(lr.val().shape()) << endl; + //cerr << "lr=" << Debug(lr.val().shape()) << endl; int numofdata; vector images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE); @@ -50,7 +50,8 @@ int main(int argc, char** argv) { std::cerr << "lr: " << Debug(lr.val().shape()) << endl; std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ; - std::cerr << "scores=" << scores.val().Debug() << endl; + //std::cerr << "scores=" << scores.val().Debug() << endl; + std::cerr << "lr=" << lr.val().Debug() << endl; graph.backward(); From f05d17e7ae179ac5d94bf12e443c28121ec7ab23 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 14 Sep 2016 14:51:49 +0200 Subject: [PATCH 04/10] output tensors in shape --- src/tensor.h | 9 +++++++-- src/test.cu | 6 +++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/tensor.h b/src/tensor.h index 499e1ae0..83965508 100644 --- a/src/tensor.h +++ b/src/tensor.h @@ -176,8 +176,13 @@ class TensorImpl { std::vector values(totSize); thrust::copy(data_.begin(), data_.end(), values.begin()); - for (size_t i = 0; i < totSize; ++i) { - strm << values[i] << " "; + size_t ind = 0; + for (size_t i = 0; i < shape()[0]; ++i) { + for (size_t j = 0; j < shape()[1]; ++j) { + strm << values[ind] << " "; + ++ind; + } + strm << std::endl; } return strm.str(); } diff --git a/src/test.cu b/src/test.cu index bd417cee..9eb9b498 100644 --- a/src/test.cu +++ b/src/test.cu @@ -21,8 +21,8 @@ int main(int argc, char** argv) { Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0"); Expr b = param(shape={1, LABEL_SIZE}, name="b0"); - Expr scores = dot(x, w) + b; - Expr lr = softmax(scores, axis=1, name="pred"); + Expr z = dot(x, w) + b; + Expr lr = softmax(z, axis=1, name="pred"); Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); //cerr << "lr=" << Debug(lr.val().shape()) << endl; @@ -46,7 +46,7 @@ int main(int argc, char** argv) { graph.forward(500); - std::cerr << "scores: " << Debug(scores.val().shape()) << endl; + std::cerr << "z: " << Debug(z.val().shape()) << endl; std::cerr << "lr: " << Debug(lr.val().shape()) << endl; std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ; From 6974ceb9d1132bd1ffc3445662468a1a91e5c599 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 14 Sep 2016 15:16:12 +0200 Subject: [PATCH 05/10] return reference --- src/expressions.cu | 2 +- src/expressions.h | 2 +- src/graph.h | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/expressions.cu b/src/expressions.cu index a95b1bef..2d656ce1 100644 --- a/src/expressions.cu +++ b/src/expressions.cu @@ -10,7 +10,7 @@ Expr::Expr(Chainable* chainable) : pimpl_(chainable) {} Expr::Expr(Float v) : pimpl_(new ConstantNode(keywords::value=v, keywords::shape={1,1})) {} -Tensor Expr::val() { +Tensor &Expr::val() { return pimpl_->val(); } diff --git a/src/expressions.h b/src/expressions.h index d7945f07..09d0edfa 100644 --- a/src/expressions.h +++ b/src/expressions.h @@ -15,7 +15,7 @@ class Expr { return *this; } - Tensor val(); + Tensor &val(); Tensor grad(); void forward(size_t batchSize); diff --git a/src/graph.h b/src/graph.h index 15b4721d..33de8a5e 100644 --- a/src/graph.h +++ b/src/graph.h @@ -17,7 +17,7 @@ struct Chainable { virtual void allocate(size_t) = 0; virtual const Shape& shape() = 0; - virtual DataType val() = 0; + virtual DataType &val() = 0; virtual DataType grad() = 0; 
From 0421d8504dad3c0ac97f6bea06d81dcd78a39179 Mon Sep 17 00:00:00 2001
From: Andre Martins
Date: Wed, 14 Sep 2016 14:30:40 +0100
Subject: [PATCH 06/10] Implemented gradient of fast softmax.

---
 src/graph_operators.h   |  9 ++++---
 src/tensor_operators.cu | 53 +++++++++++++++++++++++++++++++++++++++--
 src/tensor_operators.h  |  5 +++++
 3 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/src/graph_operators.h b/src/graph_operators.h
index 5a12f807..88c174c0 100644
--- a/src/graph_operators.h
+++ b/src/graph_operators.h
@@ -119,9 +119,12 @@ struct SoftmaxNodeOp : public UnaryNodeOp {
   }
 
   void backward() {
-    // TODO
-    Element(_1 += _2 * Exp(_3),
-            a_->grad(), adj_, a_->val());
+    // For each row, the Jacobian times vector is given by:
+    // J * dy = p .* (dy - avg*1)
+    // where avg = p'*dy and p is the softmax output (probabilities).
+    Tensor result = adj_;
+    SubtractMean(&result, val_);
+    Prod(a_->grad(), adj_, result, false, false);
   }
 };
 
diff --git a/src/tensor_operators.cu b/src/tensor_operators.cu
index 2d1d541d..2aa96331 100644
--- a/src/tensor_operators.cu
+++ b/src/tensor_operators.cu
@@ -2,7 +2,57 @@
 
 namespace marian {
 
-// TODO: implement this.
+__global__ void gSubtractMean(float* out, float* weights,
+                              size_t rows, size_t cols) {
+  for(int bid = 0; bid < rows; bid += gridDim.x) {
+    int j = bid + blockIdx.x;
+    if(j < rows) {
+      extern __shared__ float _share[];
+      float* _sum = _share + blockDim.x;
+      float* sp = out + j * cols;
+      float* w = weights + j * cols;
+      _sum[threadIdx.x] = 0.0;
+      for(int tid = 0; tid < cols; tid += blockDim.x) {
+        int id = tid + threadIdx.x;
+        if(id < cols) {
+          _sum[threadIdx.x] += w[id] * sp[id];
+        }
+      }
+      __syncthreads();
+      int len = blockDim.x;
+      while(len != 1) {
+        __syncthreads();
+        int skip = (len + 1) >> 1;
+        if(threadIdx.x < (len >> 1))
+          _sum[threadIdx.x] += _sum[threadIdx.x + skip];
+        len = (len + 1) >> 1;
+      }
+      __syncthreads();
+      for(int tid = 0; tid < cols; tid += blockDim.x){
+        int id = tid + threadIdx.x;
+        if(id < cols)
+          sp[id] -= _sum[0];
+      }
+    }
+  }
+}
+
+void SubtractMean(Tensor* Out, Tensor &Weights) {
+  // Out and Weights are both m-by-k matrices, passed as input.
+  // A weighted average of each row of Out (according to the weights
+  // specified in Weights) is computed and subtracted from Out.
+  // Out is both input and output.
+  size_t m = Out->shape()[0];
+  size_t k = Out->shape()[1];
+
+  int blocks = std::min(MAX_BLOCKS, (int) m);
+  int threads = std::min(MAX_THREADS, (int) k);
+  int shared = sizeof(float) * threads * 2;
+  gSubtractMean<<<blocks, threads, shared>>>(Out->data(), Weights.data(),
+                                             m, k);
+  cudaStreamSynchronize(0);
+}
+
 __global__ void gSoftMax(float* softMaxP, size_t rows, size_t cols) {
   for(int bid = 0; bid < rows; bid += gridDim.x) {
     int j = bid + blockIdx.x;
@@ -37,7 +87,6 @@ __global__ void gSoftMax(float* softMaxP, size_t rows, size_t cols) {
   }
 }
 
-// TODO: implement this.
 void Softmax(Tensor* Out) {
   size_t m = Out->shape()[0];
   size_t k = Out->shape()[1];
diff --git a/src/tensor_operators.h b/src/tensor_operators.h
index a0c30104..03d754e3 100644
--- a/src/tensor_operators.h
+++ b/src/tensor_operators.h
@@ -142,6 +142,11 @@ void Element(Functor functor,
   cudaStreamSynchronize(0);
 }
 
+__global__ void gSubtractMean(float* out, float* weights,
+                              size_t rows, size_t cols);
+
+void SubtractMean(Tensor* Out, Tensor &Weights);
+
 __global__ void gSoftMax(float* softMaxP, size_t rows, size_t cols);
 
 void Softmax(Tensor* Out);
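
[Editor's note: the backward() comment in patch 06 is the standard softmax Jacobian identity. For one row with probabilities p = softmax(z) and upstream gradient dy, the Jacobian is J = diag(p) - p*p^T, so J*dy = p .* (dy - (p'*dy)*1): subtract the p-weighted average of dy from every component, then scale elementwise by p. SubtractMean performs the subtraction on the GPU with one block per row and a shared-memory tree reduction for the dot product p'*dy; the kernel points _sum at _share + blockDim.x, which is why the launch allocates 2 * threads floats of shared memory. A CPU reference for one row, handy for checking the kernel (hypothetical helper, not part of the patch):

  #include <cstddef>
  #include <vector>

  // dy := (softmax Jacobian) * dy for a single row; p holds the
  // probabilities cached by the forward pass.
  void SoftmaxJacobianTimes(std::vector<float>& dy,
                            const std::vector<float>& p) {
    float avg = 0.0f;                      // avg = p' * dy
    for (std::size_t i = 0; i < p.size(); ++i) avg += p[i] * dy[i];
    for (std::size_t i = 0; i < p.size(); ++i)
      dy[i] = p[i] * (dy[i] - avg);        // J * dy = p .* (dy - avg)
  }

In the patch itself, SubtractMean handles the (dy - avg) part in place and the remaining combination with the incoming adjoint is routed through Prod.]
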
From a573eecf5c9fb0eb83b7516ee03294ca6c3c682e Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Wed, 14 Sep 2016 14:33:30 +0100
Subject: [PATCH 07/10] debug

---
 src/test.cu | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/test.cu b/src/test.cu
index 9eb9b498..777b4b39 100644
--- a/src/test.cu
+++ b/src/test.cu
@@ -20,15 +20,19 @@ int main(int argc, char** argv) {
 
   Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0");
   Expr b = param(shape={1, LABEL_SIZE}, name="b0");
-  
+
   Expr z = dot(x, w) + b;
   Expr lr = softmax(z, axis=1, name="pred");
   Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
-  //cerr << "lr=" << Debug(lr.val().shape()) << endl;
+  //cerr << "x=" << Debug(lr.val().shape()) << endl;
 
   int numofdata;
-  vector<float> images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE);
-  vector<float> labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE);
+  //vector<float> images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE);
+  //vector<float> labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE);
+  vector<float> images = datasets::mnist::ReadImages("../examples/mnist/train-images-idx3-ubyte", numofdata, IMAGE_SIZE);
+  vector<float> labels = datasets::mnist::ReadLabels("../examples/mnist/train-labels-idx1-ubyte", numofdata, LABEL_SIZE);
+
+  cerr << "images=" << images.size() << " labels=" << labels.size() << endl;
   cerr << "numofdata=" << numofdata << endl;
 
@@ -38,14 +42,20 @@ int main(int argc, char** argv) {
   tx.Load(images);
   ty.Load(labels);
 
-  cerr << "tx=" << Debug(tx.shape()) << endl;
-  cerr << "ty=" << Debug(ty.shape()) << endl;
+  //cerr << "tx=" << Debug(tx.shape()) << endl;
+  //cerr << "ty=" << Debug(ty.shape()) << endl;
 
   x = tx;
   y = ty;
 
+  cerr << "x=" << Debug(x.val().shape()) << endl;
+  cerr << "y=" << Debug(y.val().shape()) << endl;
+
+
   graph.forward(500);
 
+  cerr << "w=" << Debug(w.val().shape()) << endl;
+  cerr << "b=" << Debug(b.val().shape()) << endl;
   std::cerr << "z: " << Debug(z.val().shape()) << endl;
   std::cerr << "lr: " << Debug(lr.val().shape()) << endl;
   std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ;
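
[Editor's note: patch 07 switches the input from the 10,000-image t10k test files to the 60,000-image training files, so the new size printout is a cheap sanity check: ReadImages should yield numofdata * IMAGE_SIZE floats and ReadLabels numofdata * LABEL_SIZE one-hot entries. The check being eyeballed in the cerr output could be written against test.cu's own variables as asserts (hypothetical, not in the patch):

  #include <cassert>

  // images is flat row-major [numofdata x 784], labels one-hot [numofdata x 10].
  assert(images.size() == size_t(numofdata) * IMAGE_SIZE);
  assert(labels.size() == size_t(numofdata) * LABEL_SIZE);
]
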
From 08bdf340d99253669403962dccd3c709f07b3784 Mon Sep 17 00:00:00 2001
From: Andre Martins
Date: Wed, 14 Sep 2016 14:59:04 +0100
Subject: [PATCH 08/10] Fixed beta in the softmax backprop; made the tanh and
 sigmoid backprop more efficient by using the cached activations.

---
 src/graph_operators.h | 55 ++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/src/graph_operators.h b/src/graph_operators.h
index 88c174c0..972456db 100644
--- a/src/graph_operators.h
+++ b/src/graph_operators.h
@@ -14,7 +14,7 @@ struct InputNode : public Node {
                    !Has(keywords::lazy_shape),
                    "Data items require shape information");
   }
-  
+
   virtual void setVal(Tensor t) {
     val_ = t;
     shape_ = t.shape();
@@ -33,7 +33,7 @@ struct ConstantNode : public Node {
                    !Has(keywords::lazy_shape),
                    "Constant items require shape information");
   }
-  
+
   void forward() {}
   void backward() {}
 };
@@ -47,23 +47,23 @@ struct ParamNode : public Node {
     UTIL_THROW_IF2(!Has(keywords::shape) &&
                    !Has(keywords::lazy_shape),
                    "Param items require shape information");
-  }  
-  
+  }
+
   void forward() {}
   void backward() {}
-  
+
   virtual void allocate(size_t batchSize) {
     val_.allocate(shape_);
     init_(val_);
   }
-  
+
 private:
   std::function<void(Tensor)> init_;
 };
 
 struct UnaryNodeOp : public Node {
   ChainPtr a_;
-  
+
   template <typename ...Args>
   UnaryNodeOp(ChainPtr a, Args ...args)
   : Node(args...), a_(a) {}
@@ -73,15 +73,15 @@ struct SigmoidNodeOp : public UnaryNodeOp {
   template <typename ...Args>
   SigmoidNodeOp(Args ...args)
   : UnaryNodeOp(args...) {  }
-  
+
   void forward() {
     Element(_1 = Sigma(_2),
             val_, a_->val());
   }
-  
+
   void backward() {
-    Element(_1 += _2 * Sigma(_3) * (1 - Sigma(_3)),
-            a_->grad(), adj_, a_->val());
+    Element(_1 += _2 * _3 * (1 - _3),
+            a_->grad(), adj_, val_);
   }
 };
 
@@ -89,15 +89,15 @@ struct TanhNodeOp : public UnaryNodeOp {
   template <typename ...Args>
   TanhNodeOp(Args ...args)
   : UnaryNodeOp(args...) {  }
-  
+
   void forward() {
     Element(_1 = Tanh(_2),
             val_, a_->val());
   }
-  
+
   void backward() {
-    Element(_1 += _2 * (1 - Tanh(_3) * Tanh(_3)),
-            a_->grad(), adj_, a_->val());
+    Element(_1 += _2 * (1 - _3 * _3),
+            a_->grad(), adj_, val_);
   }
 };
 
@@ -106,7 +106,6 @@ struct SoftmaxNodeOp : public UnaryNodeOp {
   SoftmaxNodeOp(ChainPtr a, Args ...args)
     : UnaryNodeOp(a, keywords::shape=newShape(a),
                   args...) { }
-
   Shape newShape(ChainPtr a) {
     Shape shape = a->shape();
     return shape;
@@ -117,14 +116,16 @@ struct SoftmaxNodeOp : public UnaryNodeOp {
     val_ = a_->val();
     Softmax(&val_);
   }
-  
+
   void backward() {
     // For each row, the Jacobian times vector is given by:
     // J * dy = p .* (dy - avg*1)
     // where avg = p'*dy and p is the softmax output (probabilities).
     Tensor result = adj_;
     SubtractMean(&result, val_);
-    Prod(a_->grad(), adj_, result, false, false);
+    // beta set to 1.0 in gemm, C = alpha * dot(A,B) + beta * C
+    // to sum gradients from different graph parts.
+    Prod(a_->grad(), adj_, result, false, false, 1.0);
   }
 };
 
@@ -132,11 +133,11 @@ struct LogNodeOp : public UnaryNodeOp {
   template <typename ...Args>
   LogNodeOp(Args ...args)
   : UnaryNodeOp(args...) {}
-  
+
   void forward() {
     Element(_1 = Log(_2), val_, a_->val());
   }
-  
+
   void backward() {
     Element(_1 += _2 * 1.f / _3,
             a_->grad(), adj_, a_->val());
@@ -148,7 +149,7 @@ struct ExpNodeOp : public UnaryNodeOp {
   ExpNodeOp(ChainPtr a, Args ...args)
     : UnaryNodeOp(a, keywords::shape=newShape(a),
                   args...) { }
-  
+
   Shape newShape(ChainPtr a) {
     Shape shape = a->shape();
     return shape;
@@ -157,7 +158,7 @@ struct ExpNodeOp : public UnaryNodeOp {
   void forward() {
     Element(_1 = Exp(_2), val_, a_->val());
   }
-  
+
   void backward() {
     Element(_1 += _2 * Exp(_3),
             a_->grad(), adj_, a_->val());
@@ -168,11 +169,11 @@ struct NegNodeOp : public UnaryNodeOp {
   template <typename ...Args>
   NegNodeOp(Args ...args)
   : UnaryNodeOp(args...) { }
-  
+
   void forward() {
     Element(_1 = -_2, val_, a_->val());
   }
-  
+
   void backward() {
     Element(_1 += -_2, a_->grad(), adj_);
   }
@@ -197,7 +198,7 @@ struct DotNodeOp : public BinaryNodeOp {
     : BinaryNodeOp(a, b,
                    keywords::shape=newShape(a,b),
                    args...) { }
-  
+
   Shape newShape(ChainPtr a, ChainPtr b) {
     Shape shape1 = a->shape();
     Shape shape2 = b->shape();
@@ -206,12 +207,12 @@ struct DotNodeOp : public BinaryNodeOp {
     shape1[1] = shape2[1];
     return shape1;
   }
-  
+
   void forward() {
     // C = A*B
     Prod(val_, a_->val(), b_->val(), false, false);
   }
-  
+
   void backward() {
     // D is the adjoint, the matrix of derivatives
     // df/dA += D*B.T
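
[Editor's note: patch 08 relies on the cached-activation forms of the derivatives: for y = sigma(z), dy/dz = y * (1 - y), and for y = tanh(z), dy/dz = 1 - y^2, so backward() can reuse the stored output val_ instead of recomputing Sigma or Tanh from the input. The beta remark in the subject refers to the GEMM convention C = alpha * A*B + beta * C: passing beta = 1.0 makes Prod accumulate into a_->grad() rather than overwrite it, so gradients arriving from several consumers of the same node sum up, which is what reverse-mode autodiff requires. A scalar sketch of the cached trick (hypothetical helpers, not part of the patch):

  // y is the activation saved by forward(); dy is the incoming gradient.
  float SigmoidBackward(float y, float dy) { return dy * y * (1.0f - y); }
  float TanhBackward(float y, float dy)    { return dy * (1.0f - y * y); }
]
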
From f0f0dbe9eee33db69c5efd246a8cd683843a20c7 Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Wed, 14 Sep 2016 16:25:58 +0200
Subject: [PATCH 09/10] batches

---
 src/tensor.cu |  6 +++---
 src/tensor.h  | 10 +++++-----
 src/test.cu   | 57 ++++++++++++++++++++++++++++++---------------------
 3 files changed, 42 insertions(+), 31 deletions(-)

diff --git a/src/tensor.cu b/src/tensor.cu
index c5619b99..398b696a 100644
--- a/src/tensor.cu
+++ b/src/tensor.cu
@@ -80,12 +80,12 @@ void Tensor::Load(const std::string &path)
   }
   strm.close();
 
-  Load(hostData);
+  Load(hostData.begin(), hostData.begin());
 }
 
-void Tensor::Load(const std::vector<float> &values)
+void Tensor::Load(const std::vector<float>::const_iterator &begin, const std::vector<float>::const_iterator &end)
 {
-  pimpl_->set(values);
+  pimpl_->set(begin, end);
 }
 
 }
diff --git a/src/tensor.h b/src/tensor.h
index 83965508..d6acea11 100644
--- a/src/tensor.h
+++ b/src/tensor.h
@@ -158,11 +158,11 @@ class TensorImpl {
     thrust::fill(data_.begin(), data_.end(), value);
   }
 
-  void set(const std::vector<float> &values) {
+  void set(const std::vector<float>::const_iterator &begin, const std::vector<float>::const_iterator &end) {
     size_t totSize = GetTotalSize(shape());
-    std::cerr << "tensor size=" << totSize << " vector size=" << values.size() << std::endl;
-    assert(totSize == values.size());
-    thrust::copy(values.begin(), values.end(), data_.begin());
+    //std::cerr << "tensor size=" << totSize << " vector size=" << values.size() << std::endl;
+    //assert(totSize == values.size());
+    thrust::copy(begin, end, data_.begin());
   }
 
   std::string Debug() const
@@ -275,7 +275,7 @@ class Tensor {
   }
 
   void Load(const std::string &path);
-  void Load(const std::vector<float> &values);
+  void Load(const std::vector<float>::const_iterator &begin, const std::vector<float>::const_iterator &end);
 };
 
diff --git a/src/test.cu b/src/test.cu
index 777b4b39..a78e182f 100644
--- a/src/test.cu
+++ b/src/test.cu
@@ -12,6 +12,7 @@ int main(int argc, char** argv) {
   using namespace marian;
   using namespace keywords;
 
+  const size_t BATCH_SIZE = 500;
   const size_t IMAGE_SIZE = 784;
   const size_t LABEL_SIZE = 10;
 
@@ -30,35 +31,47 @@ int main(int argc, char** argv) {
   cerr << "numofdata=" << numofdata << endl;
 
+  size_t startInd = 0;
+  size_t startIndData = 0;
+  while (startInd < numofdata) {
+    size_t batchSize = (startInd + BATCH_SIZE < numofdata) ?
+            BATCH_SIZE : numofdata - startInd;
+    cerr << "startInd=" << startInd
+         << " startIndData=" << startIndData
+         << " batchSize=" << batchSize << endl;
+
-  Tensor tx({numofdata, IMAGE_SIZE}, 1);
-  Tensor ty({numofdata, LABEL_SIZE}, 1);
+    Tensor tx({numofdata, IMAGE_SIZE}, 1);
+    Tensor ty({numofdata, LABEL_SIZE}, 1);
 
-  tx.Load(images);
-  ty.Load(labels);
+    tx.Load(images.begin() + startIndData, images.begin() + startIndData + batchSize * IMAGE_SIZE);
+    ty.Load(labels.begin() + startInd, labels.begin() + startInd + batchSize);
 
-  //cerr << "tx=" << Debug(tx.shape()) << endl;
-  //cerr << "ty=" << Debug(ty.shape()) << endl;
+    //cerr << "tx=" << Debug(tx.shape()) << endl;
+    //cerr << "ty=" << Debug(ty.shape()) << endl;
 
-  x = tx;
-  y = ty;
+    x = tx;
+    y = ty;
 
-  cerr << "x=" << Debug(x.val().shape()) << endl;
-  cerr << "y=" << Debug(y.val().shape()) << endl;
+    cerr << "x=" << Debug(x.val().shape()) << endl;
+    cerr << "y=" << Debug(y.val().shape()) << endl;
 
-  graph.forward(500);
+    graph.forward(batchSize);
 
-  cerr << "w=" << Debug(w.val().shape()) << endl;
-  cerr << "b=" << Debug(b.val().shape()) << endl;
-  std::cerr << "z: " << Debug(z.val().shape()) << endl;
-  std::cerr << "lr: " << Debug(lr.val().shape()) << endl;
-  std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ;
+    cerr << "w=" << Debug(w.val().shape()) << endl;
+    cerr << "b=" << Debug(b.val().shape()) << endl;
+    std::cerr << "z: " << Debug(z.val().shape()) << endl;
+    std::cerr << "lr: " << Debug(lr.val().shape()) << endl;
+    std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ;
 
-  //std::cerr << "scores=" << scores.val().Debug() << endl;
-  std::cerr << "lr=" << lr.val().Debug() << endl;
+    //std::cerr << "scores=" << scores.val().Debug() << endl;
+    std::cerr << "lr=" << lr.val().Debug() << endl;
 
-  graph.backward();
-
-  //std::cerr << graph["pred"].val()[0] << std::endl;
+    graph.backward();
+
+    //std::cerr << graph["pred"].val()[0] << std::endl;
+
+    startInd += batchSize;
+    startIndData += batchSize * IMAGE_SIZE;
+  }
 
 
   // XOR
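
[Editor's note: patch 09 turns Tensor::Load into an iterator-range interface so each mini-batch is sliced straight out of the flat MNIST buffers: batch images occupy the half-open range [startIndData, startIndData + batchSize * IMAGE_SIZE) and the cursor advances by batchSize * IMAGE_SIZE per iteration. Two details look unfinished in this commit: the path-based Load now forwards (hostData.begin(), hostData.begin()), an empty range that was presumably meant to end at hostData.end(), and the per-batch tensors are still sized with numofdata rows rather than batchSize (the assert that would have caught the mismatch is commented out in the same patch); the label slice also advances by batchSize elements although one-hot rows occupy batchSize * LABEL_SIZE floats. The slicing arithmetic itself, as a standalone sketch (hypothetical helper, not part of the patch):

  #include <algorithm>
  #include <cstddef>

  // Half-open element range covered by batch b of a flat row-major buffer.
  void BatchRange(std::size_t b, std::size_t batchSize, std::size_t rowSize,
                  std::size_t numRows,
                  std::size_t* begin, std::size_t* end) {
    std::size_t firstRow = b * batchSize;
    std::size_t lastRow  = std::min(firstRow + batchSize, numRows);  // clipped, like the ?: in the loop
    *begin = firstRow * rowSize;
    *end   = lastRow * rowSize;
  }
]
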
From dcdfcd80cc71be3f6532879cf9de6f01cb7a78ec Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Wed, 14 Sep 2016 14:34:26 +0000
Subject: [PATCH 10/10] less debug

---
 src/test.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/test.cu b/src/test.cu
index a78e182f..0285e3a5 100644
--- a/src/test.cu
+++ b/src/test.cu
@@ -68,7 +68,7 @@ int main(int argc, char** argv) {
     std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ;
 
     //std::cerr << "scores=" << scores.val().Debug() << endl;
-    std::cerr << "lr=" << lr.val().Debug() << endl;
+    //std::cerr << "lr=" << lr.val().Debug() << endl;
 
     graph.backward();