From 78b8177d3b658034e81d2f1c1a72131e7f93a3b6 Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Thu, 15 Sep 2016 08:54:24 +0200
Subject: [PATCH 01/11] requires C++11, not 14

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6bee418b..615e28ce 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ Installation

 Requirements:

-* g++ with C++14
+* g++ with C++11
 * CUDA and CuDNN

 Exporting some paths for CuDNN may be required (put it, for example, in your `.bashrc` file):

From 92d1546589cfbdbbd9495fb1e0f8deaea4219b02 Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Thu, 15 Sep 2016 14:11:32 +0200
Subject: [PATCH 02/11] model dir

---
 src/CMakeLists.txt    |  1 +
 src/sgd.cu            | 68 +++++++++++++++++++++++++++++++++++++++++++
 src/sgd.h             | 58 ++++--------------------------------
 src/validate_mnist.cu |  2 +-
 4 files changed, 75 insertions(+), 54 deletions(-)
 create mode 100644 src/sgd.cu

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index cb121111..6dc37391 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -5,6 +5,7 @@ cuda_add_library(marian_lib
   cnpy/cnpy.cpp
   exception.cpp
   expressions.cu
+  sgd.cu
   tensor.cu
   tensor_operators.cu
 )

diff --git a/src/sgd.cu b/src/sgd.cu
new file mode 100644
index 00000000..469d0976
--- /dev/null
+++ b/src/sgd.cu
@@ -0,0 +1,68 @@
+#include "sgd.h"
+#include "thrust_functions.h"
+
+namespace marian {
+SGD::SGD(Expr& cost_func, Expr& inX, Expr& inY,
+         const std::vector<Expr*> params, float eta,
+         std::vector<float>& xData, size_t numFeatures,
+         std::vector<float>& yData, size_t numClasses,
+         size_t epochs, size_t batchSize)
+: cost_function_(&cost_func),
+  inX_(&inX),
+  inY_(&inY),
+  params_(params),
+  eta_(eta),
+  xData_(xData),
+  numFeatures_(numFeatures),
+  yData_(yData),
+  numClasses_(numClasses),
+  epochs_(epochs),
+  batchSize_(batchSize)
+{}
+
+void SGD::Run()
+{
+  size_t numExamples = xData_.size()/ numFeatures_;
+  Tensor xt({(int)batchSize_, (int)numExamples}, 0.0f);
+  Tensor yt({(int)batchSize_, (int)numClasses_}, 0.0f);
+
+  for (size_t numEpoch = 0; numEpoch < epochs_; ++numEpoch) {
+    std::cerr << "Starting epoch #" << numEpoch << std::endl;
+    size_t startId = 0;
+    size_t endId = startId + batchSize_;
+
+    while (endId < numExamples) {
+      PrepareBatch(startId, endId, xt, yt);
+      *inX_ = xt;
+      *inY_ = yt;
+
+      cost_function_->forward(batchSize_);
+      cost_function_->backward();
+
+      UpdateModel();
+
+      startId += batchSize_;
+      endId += batchSize_;
+    }
+  }
+}
+
+void SGD::PrepareBatch(size_t startId, size_t endId, Tensor& xt, Tensor& yt) {
+  std::vector<float> x(xData_.begin() + startId * numFeatures_,
+                       xData_.begin() + endId * numFeatures_);
+  std::vector<float> y(yData_.begin() + startId * numClasses_,
+                       yData_.begin() + endId * numClasses_);
+
+  xt.set(x);
+  yt.set(y);
+}
+
+void SGD::UpdateModel() {
+  for (auto& param : params_) {
+    using namespace thrust::placeholders;
+    Element(_1 = _1 - eta_ * _2, param->val(), param->grad());
+  }
+}
+
+} // namespace

diff --git a/src/sgd.h b/src/sgd.h
index 0dab8df0..17bc038e 100644
--- a/src/sgd.h
+++ b/src/sgd.h
@@ -5,6 +5,7 @@
 #include "expressions.h"
 #include "thrust_functions.h"
+#include "tensor_operators.h"

 namespace marian {

@@ -14,62 +15,13 @@ class SGD {
       const std::vector<Expr*> params, float eta,
       std::vector<float>& xData, size_t numFeatures,
       std::vector<float>& yData, size_t numClasses,
-      size_t epochs, size_t batchSize)
-    : cost_function_(&cost_func),
-      inX_(&inX),
-      inY_(&inY),
-      params_(params),
-      eta_(eta),
-      xData_(xData),
-      numFeatures_(numFeatures),
-      yData_(yData),
-      numClasses_(numClasses),
-      epochs_(epochs),
-      batchSize_(batchSize)
-    {}
+      size_t epochs, size_t batchSize);

-    void Run() {
-      size_t numExamples = xData_.size()/ numFeatures_;
-      Tensor xt({(int)batchSize_, (int)numExamples}, 0.0f);
-      Tensor yt({(int)batchSize_, (int)numClasses_}, 0.0f);
+    void Run();

-      for (size_t numEpoch = 0; numEpoch < epochs_; ++numEpoch) {
-        std::cerr << "Starting epoch #" << numEpoch << std::endl;
-        size_t startId = 0;
-        size_t endId = startId + batchSize_;
+    void PrepareBatch(size_t startId, size_t endId, Tensor& xt, Tensor& yt);

-        while (endId < numExamples) {
-          PrepareBatch(startId, endId, xt, yt);
-          *inX_ = xt;
-          *inY_ = yt;
-
-          cost_function_->forward(batchSize_);
-          cost_function_->backward();
-
-          UpdateModel();
-
-          startId += batchSize_;
-          endId += batchSize_;
-        }
-      }
-    }
-
-    void PrepareBatch(size_t startId, size_t endId, Tensor& xt, Tensor& yt) {
-      std::vector<float> x(xData_.begin() + startId * numFeatures_,
-                           xData_.begin() + endId * numFeatures_);
-      std::vector<float> y(yData_.begin() + startId * numClasses_,
-                           yData_.begin() + endId * numClasses_);
-
-      xt.set(x);
-      yt.set(y);
-    }
-
-    void UpdateModel() {
-      for (auto& param : params_) {
-        using namespace thrust::placeholders;
-        Element(_1 = _1 - eta_ * _2, param->val(), param->grad());
-      }
-    }
+    void UpdateModel();

   private:
     std::shared_ptr<Expr> cost_function_;

diff --git a/src/validate_mnist.cu b/src/validate_mnist.cu
index e9b5735d..82f5daca 100644
--- a/src/validate_mnist.cu
+++ b/src/validate_mnist.cu
@@ -22,7 +22,7 @@ int main(int argc, char** argv) {

   std::cerr << "Loading model params...";
-  NpzConverter converter("../scripts/test_model/model.npz");
+  NpzConverter converter("../scripts/test_model_single/model.npz");

   std::vector<float> wData, bData;
   Shape wShape, bShape;
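The parameter update in SGD::UpdateModel above is plain stochastic gradient descent: each parameter moves against its gradient, scaled by the learning rate eta, which is what the Thrust placeholder expression _1 = _1 - eta_ * _2 evaluates element-wise on the GPU. A minimal CPU-side sketch of the same rule, as a standalone illustration (function name and types here are hypothetical, not taken from the patch):

    #include <cstddef>
    #include <vector>

    // One SGD step: w <- w - eta * dw, applied element-wise.
    void sgdStep(std::vector<float>& param,
                 const std::vector<float>& grad,
                 float eta) {
      for (std::size_t i = 0; i < param.size(); ++i) {
        param[i] -= eta * grad[i];
      }
    }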
From b29628e0b6fc77e1880a9b97977925433b89d26b Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Thu, 15 Sep 2016 14:17:04 +0200
Subject: [PATCH 03/11] model dir

---
 marian/.cproject            | 6 +++---
 src/validate_mnist_batch.cu | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/marian/.cproject b/marian/.cproject
index 2d8c666b..48ccc0b2 100644
--- a/marian/.cproject
+++ b/marian/.cproject
@@ -56,11 +56,11 @@
-
-
+
+
-
+

diff --git a/src/validate_mnist_batch.cu b/src/validate_mnist_batch.cu
index ac4e7359..79645efb 100644
--- a/src/validate_mnist_batch.cu
+++ b/src/validate_mnist_batch.cu
@@ -21,7 +21,7 @@ int main(int argc, char** argv) {
   std::cerr << "\tDone." << std::endl;

   std::cerr << "Loading model params...";
-  NpzConverter converter("../scripts/test_model/model.npz");
+  NpzConverter converter("../scripts/test_model_single/model.npz");

   std::vector<float> wData;
   Shape wShape;
From f044b8dfbb8084579d0af232870805bef0242311 Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Thu, 15 Sep 2016 16:14:06 +0200
Subject: [PATCH 04/11] Shuffle data

---
 src/sgd.cu | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 src/sgd.h  | 14 ++++++++++----
 2 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/src/sgd.cu b/src/sgd.cu
index 469d0976..0213f6d5 100644
--- a/src/sgd.cu
+++ b/src/sgd.cu
@@ -1,6 +1,10 @@
+#include
+#include
 #include "sgd.h"
 #include "thrust_functions.h"

+using namespace std;
+
 namespace marian {
 SGD::SGD(Expr& cost_func, Expr& inX, Expr& inY,
          const std::vector<Expr*> params, float eta,
          std::vector<float>& xData, size_t numFeatures,
@@ -22,17 +26,21 @@ SGD::SGD(Expr& cost_func, Expr& inX, Expr& inY,

 void SGD::Run()
 {
+  std::srand ( unsigned ( std::time(0) ) );
+
   size_t numExamples = xData_.size()/ numFeatures_;
   Tensor xt({(int)batchSize_, (int)numExamples}, 0.0f);
   Tensor yt({(int)batchSize_, (int)numClasses_}, 0.0f);

+  vector<size_t> shuffle = CreateShuffle(numExamples);
+
   for (size_t numEpoch = 0; numEpoch < epochs_; ++numEpoch) {
     std::cerr << "Starting epoch #" << numEpoch << std::endl;
     size_t startId = 0;
     size_t endId = startId + batchSize_;

     while (endId < numExamples) {
-      PrepareBatch(startId, endId, xt, yt);
+      PrepareBatch(startId, batchSize_, shuffle, xt, yt);
       *inX_ = xt;
       *inY_ = yt;

@@ -47,11 +55,55 @@ void SGD::Run()
   }
 }

-void SGD::PrepareBatch(size_t startId, size_t endId, Tensor& xt, Tensor& yt) {
+std::vector<size_t> SGD::CreateShuffle(size_t numExamples) const {
+  vector<size_t> ret(numExamples);
+  std::iota(ret.begin(), ret.end(), 1);
+  std::random_shuffle ( ret.begin(), ret.end() );
+
+  for (size_t i = 0; i < ret.size(); ++i) {
+    cerr << ret[i] << " ";
+  }
+
+  return ret;
+}
+
+void SGD::PrepareBatch(
+    size_t startId,
+    size_t batchSize,
+    const std::vector<size_t> &shuffle,
+    Tensor& xt,
+    Tensor& yt) {
+  /*
   std::vector<float> x(xData_.begin() + startId * numFeatures_,
                        xData_.begin() + endId * numFeatures_);
   std::vector<float> y(yData_.begin() + startId * numClasses_,
                        yData_.begin() + endId * numClasses_);
+  */
+  std::vector<float> x(batchSize * numFeatures_);
+  std::vector<float> y(batchSize * numClasses_);
+
+  std::vector<float>::iterator startXIter = x.begin();
+  std::vector<float>::iterator startYIter = y.begin();
+
+  size_t endId = startId + batchSize;
+  for (size_t i = startId; i < endId; ++i) {
+    size_t startXDataId = i * numFeatures_;
+    size_t startYDataId = i * numClasses_;
+
+    size_t endXDataId = startXDataId + batchSize * numFeatures_;
+    size_t endYDataId = startYDataId + batchSize * numClasses_;
+
+    std::copy(xData_.begin() + startXDataId,
+              xData_.begin() + endXDataId,
+              startXIter);
+
+    std::copy(yData_.begin() + startYDataId,
+              yData_.begin() + endYDataId,
+              startYIter);
+
+    startXIter += batchSize * numFeatures_;
+    startYIter += batchSize * numClasses_;
+  }

   xt.set(x);
   yt.set(y);

diff --git a/src/sgd.h b/src/sgd.h
index 17bc038e..fedfa8a5 100644
--- a/src/sgd.h
+++ b/src/sgd.h
@@ -19,10 +19,6 @@ class SGD {

   void Run();

-  void PrepareBatch(size_t startId, size_t endId, Tensor& xt, Tensor& yt);
-
-  void UpdateModel();
-
  private:
   std::shared_ptr<Expr> cost_function_;
   std::shared_ptr<Expr> inX_;
@@ -35,6 +31,16 @@ class SGD {
   const size_t numClasses_;
   const size_t epochs_;
   const size_t batchSize_;
+
+  std::vector<size_t> CreateShuffle(size_t numExamples) const;
+  void PrepareBatch(
+      size_t startId,
+      size_t batchSize,
+      const std::vector<size_t> &shuffle,
+      Tensor& xt,
+      Tensor& yt);
+
+  void UpdateModel();
 };

 } // namespace marian
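Two details of the shuffled batching introduced in this patch are worth noting: CreateShuffle starts std::iota at 1, so the indices run from 1 to numExamples rather than 0 to numExamples - 1, and each std::copy in PrepareBatch spans batchSize * numFeatures_ (or batchSize * numClasses_) values instead of a single example's worth; both are corrected later in this series. A minimal sketch of the intended per-example gather, assuming row-major example storage (a standalone illustration with hypothetical names, not the patch's code):

    #include <algorithm>
    #include <cstddef>
    #include <numeric>
    #include <random>
    #include <vector>

    // Shuffled index over numExamples rows, starting at 0.
    std::vector<std::size_t> makeShuffle(std::size_t numExamples, std::mt19937& rng) {
      std::vector<std::size_t> idx(numExamples);
      std::iota(idx.begin(), idx.end(), 0);
      std::shuffle(idx.begin(), idx.end(), rng);
      return idx;
    }

    // Copy batchSize rows of rowSize values each, in shuffled order,
    // into one contiguous batch buffer.
    std::vector<float> gatherBatch(const std::vector<float>& data,
                                   const std::vector<std::size_t>& shuffle,
                                   std::size_t startId, std::size_t batchSize,
                                   std::size_t rowSize) {
      std::vector<float> batch(batchSize * rowSize);
      for (std::size_t i = 0; i < batchSize; ++i) {
        std::size_t row = shuffle[startId + i];
        std::copy(data.begin() + row * rowSize,
                  data.begin() + (row + 1) * rowSize,
                  batch.begin() + i * rowSize);
      }
      return batch;
    }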
From 88678e59bc16f58e10b5355efaa5a42046f4b152 Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Thu, 15 Sep 2016 16:31:12 +0200
Subject: [PATCH 05/11] don't use shuffle

---
 src/sgd.cu | 24 ++++++++++++------------
 src/sgd.h  |  3 ++-
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/sgd.cu b/src/sgd.cu
index 0213f6d5..d46ece86 100644
--- a/src/sgd.cu
+++ b/src/sgd.cu
@@ -21,7 +21,7 @@ SGD::SGD(Expr& cost_func, Expr& inX, Expr& inY,
   yData_(yData),
   numClasses_(numClasses),
   epochs_(epochs),
-  batchSize_(batchSize)
+  maxBatchSize_(batchSize)
 {}

 void SGD::Run()
@@ -29,28 +29,28 @@ void SGD::Run()
   std::srand ( unsigned ( std::time(0) ) );

   size_t numExamples = xData_.size()/ numFeatures_;
-  Tensor xt({(int)batchSize_, (int)numExamples}, 0.0f);
-  Tensor yt({(int)batchSize_, (int)numClasses_}, 0.0f);
+  Tensor xt({(int)maxBatchSize_, (int)numExamples}, 0.0f);
+  Tensor yt({(int)maxBatchSize_, (int)numClasses_}, 0.0f);

   vector<size_t> shuffle = CreateShuffle(numExamples);

   for (size_t numEpoch = 0; numEpoch < epochs_; ++numEpoch) {
     std::cerr << "Starting epoch #" << numEpoch << std::endl;
     size_t startId = 0;
-    size_t endId = startId + batchSize_;
+    size_t endId = startId + maxBatchSize_;

     while (endId < numExamples) {
-      PrepareBatch(startId, batchSize_, shuffle, xt, yt);
+      PrepareBatch(startId, endId, maxBatchSize_, shuffle, xt, yt);
       *inX_ = xt;
       *inY_ = yt;

-      cost_function_->forward(batchSize_);
+      cost_function_->forward(maxBatchSize_);
       cost_function_->backward();

       UpdateModel();

-      startId += batchSize_;
-      endId += batchSize_;
+      startId += maxBatchSize_;
+      endId += maxBatchSize_;
     }
   }
 }
@@ -69,23 +69,23 @@ std::vector<size_t> SGD::CreateShuffle(size_t numExamples) const {

 void SGD::PrepareBatch(
     size_t startId,
+    size_t endId,
     size_t batchSize,
     const std::vector<size_t> &shuffle,
     Tensor& xt,
     Tensor& yt) {
-  /*
+
   std::vector<float> x(xData_.begin() + startId * numFeatures_,
                        xData_.begin() + endId * numFeatures_);
   std::vector<float> y(yData_.begin() + startId * numClasses_,
                        yData_.begin() + endId * numClasses_);
-  */
+  /*
   std::vector<float> x(batchSize * numFeatures_);
   std::vector<float> y(batchSize * numClasses_);

   std::vector<float>::iterator startXIter = x.begin();
   std::vector<float>::iterator startYIter = y.begin();

-  size_t endId = startId + batchSize;
   for (size_t i = startId; i < endId; ++i) {
     size_t startXDataId = i * numFeatures_;
     size_t startYDataId = i * numClasses_;
@@ -104,7 +104,7 @@ void SGD::PrepareBatch(
     startXIter += batchSize * numFeatures_;
     startYIter += batchSize * numClasses_;
   }
-
+  */
   xt.set(x);
   yt.set(y);
 }

diff --git a/src/sgd.h b/src/sgd.h
index fedfa8a5..c5ea8dbc 100644
--- a/src/sgd.h
+++ b/src/sgd.h
@@ -30,11 +30,12 @@ class SGD {
   std::vector<float>& yData_;
   const size_t numClasses_;
   const size_t epochs_;
-  const size_t batchSize_;
+  const size_t maxBatchSize_;

   std::vector<size_t> CreateShuffle(size_t numExamples) const;
   void PrepareBatch(
       size_t startId,
+      size_t endId,
       size_t batchSize,
       const std::vector<size_t> &shuffle,
       Tensor& xt,

From 78f85fcfa424a2473f20acd7dc654b055f63fd01 Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Thu, 15 Sep 2016 15:39:49 +0100
Subject: [PATCH 06/11] compiles on valhalla and doesn't segfault

---
 src/sgd.cu | 4 +++-
 src/sgd.h  | 6 +++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/sgd.cu b/src/sgd.cu
index d46ece86..f864e6db 100644
--- a/src/sgd.cu
+++ b/src/sgd.cu
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 #include "sgd.h"
@@ -32,7 +33,8 @@ void SGD::Run()
   Tensor xt({(int)maxBatchSize_, (int)numExamples}, 0.0f);
   Tensor yt({(int)maxBatchSize_, (int)numClasses_}, 0.0f);

-  vector<size_t> shuffle = CreateShuffle(numExamples);
+  //vector<size_t> shuffle = CreateShuffle(numExamples);
+  vector<size_t> shuffle;

   for (size_t numEpoch = 0; numEpoch < epochs_; ++numEpoch) {
     std::cerr << "Starting epoch #" << numEpoch << std::endl;
     size_t startId = 0;

diff --git a/src/sgd.h b/src/sgd.h
index c5ea8dbc..33364049 100644
--- a/src/sgd.h
+++ b/src/sgd.h
@@ -20,9 +20,9 @@ class SGD {
   void Run();

  private:
-  std::shared_ptr<Expr> cost_function_;
-  std::shared_ptr<Expr> inX_;
-  std::shared_ptr<Expr> inY_;
+  Expr *cost_function_;
+  Expr *inX_;
+  Expr *inY_;
   std::vector<Expr*> params_;
   const float eta_;
   std::vector<float>& xData_;
From 037679f8a3d41e5d71013735563929ab208510a5 Mon Sep 17 00:00:00 2001
From: Roman Grundkiewicz
Date: Thu, 15 Sep 2016 17:02:25 +0200
Subject: [PATCH 07/11] update requirements and paths to be exported

---
 README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 6bee418b..67685375 100644
--- a/README.md
+++ b/README.md
@@ -11,14 +11,15 @@ Installation

 Requirements:

-* g++ with C++14
+* g++ with c++11
 * CUDA and CuDNN
+* Boost (>= 1.56)

 Exporting some paths for CuDNN may be required (put it, for example, in your `.bashrc` file):

     export PATH=$PATH:$HOME/.local/bin:/usr/local/cuda/bin
-    export LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cudnn-5/lib64
-    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cudnn-5/lib64
+    export LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/local/cudnn-5/lib64
+    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/local/cudnn-5/lib64
     export CPATH=$CPATH:/usr/local/cudnn-5/include

 Compilation with `cmake > 3.5`:

From 4a1ab1f5b458814522c7c2f897999d1e74534124 Mon Sep 17 00:00:00 2001
From: Maximiliana Behnke
Date: Thu, 15 Sep 2016 17:41:24 +0200
Subject: [PATCH 08/11] Change path to Keras single layer model

---
 src/validate_mnist.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/validate_mnist.cu b/src/validate_mnist.cu
index 9d9cdf8b..43e1fedc 100644
--- a/src/validate_mnist.cu
+++ b/src/validate_mnist.cu
@@ -21,7 +21,7 @@ int main(int argc, char** argv) {
   std::cerr << "Done." << std::endl;

   std::cerr << "Loading model params...";
-  NpzConverter converter("../scripts/test_model/model.npz");
+  NpzConverter converter("../scripts/test_model_single/model.npz");

   std::vector<float> wData, bData;
   Shape wShape, bShape;
From eba5b462257a9949fd124378c53e9bf7b357b1d3 Mon Sep 17 00:00:00 2001
From: Maximiliana Behnke
Date: Thu, 15 Sep 2016 17:41:58 +0200
Subject: [PATCH 09/11] Validation on 2-layer Keras model

---
 src/validate_mnist_batch.cu | 51 ++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 15 deletions(-)

diff --git a/src/validate_mnist_batch.cu b/src/validate_mnist_batch.cu
index ac4e7359..e2e6438c 100644
--- a/src/validate_mnist_batch.cu
+++ b/src/validate_mnist_batch.cu
@@ -21,22 +21,38 @@ int main(int argc, char** argv) {
   std::cerr << "\tDone." << std::endl;

   std::cerr << "Loading model params...";
-  NpzConverter converter("../scripts/test_model/model.npz");
+  NpzConverter converter("../scripts/test_model_multi/model.npz");

-  std::vector<float> wData;
-  Shape wShape;
-  converter.Load("weights", wData, wShape);
+  std::vector<float> wData1;
+  Shape wShape1;
+  converter.Load("weights1", wData1, wShape1);
+
+  std::vector<float> bData1;
+  Shape bShape1;
+  converter.Load("bias1", bData1, bShape1);
+
+  std::vector<float> wData2;
+  Shape wShape2;
+  converter.Load("weights2", wData2, wShape2);
+
+  std::vector<float> bData2;
+  Shape bShape2;
+  converter.Load("bias2", bData2, bShape2);

-  std::vector<float> bData;
-  Shape bShape;
-  converter.Load("bias", bData, bShape);
-
-  auto initW = [wData](Tensor t) {
-    t.set(wData);
+  auto initW1 = [wData1](Tensor t) {
+    t.set(wData1);
   };
-  auto initB = [bData](Tensor t) {
-    t.set(bData);
+  auto initB1 = [bData1](Tensor t) {
+    t.set(bData1);
+  };
+
+  auto initW2 = [wData2](Tensor t) {
+    t.set(wData2);
+  };
+
+  auto initB2 = [bData2](Tensor t) {
+    t.set(bData2);
   };

   std::cerr << "\tDone." << std::endl;
@@ -45,11 +61,15 @@ int main(int argc, char** argv) {
   auto x = input(shape={whatevs, IMAGE_SIZE}, name="X");
   auto y = input(shape={whatevs, LABEL_SIZE}, name="Y");

-  auto w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=initW);
-  auto b = param(shape={1, LABEL_SIZE}, name="b0", init=initB);
+  auto w1 = param(shape={IMAGE_SIZE, 100}, name="W0", init=initW1);
+  auto b1 = param(shape={1, 100}, name="b0", init=initB1);
+  auto w2 = param(shape={100, LABEL_SIZE}, name="W1", init=initW2);
+  auto b2 = param(shape={1, LABEL_SIZE}, name="b1", init=initB2);

   std::cerr << "Building model...";
-  auto predict = softmax(dot(x, w) + b, axis=1, name="pred");
+  auto layer1 = tanh(dot(x, w1) + b1);
+  auto layer2 = softmax(dot(layer1, w2) + b2, axis=1, name="layer2");
+  auto predict = layer2;

   std::cerr << "Done." << std::endl;
@@ -77,6 +97,7 @@ int main(int argc, char** argv) {
         if (testLabels[startId * LABEL_SIZE + i + j]) correct = j;
         if (results[i + j] > results[i + predicted]) predicted = j;
       }
+      /*std::cerr << "CORRECT: " << correct << " PREDICTED: " << predicted << std::endl;*/
       acc += (correct == predicted);
     }
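The validation network built above is a two-layer classifier: a hidden layer h = tanh(x * W1 + b1) followed by class probabilities p = softmax(h * W2 + b2), with weight shapes IMAGE_SIZE x 100 and 100 x LABEL_SIZE. A minimal single-example CPU sketch of that forward pass, assuming row-major weight storage (the helper below is hypothetical, not part of the patch):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <limits>
    #include <vector>

    // p = softmax(tanh(x * W1 + b1) * W2 + b2) for a single example.
    // W1 is (in x hid), W2 is (hid x out), both stored row-major.
    std::vector<float> forwardPass(const std::vector<float>& x,
                                   const std::vector<float>& W1, const std::vector<float>& b1,
                                   const std::vector<float>& W2, const std::vector<float>& b2,
                                   std::size_t in, std::size_t hid, std::size_t out) {
      std::vector<float> h(hid), p(out);
      for (std::size_t j = 0; j < hid; ++j) {        // hidden layer: tanh(x*W1 + b1)
        float s = b1[j];
        for (std::size_t i = 0; i < in; ++i) s += x[i] * W1[i * hid + j];
        h[j] = std::tanh(s);
      }
      float maxLogit = -std::numeric_limits<float>::infinity();
      for (std::size_t k = 0; k < out; ++k) {        // output logits: h*W2 + b2
        float s = b2[k];
        for (std::size_t j = 0; j < hid; ++j) s += h[j] * W2[j * out + k];
        p[k] = s;
        maxLogit = std::max(maxLogit, s);
      }
      float sum = 0.0f;                              // numerically stable softmax
      for (float& v : p) { v = std::exp(v - maxLogit); sum += v; }
      for (float& v : p) v /= sum;
      return p;
    }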
From 00d688b9babe22874c173cea8a23960c975fedaa Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Thu, 15 Sep 2016 17:03:45 +0100
Subject: [PATCH 10/11] shuffling doesn't crash

---
 src/sgd.cu | 65 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 22 deletions(-)

diff --git a/src/sgd.cu b/src/sgd.cu
index f864e6db..26121f2f 100644
--- a/src/sgd.cu
+++ b/src/sgd.cu
@@ -33,8 +33,8 @@ void SGD::Run()
   Tensor xt({(int)maxBatchSize_, (int)numExamples}, 0.0f);
   Tensor yt({(int)maxBatchSize_, (int)numClasses_}, 0.0f);

-  //vector<size_t> shuffle = CreateShuffle(numExamples);
-  vector<size_t> shuffle;
+  vector<size_t> shuffle = CreateShuffle(numExamples);
+  //vector<size_t> shuffle;

   for (size_t numEpoch = 0; numEpoch < epochs_; ++numEpoch) {
     std::cerr << "Starting epoch #" << numEpoch << std::endl;
     size_t startId = 0;
@@ -59,13 +59,14 @@ void SGD::Run()

 std::vector<size_t> SGD::CreateShuffle(size_t numExamples) const {
   vector<size_t> ret(numExamples);
-  std::iota(ret.begin(), ret.end(), 1);
+  std::iota(ret.begin(), ret.end(), 0);
   std::random_shuffle ( ret.begin(), ret.end() );
-
+  /*
+  cerr << "shuffled" << endl;
   for (size_t i = 0; i < ret.size(); ++i) {
-    cerr << ret[i] << " ";
+      cerr << ret[i] << " ";
   }
-
+  */
   return ret;
 }

@@ -76,37 +77,57 @@ void SGD::PrepareBatch(
     const std::vector<size_t> &shuffle,
     Tensor& xt,
     Tensor& yt) {
-
+  /*
   std::vector<float> x(xData_.begin() + startId * numFeatures_,
                        xData_.begin() + endId * numFeatures_);
   std::vector<float> y(yData_.begin() + startId * numClasses_,
                        yData_.begin() + endId * numClasses_);
-  /*
+  */
   std::vector<float> x(batchSize * numFeatures_);
   std::vector<float> y(batchSize * numClasses_);
-
-  std::vector<float>::iterator startXIter = x.begin();
-  std::vector<float>::iterator startYIter = y.begin();
-
+
+  /*
+  cerr << "startId=" << startId
+       << " " << endId
+       << " " << batchSize
+       << endl;
+  cerr << "numExamples=" << shuffle.size() << endl;
+  cerr << "numFeatures_=" << numFeatures_ << " " << numClasses_ << endl;
+  cerr << "sizes=" << x.size()
+       << " " << y.size()
+       << " " << xData_.size()
+       << " " << yData_.size()
+       << endl;
+  */
+  size_t startXId = 0;
+  size_t startYId = 0;
+
   for (size_t i = startId; i < endId; ++i) {
-    size_t startXDataId = i * numFeatures_;
-    size_t startYDataId = i * numClasses_;
-
-    size_t endXDataId = startXDataId + batchSize * numFeatures_;
-    size_t endYDataId = startYDataId + batchSize * numClasses_;
+    size_t ind = shuffle[i];
+    size_t startXDataId = ind * numFeatures_;
+    size_t startYDataId = ind * numClasses_;

+    size_t endXDataId = startXDataId + numFeatures_;
+    size_t endYDataId = startYDataId + numClasses_;
+    /*
+    cerr << "i=" << i
+         << " " << ind
+         << " " << startXDataId << "-" << endXDataId
+         << " " << startYDataId << "-" << endYDataId
+         << endl;
+    */
     std::copy(xData_.begin() + startXDataId,
              xData_.begin() + endXDataId,
-             startXIter);
+             x.begin() + startXId);

     std::copy(yData_.begin() + startYDataId,
              yData_.begin() + endYDataId,
-             startYIter);
+             y.begin() + startYId);

-    startXIter += batchSize * numFeatures_;
-    startYIter += batchSize * numClasses_;
+    startXId += numFeatures_;
+    startYId += numClasses_;
   }
-  */
+
   xt.set(x);
   yt.set(y);
 }
From eeb18f33cd15c49e2140018956474896996b9889 Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Thu, 15 Sep 2016 18:19:12 +0200
Subject: [PATCH 11/11] merge

---
 src/validate_mnist_batch.cu | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/validate_mnist_batch.cu b/src/validate_mnist_batch.cu
index 5746a433..1c66198a 100644
--- a/src/validate_mnist_batch.cu
+++ b/src/validate_mnist_batch.cu
@@ -21,11 +21,8 @@ int main(int argc, char** argv) {
   std::cerr << "\tDone." << std::endl;

   std::cerr << "Loading model params...";
-<<<<<<< HEAD
+
   NpzConverter converter("../scripts/test_model_single/model.npz");
-=======
-  NpzConverter converter("../scripts/test_model_multi/model.npz");
->>>>>>> eba5b462257a9949fd124378c53e9bf7b357b1d3
   std::vector<float> wData1;
   Shape wShape1;