From 6f7f0d77f157785237e5e99a6840e779212aad28 Mon Sep 17 00:00:00 2001
From: Marcin Junczys-Dowmunt
Date: Thu, 15 Sep 2016 09:44:03 +0200
Subject: [PATCH] some clean-up

---
 src/expression_operators.h |  1 -
 src/graph_operators.h      | 10 ++++++++--
 src/tensor.h               |  2 --
 src/validate_mnist.cu      | 31 +++++++++++++++++++++++--------
 4 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/src/expression_operators.h b/src/expression_operators.h
index 3d42400f..878f9882 100644
--- a/src/expression_operators.h
+++ b/src/expression_operators.h
@@ -94,7 +94,6 @@ Expr broadcast(Shape bShape, Expr a) {
                    "Cannot broadcast tensor dimension "
                    << dimA << " to " << dimB);
       if(dimA == 1 && dimB != 1) {
-        std::cerr << "Broadcasting dim " << i << " from " << dimA << " to " << dimB << std::endl;
         if(i == 0) {
           Expr one = ones(keywords::shape={bShape[0], 1});
           a = dot(one, a);
diff --git a/src/graph_operators.h b/src/graph_operators.h
index 2b03d4c1..f231103b 100644
--- a/src/graph_operators.h
+++ b/src/graph_operators.h
@@ -132,15 +132,21 @@ struct ArgmaxOp : public UnaryNodeOp {
 
   void forward() {
     //val_ = Argmax(a_->val(), axis_);
+    UTIL_THROW2("Not implemented");
   }
 
-  void backward() {}
+  void backward() {
+    UTIL_THROW2("Not implemented");
+  }
 
  private:
   int axis_;
 };
 
-
+// @TODO, make this numerically safe(r):
+// softmax(X) = softmax_safe(X - max(X, axis=1))
+// Probably best to do this directly in Softmax
+// function.
 struct SoftmaxNodeOp : public UnaryNodeOp {
   template <typename ...Args>
   SoftmaxNodeOp(ChainPtr a, Args ...args)
diff --git a/src/tensor.h b/src/tensor.h
index 40bca4f8..b13e55fe 100644
--- a/src/tensor.h
+++ b/src/tensor.h
@@ -52,8 +52,6 @@ class TensorImpl {
     UTIL_THROW_IF2(shape_.size() < 1 || shape_.size() > 4,
                    "Wrong number of dimensions: " << shape_.size());
 
-    std::cerr << "Allocating : " << shape[0] << " " << shape[1] << std::endl;
-
     int size = GetTotalSize(shape_);
     data_.resize(size, value);
   }
diff --git a/src/validate_mnist.cu b/src/validate_mnist.cu
index 8834e745..2226eb16 100644
--- a/src/validate_mnist.cu
+++ b/src/validate_mnist.cu
@@ -40,8 +40,8 @@ int main(int argc, char** argv) {
   auto b = param(shape={1, LABEL_SIZE},
                  init=[bData](Tensor t) { t.set(bData); });
 
-  auto predict = softmax(dot(x, w) + b, axis=1);
-  auto graph = -mean(sum(y * log(predict), axis=1), axis=0);
+  auto probs = softmax(dot(x, w) + b, axis=1);
+  auto graph = -mean(sum(y * log(probs), axis=1), axis=0);
 
   std::cerr << "Done."
             << std::endl;
@@ -52,9 +52,24 @@ int main(int argc, char** argv) {
   y = yt << testLabels;
 
   graph.forward(BATCH_SIZE);
 
+  auto results = probs.val();
+  std::vector<float> resultsv(results.size());
+  resultsv << results;
+  size_t acc = 0;
+  for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
+    size_t correct = 0;
+    size_t probsed = 0;
+    for (size_t j = 0; j < LABEL_SIZE; ++j) {
+      if (testLabels[i+j]) correct = j;
+      if (resultsv[i + j] > resultsv[i + probsed]) probsed = j;
+    }
+    acc += (correct == probsed);
+  }
+  std::cerr << "Cost: " << graph.val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
+
   float eta = 0.1;
-  for (size_t j = 0; j < 100; ++j) {
+  for (size_t j = 0; j < 10; ++j) {
     for(size_t i = 0; i < 60; ++i) {
       graph.backward();
 
@@ -65,21 +80,21 @@ int main(int argc, char** argv) {
       graph.forward(BATCH_SIZE);
     }
     std::cerr << "Epoch: " << j << std::endl;
-    auto results = predict.val();
+    auto results = probs.val();
     std::vector<float> resultsv(results.size());
     resultsv << results;
 
     size_t acc = 0;
     for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
       size_t correct = 0;
-      size_t predicted = 0;
+      size_t probsed = 0;
       for (size_t j = 0; j < LABEL_SIZE; ++j) {
         if (testLabels[i+j]) correct = j;
-        if (resultsv[i + j] > resultsv[i + predicted]) predicted = j;
+        if (resultsv[i + j] > resultsv[i + probsed]) probsed = j;
       }
-      acc += (correct == predicted);
+      acc += (correct == probsed);
     }
-    std::cerr << "Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
+    std::cerr << "Cost: " << graph.val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
   }
   return 0;
 }
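
Note on the @TODO added in src/graph_operators.h: softmax is invariant to subtracting a per-row constant, so softmax(X) equals softmax(X - max(X, axis=1)); shifting by the row maximum keeps every exponent at or below zero and prevents exp() from overflowing on large activations. Below is a minimal CPU sketch of that trick over a plain row-major float buffer; it is an illustration under those assumptions, not the Marian Softmax function, which operates on GPU tensors.

#include <algorithm>
#include <cmath>
#include <cstddef>

// Row-wise numerically stable softmax over a row-major [rows x cols] buffer.
void softmax_safe(float* data, size_t rows, size_t cols) {
  for (size_t i = 0; i < rows; ++i) {
    float* row = data + i * cols;
    // Shift by the row maximum so every exponent is <= 0.
    float max = *std::max_element(row, row + cols);
    float sum = 0.0f;
    for (size_t j = 0; j < cols; ++j) {
      row[j] = std::exp(row[j] - max);
      sum += row[j];
    }
    // sum >= 1 is guaranteed, since the max element contributes exp(0) = 1,
    // so the normalization below never divides by zero.
    for (size_t j = 0; j < cols; ++j)
      row[j] /= sum;
  }
}

The shift changes nothing mathematically because the common factor exp(-max) cancels between numerator and denominator; it only moves the computation into a range where exp() is safe in float precision.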