Merged.

2024-09-17 09:47:34 +03:00 · 2016-09-16 18:36:30 +01:00 · 2016-09-16 18:36:30 +01:00 · 8f1a7382ec
commit 8f1a7382ec
parent c54eaf17d5 2892fe0feb
20 changed files with 2508 additions and 239 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -20,3 +20,14 @@ endif(Boost_FOUND)

 include_directories(${marian_SOURCE_DIR}/src)
 add_subdirectory(src)
+
+# add a target to generate API documentation with Doxygen
+find_package(Doxygen)
+if(DOXYGEN_FOUND)
+    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
+    add_custom_target(doc
+        ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+        COMMENT "Generating API documentation with Doxygen" VERBATIM
+    )
+endif(DOXYGEN_FOUND)
--- a/Doxyfile.in
+++ b/Doxyfile.in
--- a/LICENSE.md
+++ b/LICENSE.md
@ -1,5 +1,3 @@
-The MIT License (MIT)
-
 Copyright (c) 2016 Marcin Junczys-Dowmunt

 Permission is hereby granted, free of charge, to any person obtaining a copy
@ -9,8 +7,8 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
--- a/README.md
+++ b/README.md
@ -29,3 +29,6 @@ Compilation with `cmake > 3.5`:
    cmake ..
    make -j

+To compile API documentation using Doxygen, first cd to the build directory, and then:
+
+    make doc
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -5,7 +5,6 @@ cuda_add_library(marian_lib
  cnpy/cnpy.cpp
  exception.cpp
  expression_graph.cu 
-  sgd.cu
  tensor.cu
  tensor_operators.cu
  expression_operators.cu
--- a/src/expression_graph.cu
+++ b/src/expression_graph.cu
@ -1,8 +1,6 @@
 #include <sstream>
 #include "expression_graph.h"

-using namespace std;
-
 namespace marian {

 Expr::Expr(ExpressionGraphPtr g, Chainable<Tensor>* chainable)
@ -32,19 +30,10 @@ Expr::operator ChainPtr() {

 std::string Expr::Debug() const
 {
-	stringstream strm;
+	std::stringstream strm;
 	const Shape &shape = pimpl_->shape();
 	strm << marian::Debug(shape);
 	return strm.str();
 }

-///////////////////////////////////////////////////////
-ExpressionGraph::ExpressionGraph(int cudaDevice)
-: stack_(new ChainableStack)
-{
-  std::srand (time(NULL));
-  cudaSetDevice(0);
-
-}
-
 }
--- a/src/expression_graph.h
+++ b/src/expression_graph.h
@ -38,9 +38,14 @@ class Expr {

 class ExpressionGraph {
  public:
-    ExpressionGraph(int cudaDevice);
+    ExpressionGraph() : stack_(new ChainableStack) {}
    
-    void forward(size_t batchSize) {
+    void backprop(int batchSize) {
+      forward(batchSize);
+      backward();
+    }
+    
+    void forward(int batchSize) {
      for(auto&& v : *stack_) {
        v->allocate(batchSize);
      }
@ -48,6 +53,16 @@ class ExpressionGraph {
        v->forward();    
    }
    
+    void backward() {
+      for(auto&& v : *stack_)
+        v->set_zero_adjoint();
+    
+      typedef typename ChainableStack::reverse_iterator It;
+      stack_->back()->init_dependent();
+      for(It it = stack_->rbegin(); it != stack_->rend(); ++it)
+        (*it)->backward();
+    }
+    
    std::string graphviz() {
      std::stringstream ss;
      ss << "digraph ExpressionGraph {" << std::endl;
@ -60,19 +75,13 @@ class ExpressionGraph {
      return ss.str();
    }
    
-    void backward() {
-      for(auto&& v : *stack_)
-        v->set_zero_adjoint();
-    
-      typedef typename ChainableStack::reverse_iterator It;
-      stack_->back()->init_dependent();
-      for(It it = stack_->rbegin(); it != stack_->rend(); ++it)
-        (*it)->backward();
-    }
+    /*********************************************************/
    
    template <typename ...Args>
    inline Expr input(Args ...args) {
-      return Expr(this, new InputNode(args...));
+      Expr e(this, new InputNode(args...));
+      inputs_.emplace_back(e);
+      return e;
    }
    
    template <typename ...Args>
@ -117,14 +126,20 @@ class ExpressionGraph {
      named_.emplace(name, e);
    }
    
+    std::vector<Expr>& inputs() {
+      return inputs_;
+    }
+    
    std::vector<Expr>& params() {
      return params_;
    }
    
  private:
    ChainableStackPtr stack_;
+    
    std::map<std::string, Expr> named_;
    std::vector<Expr> params_;
+    std::vector<Expr> inputs_;
 };

 }
--- a/src/expression_operators.cu
+++ b/src/expression_operators.cu
@ -29,7 +29,7 @@ Expr operator-(Expr a) {
  return Expr(a.graph(), new NegNodeOp(a));
 };

-Expr softmax_fast(Expr a) {
+Expr softmax(Expr a) {
  return Expr(a.graph(), new SoftmaxNodeOp(a));
 }

--- a/src/expression_operators.h
+++ b/src/expression_operators.h
@ -72,12 +72,12 @@ inline Expr sum(Expr a, Args ...args) {

 // inefficient
 template <typename ...Args>
-Expr softmax(Expr a, Args ...args) {
+Expr softmax_slow(Expr a, Args ...args) {
  Expr e = exp(a);
  return e / sum(e, args...);
 }

-Expr softmax_fast(Expr a);
+Expr softmax(Expr a);

 // inefficient
 template <typename ...Args>
--- a/src/sgd.cu
+++ b/src/sgd.cu
@ -1,140 +0,0 @@
-#include <ctime>
-#include <algorithm>
-#include <vector>
-#include "sgd.h"
-#include "thrust_functions.h"
-
-using namespace std;
-
-namespace marian {
-SGD::SGD(ExpressionGraph& g, float eta,
-    std::vector<float>& xData, size_t numFeatures,
-    std::vector<float>& yData, size_t numClasses,
-    size_t epochs, size_t batchSize)
-: graph_(g),
-  eta_(eta),
-  xData_(xData),
-  numFeatures_(numFeatures),
-  yData_(yData),
-  numClasses_(numClasses),
-  epochs_(epochs),
-  maxBatchSize_(batchSize)
-{}
-
-void SGD::Run()
-{
-  size_t numExamples = xData_.size()/ numFeatures_;
-  Tensor xt({(int)maxBatchSize_, (int)numExamples}, 0.0f);
-  Tensor yt({(int)maxBatchSize_, (int)numClasses_}, 0.0f);
-
-  vector<size_t> shuffle = CreateShuffle(numExamples);
-  //vector<size_t> shuffle;
-
-  for (size_t numEpoch = 0; numEpoch < epochs_; ++numEpoch) {
-    std::cerr << "Starting epoch #" << numEpoch << std::endl;
-    size_t startId = 0;
-
-    while (startId < numExamples) {
-      size_t batchSize = std::min(maxBatchSize_, numExamples - startId);
-      size_t endId = startId + batchSize;
-
-      PrepareBatch(startId, endId, batchSize, shuffle, xt, yt);
-      graph_["x"] = xt;
-      graph_["y"] = yt;
-
-      graph_.forward(maxBatchSize_);
-      graph_.backward();
-
-      UpdateModel();
-
-      startId += maxBatchSize_;
-    }
-  }
-}
-
-std::vector<size_t> SGD::CreateShuffle(size_t numExamples) const {
-  vector<size_t> ret(numExamples);
-  std::iota(ret.begin(), ret.end(), 0);
-  std::random_shuffle ( ret.begin(), ret.end() );
-  /*
-  cerr << "shuffled" << endl;
-  for (size_t i = 0; i < ret.size(); ++i) {
-    cerr << ret[i] << " ";
-  }
-  */
-  return ret;
-}
-
-void SGD::PrepareBatch(
-		size_t startId,
-		size_t endId,
-		size_t batchSize,
-		const std::vector<size_t> &shuffle,
-		Tensor& xt,
-		Tensor& yt) {
-  /*
-  std::vector<float> x(xData_.begin() + startId * numFeatures_,
-                       xData_.begin() + endId * numFeatures_);
-  std::vector<float> y(yData_.begin() + startId * numClasses_,
-                       yData_.begin() + endId * numClasses_);
-  */
-  std::vector<float> x(batchSize * numFeatures_);
-  std::vector<float> y(batchSize * numClasses_);
-  
-  //cerr << "batchSize=" << batchSize << endl;
-  /*
-  cerr << "startId=" << startId
-       << " " << endId
-       << " " << batchSize
-       << endl;
-  cerr << "numExamples=" << shuffle.size() << endl;
-  cerr << "numFeatures_=" << numFeatures_ << " " << numClasses_ << endl;
-  cerr << "sizes=" << x.size() 
-       << " " << y.size() 
-       << " " << xData_.size()
-       << " " << yData_.size()
-       << endl;
-  */
-  size_t startXId = 0;
-  size_t startYId = 0;
-  
-  for (size_t i = startId; i < endId; ++i) {
-    size_t ind = shuffle[i];
-    size_t startXDataId = ind * numFeatures_;
-    size_t startYDataId = ind * numClasses_;
-
-    size_t endXDataId = startXDataId + numFeatures_;
-    size_t endYDataId = startYDataId + numClasses_;
-    /*
-    cerr << "i=" << i
-    	 << " " << ind
-    	 << " " << startXDataId << "-" << endXDataId
-	 << " " << startYDataId << "-" << endYDataId
-	 << endl;
-    */
-
-    std::copy(xData_.begin() + startXDataId,
-        xData_.begin() + endXDataId,
-        x.begin() + startXId);
-
-    std::copy(yData_.begin() + startYDataId,
-        yData_.begin() + endYDataId,
-        y.begin() + startYId);
-
-    startXId += numFeatures_;
-    startYId += numClasses_;
-  }
-  
-  xt.set(x);
-  yt.set(y);
-}
-
-void SGD::UpdateModel() {
-  for (auto& param : graph_.params()) {
-    using namespace thrust::placeholders;
-    Element(_1 -= eta_ * _2, param.val(), param.grad());
-  }
-}
-
-} // namespace
-
--- a/src/sgd.h
+++ b/src/sgd.h
@ -1,43 +1,51 @@
 #pragma once

-#include <memory>
-#include <iostream>
-
-#include "expression_graph.h"
-#include "thrust_functions.h"
+#include <map>
+#include <boost/any.hpp>
 #include "tensor_operators.h"

 namespace marian {

-class SGD {
+class Sgd {
  public:
-    SGD(ExpressionGraph& g, float eta,
-        std::vector<float>& xData, size_t numFeatures,
-        std::vector<float>& yData, size_t numClasses,
-        size_t epochs, size_t batchSize);
+    Sgd(float eta=0.1) : eta_(eta) {}
    
-    void Run();
+    void operator()(ExpressionGraph& graph, int batchSize) {
+      graph.backprop(batchSize);
+      
+      for(auto& param : graph.params())
+        Element(_1 -= eta_ * _2,
+                param.val(), param.grad());
+    }
    
  private:
-    ExpressionGraph& graph_;
-    const float eta_;
-    std::vector<float>& xData_;
-    const size_t numFeatures_;
-    std::vector<float>& yData_;
-    const size_t numClasses_;
-    const size_t epochs_;
-    const size_t maxBatchSize_;
-
-    std::vector<size_t> CreateShuffle(size_t numExamples) const;
-    void PrepareBatch(
-    		size_t startId,
-    		size_t endId,
-    		size_t batchSize,
-    		const std::vector<size_t> &shuffle,
-    		Tensor& xt,
-    		Tensor& yt);
-
-    void UpdateModel();
+    float eta_;
 };

-} // namespace marian
+class Adagrad {
+  public:
+    Adagrad(float eta=0.1) : eta_(eta) {}
+    
+    void operator()(ExpressionGraph& graph, int batchSize) {
+      float fudgeFactor = 1e-6;
+      graph.backprop(batchSize);
+      
+      if(history_.size() < graph.params().size())
+        for(auto& param : graph.params())
+          history_.emplace_back(Tensor(param.grad().shape(), 0));
+      
+      auto it = history_.begin();
+      for(auto& param : graph.params()) {    
+        Element(_1 += _2 * _2, *it, param.grad());
+        Element(_1 -= eta_ / (fudgeFactor + Sqrt(_2)) * _3,
+                param.val(), *it, param.grad());
+        it++;
+      }
+    }
+    
+  private:
+    float eta_;
+    std::vector<Tensor> history_;
+};
+
+}
--- a/src/tensor.cu
+++ b/src/tensor.cu
@ -1,8 +1,6 @@
 #include <fstream>
 #include "tensor.h"

-using namespace std;
-
 namespace marian {

 void Tensor::set(const std::vector<float>& data)
--- a/src/tensor_operators.cu
+++ b/src/tensor_operators.cu
@ -1,5 +1,7 @@
 #include "tensor_operators.h"

+using namespace std;
+
 namespace marian {

 __global__ void gSubtractMean(float* out, float* weights,
@ -53,6 +55,7 @@ void SubtractMean(Tensor* Out, Tensor &Weights) {
  cudaStreamSynchronize(0);
 }

+///////////////////////////////////////////////////////
 __global__ void gSoftMax(float* softMaxP, size_t rows, size_t cols) {
  for(int bid = 0; bid < rows; bid += gridDim.x) {
    int j = bid + blockIdx.x;
@ -97,6 +100,35 @@ void Softmax(Tensor* Out) {
  gSoftMax<<<blocks, threads, shared>>>(Out->data(), m, k);
  cudaStreamSynchronize(0);
 }
+///////////////////////////////////////////////////////
+__global__ void gArgMax(float *out, const float *data, size_t rows, size_t cols) {
+  size_t row = blockIdx.x;
+    size_t startInd = row * cols;
+    float maxScore = -99999;
+    size_t maxInd;
+    for (size_t col = 0; col < cols; ++col) {
+      size_t ind = startInd + col;
+      float score = data[ind];
+      if (score > maxScore) {
+        maxScore = score;
+        maxInd = col;
+      }
+    }
+    out[row] = maxInd;
+}
+
+void Argmax(Tensor* Out, const Tensor* In) {
+  size_t m = In->shape()[0];
+  size_t k = In->shape()[1];
+
+  int blocks = m; //std::min(MAX_BLOCKS, (int) m);
+  int threads = k; //std::min(MAX_THREADS, (int) k);
+  //int shared = sizeof(float) * threads * 2;
+  gArgMax<<<blocks, threads>>>(Out->data(), In->data(), m, k);
+  cudaStreamSynchronize(0);
+}
+
+///////////////////////////////////////////////////////

 Tensor Prod(cublasHandle_t handle, Tensor C, const Tensor A, const Tensor B,
             bool transA, bool transB, Float beta) {
--- a/src/tensor_operators.h
+++ b/src/tensor_operators.h
@ -151,6 +151,10 @@ __global__ void gSoftMax(float* softMaxP, size_t rows, size_t cols);

 void Softmax(Tensor* Out);

+__global__ void gArgMax(float *out, const float *data, size_t rows, size_t cols);
+
+void Argmax(Tensor* Out, const Tensor* In);
+
 Tensor Prod(cublasHandle_t handle, Tensor C, const Tensor A, const Tensor B,
             bool transA, bool transB, Float beta);

--- a/src/test.cu
+++ b/src/test.cu
@ -3,7 +3,51 @@
 #include "mnist.h"
 #include "vocab.h"

+#include "tensor_operators.h"
+
+using namespace std;
+
+///////////////////////////////////////////////////////
+string output(const std::vector<float> &vec)
+{
+  stringstream strm;
+  for (size_t i = 0; i < vec.size(); ++i) {
+  strm << vec[i] << " ";
+  }
+  return strm.str();
+}
+
+void testArgMax()
+{
+  using namespace std;
+  using namespace marian;
+
+  std::vector<float> hVec({29,19,  49,39,  79,99,  79,39});
+        cerr << "hVec =" << output(hVec) << endl;
+
+  thrust::device_vector<float> dVec(8);
+  thrust::copy(hVec.begin(), hVec.end(), dVec.begin());
+  float *data = thrust::raw_pointer_cast(dVec.data());
+
+  thrust::device_vector<float> dLabel(4);
+  float *labelPtr = thrust::raw_pointer_cast(dLabel.data());
+
+  gArgMax<<<4, 1, sizeof(float)>>>(labelPtr, data, 4, 2);
+
+  std::vector<float> hVec2(8);
+  thrust::copy(dVec.begin(), dVec.end(), hVec2.begin());
+  cerr << "hVec2=" << output(hVec2) << endl;
+
+  std::vector<float> hLabel(4);
+  thrust::copy(dLabel.begin(), dLabel.end(), hLabel.begin());
+  cerr << "hLabel=" << output(hLabel) << endl;
+
+  exit(0);
+}
+
+///////////////////////////////////////////////////////
 int main(int argc, char** argv) {
+  //testArgMax();

  using namespace std;
  using namespace marian;
@ -21,7 +65,7 @@ int main(int argc, char** argv) {
  std::vector<Expr> Y;
  std::vector<Expr> H;

-  ExpressionGraph g(0);
+  ExpressionGraph g;

  for (int t = 0; t < num_inputs; ++t) {
    X.emplace_back(g.input(shape={batch_size, input_size}));
@ -39,10 +83,9 @@ int main(int argc, char** argv) {

  string sourceLine, targetLine;
  while (getline(sourceFile, sourceLine)) {
-	  getline(targetFile, targetLine);
-
-	  std::vector<size_t> sourceIds = sourceVocab.ProcessSentence(sourceLine);
-	  std::vector<size_t> targetIds = sourceVocab.ProcessSentence(targetLine);
+    getline(targetFile, targetLine);
+    std::vector<size_t> sourceIds = sourceVocab.ProcessSentence(sourceLine);
+    std::vector<size_t> targetIds = sourceVocab.ProcessSentence(targetLine);
  }

  std::cerr << "Building RNN..." << std::endl;
@ -57,10 +100,10 @@ int main(int argc, char** argv) {
  std::cerr << "Building output layer..." << std::endl;
  std::vector<Expr> Yp;

-  Yp.emplace_back(softmax_fast(dot(H[0], Why) + by));
+  Yp.emplace_back(softmax(dot(H[0], Why) + by));
  Expr cross_entropy = sum(Y[0] * log(Yp[0]), axis=1);
  for (int t = 1; t < num_inputs; ++t) {
-    Yp.emplace_back(softmax_fast(dot(H[t], Why) + by));
+    Yp.emplace_back(softmax(dot(H[t], Why) + by));
    cross_entropy = cross_entropy + sum(Y[t] * log(Yp[t]), axis=1);
  }
  auto graph = -mean(cross_entropy, axis=0, name="cost");
--- a/src/thrust_functions.h
+++ b/src/thrust_functions.h
@ -85,6 +85,19 @@ namespace thrust
        return compose(unary_operator<unary_tanh>(), _1);
      }
      
+      template<typename T>
+      struct unary_sqrt : public thrust::unary_function<T,T> {
+        __host__ __device__
+        T operator()(const T &x) const { return sqrtf(x); }
+      };
+      
+      template<typename Eval>
+      __host__ __device__
+      actor<composite<unary_operator<unary_sqrt>, actor<Eval>>>
+      Sqrt(const actor<Eval> &_1) {
+        return compose(unary_operator<unary_sqrt>(), _1);
+      }
+      
      template<typename T1, typename T2>
      __host__ __device__
      actor<composite<binary_operator<thrust::maximum>, actor<T1>, actor<T2>>>
--- a/src/train_mnist.cu
+++ b/src/train_mnist.cu
@ -11,12 +11,12 @@ int main(int argc, char** argv) {
  int numofdata;

  vector<float> trainImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE);
-  vector<float>trainLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE);
+  vector<float> trainLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE);

  using namespace marian;
  using namespace keywords;

-  ExpressionGraph g(0);
+  ExpressionGraph g;
  
  Expr x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
  Expr y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
@ -24,16 +24,13 @@ int main(int argc, char** argv) {
  Expr w = named(g.param(shape={IMAGE_SIZE, LABEL_SIZE}), "w");
  Expr b = named(g.param(shape={1, LABEL_SIZE}), "b");

-  std::vector<Expr*> params;
-  params.push_back(&w);
-  params.push_back(&b);
-
  auto scores = dot(x, w) + b;
-  auto lr = softmax_fast(scores);
+  auto lr = softmax(scores);
  auto cost = named(-mean(sum(y * log(lr), axis=1), axis=0), "cost");
  cerr << "lr=" << lr.Debug() << endl;

-  SGD opt(g, 0.9, trainImages, IMAGE_SIZE, trainLabels, LABEL_SIZE, 3, 24);
-  opt.Run();
+  Adagrad opt;
+  opt(g, 300);
+  
  return 0;
 }
--- a/src/validate_encoder_decoder.cu
+++ b/src/validate_encoder_decoder.cu
@ -7,8 +7,7 @@
 using namespace marian;
 using namespace keywords;

-ExpressionGraph build_graph(int cuda_device,
-                            int source_vocabulary_size,
+ExpressionGraph build_graph(int source_vocabulary_size,
                            int target_vocabulary_size,
                            int embedding_size,
                            int hidden_size,
@ -21,7 +20,7 @@ ExpressionGraph build_graph(int cuda_device,
  int num_inputs = num_source_tokens;
  int num_outputs = num_target_tokens;

-  ExpressionGraph g(cuda_device);
+  ExpressionGraph g;
  std::vector<Expr> X, Y, H, S;

  // We're including the stop symbol here.
@ -83,10 +82,10 @@ ExpressionGraph build_graph(int cuda_device,

  // Softmax layer and cost function.
  std::vector<Expr> Yp;
-  Yp.emplace_back(named(softmax_fast(dot(h0_d, Why) + by), "pred"));
+  Yp.emplace_back(named(softmax(dot(h0_d, Why) + by), "pred"));
  Expr cross_entropy = sum(Y[0] * log(Yp[0]), axis=1);
  for (int t = 1; t <= num_outputs; ++t) {
-    Yp.emplace_back(named(softmax_fast(dot(S[t-1], Why) + by), "pred"));
+    Yp.emplace_back(named(softmax(dot(S[t-1], Why) + by), "pred"));
    cross_entropy = cross_entropy + sum(Y[t] * log(Yp[t]), axis=1);
  }
  auto cost = named(-mean(cross_entropy, axis=0), "cost");
@ -153,8 +152,7 @@ int main(int argc, char** argv) {
  // Build the encoder-decoder computation graph.
  int embedding_size = 50;
  int hidden_size = 100;
-  ExpressionGraph g = build_graph(0, // cuda device.
-                                  source_vocab.Size(),
+  ExpressionGraph g = build_graph(source_vocab.Size(),
                                  target_vocab.Size(),
                                  embedding_size,
                                  hidden_size,
@ -253,7 +251,6 @@ int main(int argc, char** argv) {
    ss << "Y" << t;
    g[ss.str()] = Yt;
  }
-  
 #endif

  std::cerr << "Printing the computation graph..." << std::endl;
--- a/src/validate_mnist.cu
+++ b/src/validate_mnist.cu
@ -10,7 +10,7 @@ const size_t IMAGE_SIZE = 784;
 const size_t LABEL_SIZE = 10;
 int BATCH_SIZE = 10000;

-ExpressionGraph build_graph(int cudaDevice) {
+ExpressionGraph build_graph() {
  std::cerr << "Loading model params...";
  NpzConverter converter("../scripts/test_model_single/model.npz");

@ -22,7 +22,7 @@ ExpressionGraph build_graph(int cudaDevice) {

  std::cerr << "Building model...";
  
-  ExpressionGraph g(cudaDevice);
+  ExpressionGraph g;
  auto x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
  auto y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
  
@ -32,7 +32,7 @@ ExpressionGraph build_graph(int cudaDevice) {
                         init=from_vector(bData)), "b");

  auto probs = named(
-    softmax_fast(dot(x, w) + b), //, axis=1),
+    softmax(dot(x, w) + b), //, axis=1),
    "probs"
  );
  
@ -52,7 +52,7 @@ int main(int argc, char** argv) {
  std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE);
  std::cerr << "Done." << std::endl;

-  ExpressionGraph g = build_graph(0);
+  ExpressionGraph g = build_graph();
  
  Tensor xt({BATCH_SIZE, IMAGE_SIZE});
  Tensor yt({BATCH_SIZE, LABEL_SIZE});
--- a/src/validate_mnist_batch.cu
+++ b/src/validate_mnist_batch.cu
@ -56,8 +56,7 @@ int main(int argc, char** argv) {

  std::cerr << "\tDone." << std::endl;

-
-  ExpressionGraph g(0);
+  ExpressionGraph g;

  auto x = g.input(shape={whatevs, IMAGE_SIZE}, name="X");
  auto y = g.input(shape={whatevs, LABEL_SIZE}, name="Y");
@ -69,7 +68,7 @@ int main(int argc, char** argv) {

  std::cerr << "Building model...";
  auto layer1 = tanh(dot(x, w1) + b1);
-  auto layer2 = softmax(dot(layer1, w2) + b2, axis=1, name="layer2");
+  auto layer2 = softmax(dot(layer1, w2) + b2);
  auto predict = layer2;

  std::cerr << "Done." << std::endl;