Working separated graphs; added params.

Marcin Junczys-Dowmunt 2016-09-16 00:23:47 +02:00
parent 976c8039db
commit 94914d56e8
7 changed files with 91 additions and 90 deletions

View File

@@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.5.1)
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
project(marian CXX)
SET(CMAKE_CXX_FLAGS " -std=c++11 -g -O0 -funroll-loops -Wno-unused-result -Wno-deprecated")
LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O0; -arch=sm_35; -lineinfo; --use_fast_math; -Xcompiler '-fPIC')
SET(CMAKE_CXX_FLAGS " -std=c++11 -g -O3 -funroll-loops -Wno-unused-result -Wno-deprecated")
LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O3; -arch=sm_35; -lineinfo; --use_fast_math; -Xcompiler '-fPIC')
add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM)
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)

View File

@@ -1,5 +1,7 @@
#pragma once
#include <map>
#include "definitions.h"
#include "chainable.h"
#include "node_operators.h"
@@ -65,7 +67,9 @@ class ExpressionGraph {
template <typename ...Args>
inline Expr param(Args ...args) {
return Expr(this, new ParamNode(args...));
Expr e(this, new ParamNode(args...));
params_.emplace_back(e);
return e;
}
template <typename ...Args>
@@ -89,8 +93,28 @@
return stack_;
}
Expr& operator[](const std::string& name) {
auto it = named_.find(name);
UTIL_THROW_IF2(it == named_.end(), "No such named node in graph: " << name);
return it->second;
}
bool has_node(const std::string& name) const {
return named_.count(name) > 0;
}
void add_named_node(Expr e, const std::string& name) {
named_.emplace(name, e);
}
std::vector<Expr>& params() {
return params_;
}
private:
ChainableStackPtr stack_;
std::map<std::string, Expr> named_;
std::vector<Expr> params_;
};
}
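The accessors added above can be exercised roughly as follows (a minimal sketch, not part of this commit; the shapes and the names "x" and "w" are made up for illustration, and the relevant marian headers are assumed to be included):

// sketch: named() comes from the expression operators, ExpressionGraph from this header
using namespace marian;
using namespace keywords;

ExpressionGraph g;
auto x = named(g.input(shape={whatevs, 4}), "x");           // registers the node under "x"
auto w = named(g.param(shape={4, 2}, init=uniform()), "w");  // param() also records w in g.params()

if (g.has_node("x"))   // look a node up by name; operator[] throws on unknown names
  g["x"] = x;

for (auto& p : g.params()) {
  // every expression created via param() is collected here,
  // e.g. for applying an update rule to p.val() and p.grad()
}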

View File

@@ -4,6 +4,11 @@
namespace marian {
Expr named(Expr a, const std::string& name) {
a.graph()->add_named_node(a, name);
return a;
}
Expr logit(Expr a) {
return Expr(a.graph(), new LogitNodeOp(a));
}
@@ -26,6 +31,25 @@ Expr operator-(Expr a) {
/*********************************************************/
static Shape newShape(ChainPtr a, ChainPtr b) {
size_t dimsA = a->shape().size();
size_t dimsB = b->shape().size();
UTIL_THROW_IF2(dimsA != dimsB,
"Tensors have different numbers of dimensions");
Shape shape(dimsA);
for(size_t i = 0; i < dimsA; ++i) {
int dimA = a->shape()[i];
int dimB = b->shape()[i];
bool broadcastable = (dimA == dimB || dimA == 1 || dimB == 1);
UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
<< "operation cannot be broadcasted: " << dimA << " != " << dimB);
shape[i] = std::max(dimA, dimB);
if(dimA == whatevs || dimB == whatevs)
shape[i] = whatevs;
}
return shape;
}
Expr broadcast(Shape bShape, Expr a) {
const Shape& aShape = a.node()->shape();
if(aShape == bShape) {
@@ -61,30 +85,11 @@ Expr broadcast(Shape bShape, Expr a) {
}
}
static Shape newShape(ChainPtr a, ChainPtr b) {
size_t dimsA = a->shape().size();
size_t dimsB = b->shape().size();
UTIL_THROW_IF2(dimsA != dimsB,
"Tensors have different numbers of dimensions");
Shape shape(dimsA);
for(size_t i = 0; i < dimsA; ++i) {
int dimA = a->shape()[i];
int dimB = b->shape()[i];
bool broadcastable = (dimA == dimB || dimA == 1 || dimB == 1);
UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
<< "operation cannot be broadcasted: " << dimA << " != " << dimB);
shape[i] = std::max(dimA, dimB);
if(dimA == whatevs || dimB == whatevs)
shape[i] = whatevs;
}
return shape;
}
Expr operator+(Expr a, Expr b) {
Shape shape = newShape(a, b);
Expr cast_a = broadcast(shape, a);
Expr cast_b = broadcast(shape, b);
return Expr(a.graph(), new PlusNodeOp(a, b));
return Expr(a.graph(), new PlusNodeOp(cast_a, cast_b));
}
Expr operator-(Expr a, Expr b) {
@@ -109,13 +114,7 @@ Expr operator/(Expr a, Expr b) {
}
Expr dot(Expr a, Expr b) {
Shape shape = newShape(a, b);
Expr cast_a = broadcast(shape, a);
Expr cast_b = broadcast(shape, b);
return Expr(a.graph(), new DotNodeOp(cast_a, cast_b));
return Expr(a.graph(), new DotNodeOp(a, b));
}
/******************************************************/
}
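For context, the broadcasting rule implemented by newShape()/broadcast() above matches dimensions pairwise: each pair must be equal, or one side must be 1 (or whatevs), and the result takes the larger extent. A small sketch of the typical case this enables (illustrative shapes, not from the diff):

// x: {whatevs, 784}  (a batch of row vectors)
// w: {784, 10}   ->  dot(x, w): {whatevs, 10}
// b: {1, 10}          (a bias row)
// operator+ now broadcasts b across the batch axis, so this is valid:
Expr affine(Expr x, Expr w, Expr b) {
  return dot(x, w) + b;   // b is repeated along the first dimension
}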

View File

@@ -4,6 +4,8 @@
namespace marian {
Expr named(Expr a, const std::string& name);
Expr logit(Expr a);
Expr tanh(Expr a);

View File

@@ -194,8 +194,7 @@ struct BinaryNodeOp : public Node {
struct DotNodeOp : public BinaryNodeOp {
template <typename ...Args>
DotNodeOp(ChainPtr a, ChainPtr b, Args ...args)
: BinaryNodeOp(
a, b,
: BinaryNodeOp(a, b,
keywords::shape=newShape(a, b),
args...) { }
@@ -224,35 +223,6 @@ struct DotNodeOp : public BinaryNodeOp {
}
};
//struct BroadcastingNodeOp : public BinaryNodeOp {
// template <typename ...Args>
// BroadcastingNodeOp(ChainPtr a, ChainPtr b, Args ...args)
// : BinaryNodeOp(broadcast(newShape(a ,b), a),
// broadcast(newShape(a ,b), b),
// keywords::shape=newShape(a, b),
// args...) {}
//
// static Shape newShape(ChainPtr a, ChainPtr b) {
// size_t dimsA = a->shape().size();
// size_t dimsB = b->shape().size();
// UTIL_THROW_IF2(dimsA != dimsB,
// "Tensors have different numbers of dimensions");
// Shape shape(dimsA);
// for(size_t i = 0; i < dimsA; ++i) {
// int dimA = a->shape()[i];
// int dimB = b->shape()[i];
// bool broadcastable = (dimA == dimB || dimA == 1 || dimB == 1);
// UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
// << "operation cannot be broadcasted: " << dimA << " != " << dimB);
// shape[i] = std::max(dimA, dimB);
// if(dimA == whatevs || dimB == whatevs)
// shape[i] = whatevs;
// }
// return shape;
// }
//};
struct PlusNodeOp : public BinaryNodeOp {
template <typename ...Args>
PlusNodeOp(ChainPtr a, ChainPtr b, Args ...args)

View File

@@ -43,7 +43,7 @@ std::function<void(Tensor)> uniform(float a = 0.0, float b = 0.1) {
}
std::function<void(Tensor)> from_vector(const std::vector<float>& v) {
return [&v](Tensor t) {
return [v](Tensor t) {
t << v;
};
}
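The capture change above ([&v] to [v]) matters because the vector handed to from_vector() is typically a local (such as wData/bData in build_graph below) that may no longer exist by the time the initializer is actually invoked; capturing by value gives the lambda its own copy. A small sketch of the failure mode the by-value capture avoids (hypothetical usage, not from the diff):

// with [&v] this lambda would hold a dangling reference once the
// temporary vector is destroyed; with [v] it owns a copy of the data.
auto init = from_vector(std::vector<float>(784, 0.0f));
// ... later, whenever the initializer is invoked, init(t) is still safe.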

View File

@@ -6,19 +6,11 @@
using namespace marian;
using namespace keywords;
int main(int argc, char** argv) {
cudaSetDevice(1);
const size_t IMAGE_SIZE = 784;
const size_t LABEL_SIZE = 10;
int BATCH_SIZE = 10000;
std::cerr << "Loading test set...";
std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", BATCH_SIZE, IMAGE_SIZE);
std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE);
std::cerr << "Done." << std::endl;
const size_t IMAGE_SIZE = 784;
const size_t LABEL_SIZE = 10;
int BATCH_SIZE = 10000;
ExpressionGraph build_graph() {
std::cerr << "Loading model params...";
NpzConverter converter("../scripts/test_model/model.npz");
@@ -31,29 +23,43 @@ int main(int argc, char** argv) {
std::cerr << "Building model...";
ExpressionGraph g;
auto x = g.input(shape={whatevs, IMAGE_SIZE}, name="X");
auto y = g.input(shape={whatevs, LABEL_SIZE});
auto x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
auto y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
auto w = g.param(shape={IMAGE_SIZE, LABEL_SIZE},
init=from_vector(wData));
auto b = g.param(shape={1, LABEL_SIZE},
init=from_vector(bData));
auto w = named(g.param(shape={IMAGE_SIZE, LABEL_SIZE},
init=from_vector(wData)), "w");
auto b = named(g.param(shape={1, LABEL_SIZE},
init=from_vector(bData)), "b");
auto probs = softmax(dot(x, w) + b, axis=1);
auto cost = -mean(sum(y * log(probs), axis=1), axis=0);
auto probs = named(softmax(dot(x, w) + b, axis=1), "probs");
auto cost = named(-mean(sum(y * log(probs), axis=1), axis=0), "cost");
std::cerr << "Done." << std::endl;
return g;
}
int main(int argc, char** argv) {
cudaSetDevice(0);
std::cerr << "Loading test set...";
std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", BATCH_SIZE, IMAGE_SIZE);
std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE);
std::cerr << "Done." << std::endl;
ExpressionGraph g = build_graph();
Tensor xt({BATCH_SIZE, IMAGE_SIZE});
Tensor yt({BATCH_SIZE, LABEL_SIZE});
x = xt << testImages;
y = yt << testLabels;
g["x"] = (xt << testImages);
g["y"] = (yt << testLabels);
g.forward(BATCH_SIZE);
std::vector<float> results;
results << probs.val();
results << g["probs"].val();
size_t acc = 0;
for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
@@ -65,7 +71,7 @@ int main(int argc, char** argv) {
}
acc += (correct == proposed);
}
std::cerr << "Cost: " << cost.val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
std::cerr << "Cost: " << g["cost"].val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
float eta = 0.1;
for (size_t j = 0; j < 10; ++j) {
@@ -73,14 +79,14 @@ int main(int argc, char** argv) {
g.backward();
auto update_rule = _1 -= eta * _2;
Element(update_rule, w.val(), w.grad());
Element(update_rule, b.val(), b.grad());
for(auto param : g.params())
Element(update_rule, param.val(), param.grad());
g.forward(BATCH_SIZE);
}
std::cerr << "Epoch: " << j << std::endl;
std::vector<float> results;
results << probs.val();
results << g["probs"].val();
size_t acc = 0;
for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
@@ -92,7 +98,7 @@ int main(int argc, char** argv) {
}
acc += (correct == proposed);
}
std::cerr << "Cost: " << cost.val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
std::cerr << "Cost: " << g["cost"].val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
}
return 0;
}