working separated graphs, added params

2024-09-17 09:47:34 +03:00 · 2016-09-16 00:23:47 +02:00 · 2016-09-16 00:23:47 +02:00 · 94914d56e8
commit 94914d56e8
parent 976c8039db
7 changed files with 91 additions and 90 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.5.1)
 set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 project(marian CXX)
-SET(CMAKE_CXX_FLAGS " -std=c++11 -g -O0 -funroll-loops -Wno-unused-result -Wno-deprecated")
+SET(CMAKE_CXX_FLAGS " -std=c++11 -g -O3 -funroll-loops -Wno-unused-result -Wno-deprecated")
-LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O0; -arch=sm_35; -lineinfo; --use_fast_math; -Xcompiler '-fPIC')
+LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O3; -arch=sm_35; -lineinfo; --use_fast_math; -Xcompiler '-fPIC')
 add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM)
 SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
--- a/src/expression_graph.h
+++ b/src/expression_graph.h
@ -1,5 +1,7 @@
 #pragma once
 #include <map>
 #include "definitions.h"
 #include "chainable.h"
 #include "node_operators.h"
@ -65,7 +67,9 @@ class ExpressionGraph {
    // Creates a ParamNode from the given arguments, wraps it in an Expr bound
    // to this graph, appends that Expr to params_, and returns it.
    // The removed (-) line returned the new Expr directly without recording
    // it; the added (+) line keeps it in a local so it can be stored first.
    template <typename ...Args>
    inline Expr param(Args ...args) {
-      return Expr(this, new ParamNode(args...));
+      Expr e(this, new ParamNode(args...));
      params_.emplace_back(e);
      return e;
    }
    template <typename ...Args>
@ -89,8 +93,28 @@ class ExpressionGraph {
      return stack_;
    }
    // Looks up the node registered under `name` in named_ and returns a
    // mutable reference to the stored Expr. When the name is not present,
    // UTIL_THROW_IF2 is invoked with a "No such named node" message
    // (presumably throwing — confirm against the macro's definition).
    Expr& operator[](const std::string& name) {
      auto it = named_.find(name);
      UTIL_THROW_IF2(it == named_.end(), "No such named node in graph: " << name);
      return it->second;  
    }
    // Reports whether a node has been registered under `name`.
    bool has_node(const std::string& name) const {
      const bool registered = named_.find(name) != named_.end();
      return registered;
    }
    // Registers `e` under `name`. If the name is already present, the
    // existing entry is kept and `e` is not stored.
    void add_named_node(Expr e, const std::string& name) {
      named_.insert({name, e});
    }
    // Returns a mutable reference to params_, the list that param() appends
    // each newly created parameter expression to.
    std::vector<Expr>& params() {
      return params_;
    }
  private:
    ChainableStackPtr stack_;
    std::map<std::string, Expr> named_;
    std::vector<Expr> params_;
 };
 }
--- a/src/expression_operators.cu
+++ b/src/expression_operators.cu
@ -4,6 +4,11 @@
 namespace marian {
 // Registers `a` under `name` in the graph it belongs to and returns the same
 // expression, so the call can be wrapped around an expression inline.
 Expr named(Expr a, const std::string& name) {
  auto owner = a.graph();
  owner->add_named_node(a, name);
  return a;
 }
 // Builds a LogitNodeOp over `a` and wraps it in an Expr attached to the
 // graph that `a` belongs to.
 Expr logit(Expr a) {
  return Expr(a.graph(), new LogitNodeOp(a));
 }
@ -26,6 +31,25 @@ Expr operator-(Expr a) {
 /*********************************************************/
 // Computes the result shape for an elementwise operation on `a` and `b`.
 // Both operands must have the same number of dimensions, and along every
 // axis the two sizes must be equal or one of them must be 1; otherwise
 // UTIL_THROW_IF2 is invoked. For an axis that passes this check, the result
 // is `whatevs` if either size equals `whatevs`, else the larger size.
 static Shape newShape(ChainPtr a, ChainPtr b) {
  const size_t dimsA = a->shape().size();
  const size_t dimsB = b->shape().size();
  UTIL_THROW_IF2(dimsA != dimsB,
                 "Tensors have different numbers of dimensions");
  Shape result(dimsA);
  for(size_t axis = 0; axis < dimsA; ++axis) {
    const int dimA = a->shape()[axis];
    const int dimB = b->shape()[axis];
    // Compatible when the sizes match or one side has size 1.
    const bool broadcastable = !(dimA != dimB && dimA != 1 && dimB != 1);
    UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
                   << "operation cannot be broadcasted: " << dimA << " != " << dimB);
    if(dimA == whatevs || dimB == whatevs)
      result[axis] = whatevs;
    else
      result[axis] = std::max(dimA, dimB);
  }
  return result;
 }
 Expr broadcast(Shape bShape, Expr a) {
  const Shape& aShape = a.node()->shape();
  if(aShape == bShape) {
@ -61,30 +85,11 @@ Expr broadcast(Shape bShape, Expr a) {
  }
 }
 // Computes the result shape for an elementwise operation on `a` and `b`.
 // Both operands must have the same number of dimensions, and along every
 // axis the two sizes must be equal or one of them must be 1; otherwise
 // UTIL_THROW_IF2 is invoked. For an axis that passes this check, the result
 // is `whatevs` if either size equals `whatevs`, else the larger size.
 static Shape newShape(ChainPtr a, ChainPtr b) {
  const size_t dimsA = a->shape().size();
  const size_t dimsB = b->shape().size();
  UTIL_THROW_IF2(dimsA != dimsB,
                 "Tensors have different numbers of dimensions");
  Shape result(dimsA);
  for(size_t axis = 0; axis < dimsA; ++axis) {
    const int dimA = a->shape()[axis];
    const int dimB = b->shape()[axis];
    // Compatible when the sizes match or one side has size 1.
    const bool broadcastable = !(dimA != dimB && dimA != 1 && dimB != 1);
    UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
                   << "operation cannot be broadcasted: " << dimA << " != " << dimB);
    if(dimA == whatevs || dimB == whatevs)
      result[axis] = whatevs;
    else
      result[axis] = std::max(dimA, dimB);
  }
  return result;
 }
 // Elementwise addition of two expressions. The common shape is computed
 // with newShape and both operands are passed through broadcast() for it.
 // The added (+) line hands the broadcast operands to PlusNodeOp; the
 // removed (-) line passed the original `a` and `b`, leaving cast_a and
 // cast_b unused.
 Expr operator+(Expr a, Expr b) {
  Shape shape = newShape(a, b);
  Expr cast_a = broadcast(shape, a);
  Expr cast_b = broadcast(shape, b);
-  return Expr(a.graph(), new PlusNodeOp(a, b));
+  return Expr(a.graph(), new PlusNodeOp(cast_a, cast_b));
 }
 Expr operator-(Expr a, Expr b) {
@ -109,13 +114,7 @@ Expr operator/(Expr a, Expr b) {
 }
 // Builds a DotNodeOp expression from `a` and `b` in a's graph.
 // The added (+) line constructs the node directly from the two operands.
 // NOTE(review): the lines after the (+) line still use `shape`, which is
 // declared only on the removed (-) line; they look like removed lines whose
 // '-' marker was lost in this rendering — confirm against the commit.
 Expr dot(Expr a, Expr b) {
-  Shape shape = newShape(a, b);
+  return Expr(a.graph(), new DotNodeOp(a, b));
  Expr cast_a = broadcast(shape, a);
  Expr cast_b = broadcast(shape, b);
  return Expr(a.graph(), new DotNodeOp(cast_a, cast_b));
 }
 /******************************************************/
 }
--- a/src/expression_operators.h
+++ b/src/expression_operators.h
@ -4,6 +4,8 @@
 namespace marian {
 Expr named(Expr a, const std::string& name);
 Expr logit(Expr a);
 Expr tanh(Expr a);
--- a/src/node_operators.h
+++ b/src/node_operators.h
@ -194,8 +194,7 @@ struct BinaryNodeOp : public Node {
 struct DotNodeOp : public BinaryNodeOp {
  template <typename ...Args>
  DotNodeOp(ChainPtr a, ChainPtr b, Args ...args)
-  : BinaryNodeOp(
+  : BinaryNodeOp(a, b,
                 a, b,
                 keywords::shape=newShape(a, b),
                 args...) { }
@ -224,35 +223,6 @@ struct DotNodeOp : public BinaryNodeOp {
  }
 };
 //struct BroadcastingNodeOp : public BinaryNodeOp {
 //  template <typename ...Args>
 //  BroadcastingNodeOp(ChainPtr a, ChainPtr b, Args ...args)
 //  : BinaryNodeOp(broadcast(newShape(a ,b), a),
 //                 broadcast(newShape(a ,b), b),
 //                 keywords::shape=newShape(a, b),
 //                 args...) {}
 //  
 //  static Shape newShape(ChainPtr a, ChainPtr b) {
 //    size_t dimsA = a->shape().size();
 //    size_t dimsB = b->shape().size();
 //    UTIL_THROW_IF2(dimsA != dimsB,
 //                   "Tensors have different numbers of dimensions");
 //    Shape shape(dimsA);
 //    for(size_t i = 0; i < dimsA; ++i) {
 //      int dimA = a->shape()[i];
 //      int dimB = b->shape()[i];
 //      bool broadcastable = (dimA == dimB || dimA == 1 || dimB == 1);
 //      UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
 //                     << "operation cannot be broadcasted: " << dimA << " != " << dimB);
 //      shape[i] = std::max(dimA, dimB);
 //      if(dimA == whatevs || dimB == whatevs)
 //        shape[i] = whatevs;
 //    }
 //    return shape;
 //  }
 //};
 struct PlusNodeOp : public BinaryNodeOp {
  template <typename ...Args>
  PlusNodeOp(ChainPtr a, ChainPtr b, Args ...args)
--- a/src/param_initializers.h
+++ b/src/param_initializers.h
@ -43,7 +43,7 @@ std::function<void(Tensor)> uniform(float a = 0.0, float b = 0.1) {
 }
 // Returns an initializer callable that writes the contents of `v` into the
 // tensor it is given, using `t << v`.
 // The added (+) line captures `v` by copy, so the returned callable carries
 // its own vector and does not depend on the caller's vector staying alive.
 // The removed (-) line captured `v` by reference.
 std::function<void(Tensor)> from_vector(const std::vector<float>& v) {
-  return [&v](Tensor t) {
+  return [v](Tensor t) {
    t << v;
  };
 }
--- a/src/validate_mnist.cu
+++ b/src/validate_mnist.cu
@ -6,19 +6,11 @@
 using namespace marian;
 using namespace keywords;
-int main(int argc, char** argv) {
+const size_t IMAGE_SIZE = 784;
-  
+const size_t LABEL_SIZE = 10;
-  cudaSetDevice(1);
+int BATCH_SIZE = 10000;
  const size_t IMAGE_SIZE = 784;
  const size_t LABEL_SIZE = 10;
  int BATCH_SIZE = 10000;
  std::cerr << "Loading test set...";
  std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", BATCH_SIZE, IMAGE_SIZE);
  std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE);
  std::cerr << "Done." << std::endl;
 ExpressionGraph build_graph() {
  std::cerr << "Loading model params...";
  NpzConverter converter("../scripts/test_model/model.npz");
@ -31,29 +23,43 @@ int main(int argc, char** argv) {
  std::cerr << "Building model...";
  ExpressionGraph g;
-  auto x = g.input(shape={whatevs, IMAGE_SIZE}, name="X");
+  auto x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
-  auto y = g.input(shape={whatevs, LABEL_SIZE});
+  auto y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
-  auto w = g.param(shape={IMAGE_SIZE, LABEL_SIZE},
+  auto w = named(g.param(shape={IMAGE_SIZE, LABEL_SIZE},
-                   init=from_vector(wData));
+                         init=from_vector(wData)), "w");
-  auto b = g.param(shape={1, LABEL_SIZE},
+  
-                   init=from_vector(bData));
+  auto b = named(g.param(shape={1, LABEL_SIZE},
                         init=from_vector(bData)), "b");
-  auto probs = softmax(dot(x, w) + b, axis=1);
+  auto probs = named(softmax(dot(x, w) + b, axis=1), "probs");
-  auto cost = -mean(sum(y * log(probs), axis=1), axis=0);
+  auto cost = named(-mean(sum(y * log(probs), axis=1), axis=0), "cost");
  std::cerr << "Done." << std::endl;
  return g;
 }
 int main(int argc, char** argv) {
  cudaSetDevice(0);
  std::cerr << "Loading test set...";
  std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", BATCH_SIZE, IMAGE_SIZE);
  std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE);
  std::cerr << "Done." << std::endl;
  ExpressionGraph g = build_graph();
  Tensor xt({BATCH_SIZE, IMAGE_SIZE});
  Tensor yt({BATCH_SIZE, LABEL_SIZE});
-  x = xt << testImages;
+  g["x"] = (xt << testImages);
-  y = yt << testLabels;
+  g["y"] = (yt << testLabels);
  g.forward(BATCH_SIZE);
  std::vector<float> results;
-  results << probs.val();
+  results << g["probs"].val();
  size_t acc = 0;
  for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
@ -65,7 +71,7 @@ int main(int argc, char** argv) {
    }
    acc += (correct == proposed);
  }
-  std::cerr << "Cost: " << cost.val()[0] <<  " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
+  std::cerr << "Cost: " << g["cost"].val()[0] <<  " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
  float eta = 0.1;
  for (size_t j = 0; j < 10; ++j) {
@ -73,14 +79,14 @@ int main(int argc, char** argv) {
      g.backward();
      auto update_rule = _1 -= eta * _2;
-      Element(update_rule, w.val(), w.grad());
+      for(auto param : g.params()) 
-      Element(update_rule, b.val(), b.grad());
+        Element(update_rule, param.val(), param.grad());
      g.forward(BATCH_SIZE);
    }
    std::cerr << "Epoch: " << j << std::endl;
    std::vector<float> results;
-    results << probs.val();
+    results << g["probs"].val();
    size_t acc = 0;
    for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
@ -92,7 +98,7 @@ int main(int argc, char** argv) {
      }
      acc += (correct == proposed);
    }
-    std::cerr << "Cost: " << cost.val()[0] <<  " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
+    std::cerr << "Cost: " << g["cost"].val()[0] <<  " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
  }
  return 0;
 }