resolved conflicts

2024-11-05 01:31:46 +03:00 · 2016-09-20 14:14:13 +02:00 · 2016-09-20 14:14:13 +02:00 · 99b643dcfa
commit 99b643dcfa
parent 4c8b6bb171 8677f99597
12 changed files with 656 additions and 544 deletions
--- a/marian/.cproject
+++ b/marian/.cproject
@ -37,11 +37,15 @@
 							</tool>
 							<tool id="nvcc.linker.base.635344589" name="NVCC Linker" superClass="nvcc.linker.base">
 								<option id="nvcc.linker.option.libs.1878015233" name="Libraries (-l)" superClass="nvcc.linker.option.libs" valueType="libs">
+									<listOptionValue builtIn="false" value="boost_chrono"/>
+									<listOptionValue builtIn="false" value="boost_system"/>
+									<listOptionValue builtIn="false" value="boost_timer"/>
 									<listOptionValue builtIn="false" value="cudnn"/>
 									<listOptionValue builtIn="false" value="cuda"/>
 									<listOptionValue builtIn="false" value="cublas"/>
 								</option>
 								<option id="nvcc.linker.option.paths.1326041662" name="Library search path (-L)" superClass="nvcc.linker.option.paths" valueType="libPaths">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/}/boost/lib64&quot;"/>
 									<listOptionValue builtIn="false" value="/usr/local/cuda/lib"/>
 									<listOptionValue builtIn="false" value="/usr/lib"/>
 								</option>
@ -56,11 +60,11 @@
 							</tool>
 						</toolChain>
 					</folderInfo>
-					<fileInfo id="com.nvidia.cuda.ide.seven_five.configuration.debug.1479727693.843925199" name="validate_mnist_batch.cu" rcbsApplicability="disable" resourcePath="src/validate_mnist_batch.cu" toolsToInvoke="nvcc.compiler.base.1979453423.378728796">
-						<tool id="nvcc.compiler.base.1979453423.378728796" name="NVCC Compiler" superClass="nvcc.compiler.base.1979453423"/>
+					<fileInfo id="com.nvidia.cuda.ide.seven_five.configuration.debug.1479727693.799279171" name="mnist_benchmark.cu" rcbsApplicability="disable" resourcePath="src/mnist_benchmark.cu" toolsToInvoke="nvcc.compiler.base.1979453423.992734787">
+						<tool id="nvcc.compiler.base.1979453423.992734787" name="NVCC Compiler" superClass="nvcc.compiler.base.1979453423"/>
 					</fileInfo>
 					<sourceEntries>
-						<entry excluding="src/validate_mnist_batch.cu|src/train_mnist.cu|src/validate_mnist.cu|src/npz_converter.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+						<entry excluding="src/mnist_benchmark.cu|src/validate_encoder_decoder.cu|src/test.cu|src/validate_mnist_batch.cu|src/train_mnist.cu|src/validate_mnist.cu|src/npz_converter.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
 					</sourceEntries>
 				</configuration>
 			</storageModule>
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -18,8 +18,6 @@ cuda_add_executable(
  test.cu
 )

-target_link_libraries(marian marian_lib)
-
 cuda_add_executable(
  mnist_benchmark
  mnist_benchmark.cu
@ -35,11 +33,18 @@ cuda_add_executable(
  validate_encoder_decoder.cu
 )

+cuda_add_executable(
+  test_nodes
+  test_nodes.cu
+)
+
+target_link_libraries(marian marian_lib)
 target_link_libraries(mnist_benchmark marian_lib)
 target_link_libraries(validate_mnist_batch marian_lib)
 target_link_libraries(validate_encoder_decoder marian_lib)
+target_link_libraries(test_nodes marian_lib)

-foreach(exec marian mnist_benchmark validate_mnist_batch validate_encoder_decoder)
+foreach(exec marian mnist_benchmark validate_mnist_batch validate_encoder_decoder test_nodes)
  target_link_libraries(${exec} ${EXT_LIBS} cuda cudnn curand)
  cuda_add_cublas_to_target(${exec})
  set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
--- a/src/chainable.h
+++ b/src/chainable.h
@ -34,6 +34,8 @@ struct Chainable {
    virtual ~Chainable() { }
    virtual void forward() { }
    virtual void backward() { }
+    virtual void backward_numeric(Float delta) { }
+
    virtual void check() { }
    virtual void init_dependent() { }
    virtual void set_zero_adjoint() { }
--- a/src/expression_graph.h
+++ b/src/expression_graph.h
@ -127,6 +127,19 @@ class ExpressionGraph {
        (*it)->backward();
    }

+    /**
+     * @brief Runs a reverse sweep over the graph, calling backward_numeric() on every node.
+     *
+     * All adjoints are reset first, the last node on the stack is seeded through
+     * init_dependent(), and the nodes are then visited from last to first.
+     * A graph without nodes is left untouched.
+     *
+     * @param delta Value forwarded unchanged to each node's backward_numeric();
+     *              presumably the finite-difference step size (TODO confirm
+     *              against the node implementations).
+     */
+    void backward_numeric(Float delta) {
+      // back() on an empty stack is undefined behaviour, so bail out early.
+      if(stack_->empty())
+        return;
+
+      for(auto&& v : *stack_)
+        v->set_zero_adjoint();
+
+      stack_->back()->init_dependent();
+      // Same traversal order as backward(): last node first.
+      for(auto it = stack_->rbegin(); it != stack_->rend(); ++it)
+        (*it)->backward_numeric(delta);
+    }
+
    /**
     * @brief Returns a string representing this expression graph in <code>graphviz</code> notation.
     *
--- a/src/mnist_benchmark.cu
+++ b/src/mnist_benchmark.cu
@ -33,8 +33,8 @@ ExpressionGraph build_graph(const std::vector<int>& dims) {
      layers.emplace_back(x);
    }
    else {
-      //layers.emplace_back(reluplus(dot(layers.back(), weights.back()), biases.back()));
-      layers.emplace_back(relu(dot(layers.back(), weights.back()) + biases.back()));
+      layers.emplace_back(reluplus(dot(layers.back(), weights.back()), biases.back()));
+      //layers.emplace_back(relu(dot(layers.back(), weights.back()) + biases.back()));
    }
    
    weights.emplace_back(
--- a/src/node_operators.h
+++ b/src/node_operators.h
@ -23,6 +23,8 @@

 #include "node.h"
 #include "tensor_operators.h"
+#include "node_operators_unary.h"
+#include "node_operators_binary.h"

 namespace marian {

@ -109,527 +111,4 @@ struct ParamNode : public Node {
    bool initialized_;
 };

-struct UnaryNodeOp : public Node {
-    ChainPtr a_;
-
-    template <typename ...Args>
-    UnaryNodeOp(ChainPtr a, Args ...args)
-    : Node(keywords::shape=a->shape(), //@TODO: Check keywords?
-           args...), a_(a) {}
-};
-
-struct LogitNodeOp : public UnaryNodeOp {
-  template <typename ...Args>
-  LogitNodeOp(Args ...args)
-  : UnaryNodeOp(args...) {  }
-
-  void forward() {
-    Element(_1 = Sigma(_2),
-            val_, a_->val());
-  }
-
-  void backward() {
-    Element(_1 += _2 * _3 * (1.0f - _3),
-            a_->grad(), adj_, val_);
-  }
-
-  void check() {
-    
-  }
-  
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label=" << label("logit")
-      << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
-};
-
-struct TanhNodeOp : public UnaryNodeOp {
-  template <typename ...Args>
-  TanhNodeOp(Args ...args)
-  : UnaryNodeOp(args...) { }
-
-  void forward() {
-    Element(_1 = Tanh(_2),
-            val_, a_->val());
-  }
-
-  void backward() {
-    Element(_1 += _2 * (1.0f - (_3 * _3)),
-            a_->grad(), adj_, val_);
-  }
-
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label=" << label("tanh")
-      << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
-};
-
-struct ReLUNodeOp : public UnaryNodeOp {
-  template <typename ...Args>
-  ReLUNodeOp(Args ...args)
-  : UnaryNodeOp(args...) { }
-
-  void forward() {
-    Element(_1 = ReLU(_2),
-            val_, a_->val());
-  }
-
-  void backward() {
-    Element(_1 += _2 * ReLUback(_3),
-            a_->grad(), adj_, a_->val());
-  }
-
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label=" << label("ReLU")
-      << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
-};
-
-// @TODO: slow and probably buggy
-struct DropoutNodeOp : public UnaryNodeOp {
-  template <typename ...Args>
-  DropoutNodeOp(Args ...args)
-  : UnaryNodeOp(args...),
-    p_(0.5), seed_(time(0)) { }
-
-  void forward() {
-    //Element(_1 = Bernoulli(p_, (size_t)this) * _2,
-    //        val_, a_->val())
-    Dropout(val_, a_->val(), p_, seed_++);
-  }
-
-  void backward() {
-    Element(_1 += _2 * (_3 != 0.0f), // transform non-zero to 1 
-            a_->grad(), adj_, val_);
-  }
-
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label=" << label("dropout")
-      << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
-  private:
-    float p_;
-    int seed_;
-};
-
-
-struct SoftmaxNodeOp : public UnaryNodeOp {
-  template <typename ...Args>
-    SoftmaxNodeOp(Args ...args)
-    : UnaryNodeOp(args...) { }
-
-  void forward() {
-    // B = softmax(A).
-    thrust::copy(a_->val().begin(), a_->val().end(), val_.begin());
-    // Safe version of softmax.
-    Softmax(&val_);
-  }
-
-  void backward() {
-    // For each row, the Jacobian times vector is given by:
-    // J * dy = p .* (dy - avg*1)
-    // where avg = p'*dy and p is the softmax output (probabilities).
-    //
-    // For more information, see sec. 2.5 of the following reference:
-    // André F. T. Martins and Ramon Astudillo.
-    // "From Softmax to Sparsemax: A Sparse Model of Attention and Multi-Label
-    // Classification." ICML 2016.
-    // http://jmlr.org/proceedings/papers/v48/martins16.pdf
-
-    SoftmaxGrad(a_->grad(), adj_, val_);
-  }
-
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label=" << label("softmax")
-      << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-};
-
-struct ArgmaxNodeOp : public UnaryNodeOp {
-  template <typename ...Args>
-  ArgmaxNodeOp(ChainPtr a, Args ...args)
-    : UnaryNodeOp(a, keywords::shape=newShape(a), args...) { }
-
-  void forward() {
-    // B = softmax(A).
-    Argmax(&val_, &a_->val());
-  }
-
-  void backward() {
-  }
-
-  Shape newShape(ChainPtr a) {
-    Shape shape = a->shape();
-    shape[1] = 1;
-    return shape;
-  }
-
-
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label="
-      << label("argmax") << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
-};
-
-struct LogNodeOp : public UnaryNodeOp {
-  template <typename ...Args>
-  LogNodeOp(Args ...args)
-  : UnaryNodeOp(args...) {}
-
-  void forward() {
-    Element(_1 = Log(_2), val_, a_->val());
-  }
-
-  void backward() {
-    Element(_1 += _2 * (1.f / _3),
-            a_->grad(), adj_, a_->val());
-  }
-  
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label="
-      << label("log") << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
-};
-
-struct ExpNodeOp : public UnaryNodeOp {
-  template <typename ...Args>
-    ExpNodeOp(Args ...args)
-    : UnaryNodeOp(args...) { }
-
-  void forward() {
-    Element(_1 = Exp(_2), val_, a_->val());
-  }
-
-  void backward() {
-    Element(_1 += _2 * Exp(_3),
-            a_->grad(), adj_, a_->val());
-  }
-  
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label=" << label("exp")
-      << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
-};
-
-struct NegNodeOp : public UnaryNodeOp {
-  template <typename ...Args>
-  NegNodeOp(Args ...args)
-  : UnaryNodeOp(args...) { }
-
-  void forward() {
-    Element(_1 = -_2, val_, a_->val());
-  }
-
-  void backward() {
-    Element(_1 += -_2, a_->grad(), adj_);
-  }
-  
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label="
-      << label("-") << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
-};
-
-/******************************************************/
-
-struct BinaryNodeOp : public Node {
-  ChainPtr a_;
-  ChainPtr b_;
-
-  template <typename ...Args>
-  BinaryNodeOp(ChainPtr a, ChainPtr b, Args ...args)
-   : Node(args...), a_(a), b_(b) {}
-};
-
-/*** Matrix Product ***/
-
-struct DotNodeOp : public BinaryNodeOp {
-  template <typename ...Args>
-  DotNodeOp(ChainPtr a, ChainPtr b, Args ...args)
-  : BinaryNodeOp(a, b,
-                 keywords::shape=newShape(a, b),
-                 args...) { }
-
-  Shape newShape(ChainPtr a, ChainPtr b) {
-    Shape shape1 = a->shape();
-    Shape shape2 = b->shape();
-    UTIL_THROW_IF2(shape1[1] != shape2[0],
-                   "matrix product requires dimensions to match");
-    shape1[1] = shape2[1];
-    return shape1;
-  }
-
-  void forward() {
-    // C = A*B
-    Prod(val_, a_->val(), b_->val(), false, false);
-  }
-
-  void backward() {
-    // D is the adjoint, the matrix of derivatives
-    // df/dA += D*B.T
-    // df/dB += A.T*D
-    // beta set to 1.0 in gemm, C = dot(A,B) + beta * C
-    // to sum gradients from different graph parts
-    Prod(a_->grad(), adj_, b_->val(), false, true, 1.0);
-    Prod(b_->grad(), a_->val(), adj_, true, false, 1.0);
-  }
-  
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label=" << label("×")
-      << ", style=\"filled\", fillcolor=\"orange\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl;
-    ss << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
-};
-
-struct PlusNodeOp : public BinaryNodeOp {
-  template <typename ...Args>
-  PlusNodeOp(ChainPtr a, ChainPtr b, Args ...args)
-    : BinaryNodeOp(a, b, keywords::shape=a->shape(), args...) { }
-    
-  void forward() {
-    Element(_1 = _2 + _3,
-            val_, a_->val(), b_->val());
-  }
-  
-  void backward() {
-    Element(_1 += _2,
-            a_->grad(), adj_);
-    Element(_1 += _2,
-            b_->grad(), adj_);
-  }
-
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label=" << label("+")
-      << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl;
-    ss << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
-};
-
-struct ReLUPlusNodeOp : public BinaryNodeOp {
-  template <typename ...Args>
-  ReLUPlusNodeOp(ChainPtr a, ChainPtr b, Args ...args)
-    : BinaryNodeOp(a, b, keywords::shape=a->shape(), args...) { }
-    
-  void forward() {
-    // v = f(g(a, b))
-    Element(_1 = ReLU(_2 + _3),
-            val_, a_->val(), b_->val());
-  }
-  
-  void backward() {
-    // df/da = adj * f'(g(a, b)) : dg/da * df/dg 
-    // df/db = adj * f'(g(a, b)) : dg/db * df/dg 
-    Element(_1 += _2 * ReLUback(_3 + _4),
-            a_->grad(), adj_, a_->val(), b_->val());
-    Element(_1 += _2 * ReLUback(_3 + _4),
-            b_->grad(), adj_, a_->val(), b_->val());
-  }
-
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label=" << label("ReLU<br/>+")
-      << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl;
-    ss << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
-};
-
-struct MinusNodeOp : public BinaryNodeOp {
-  template <typename ...Args>
-  MinusNodeOp(ChainPtr a, ChainPtr b, Args ...args)
-    : BinaryNodeOp(a, b, keywords::shape=a->shape(), args...) { }
-    
-  void forward() {
-    Element(_1 = _2 - _3,
-            val_, a_->val(), b_->val());
-  }
-  
-  void backward() {
-    Element(_1 += _2,
-            a_->grad(), adj_);
-    Element(_1 -= _2,
-            b_->grad(), adj_);
-  }
-  
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label=" << label("-")
-      << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl;
-    ss << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
-};
-
-struct MultNodeOp : public BinaryNodeOp {
-  template <typename ...Args>
-  MultNodeOp(ChainPtr a, ChainPtr b, Args ...args)
-    : BinaryNodeOp(a, b, keywords::shape=a->shape(), args...) { }
-    
-  void forward() {
-    Element(_1 = _2 * _3,
-            val_, a_->val(), b_->val());
-  }
-  
-  void backward() {
-    Element(_1 += _2 * _3,
-            a_->grad(), adj_, b_->val());
-    Element(_1 += _2 * _3,
-            b_->grad(), adj_, a_->val());
-  }
-  
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label=" << label("•")
-      << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl;
-    ss << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
-};
-
-struct DivNodeOp : public BinaryNodeOp {
-  template <typename ...Args>
-  DivNodeOp(ChainPtr a, ChainPtr b, Args ...args)
-    : BinaryNodeOp(a, b, keywords::shape=a->shape(), args...) { }
-    
-  void forward() {
-    Element(_1 = _2 / _3,
-            val_, a_->val(), b_->val());
-  }
-  
-  void backward() {
-    Element(_1 += _2 * 1.0f / _3,
-            a_->grad(), adj_, b_->val());
-    Element(_1 -= _2 * _3 / (_4 * _4),
-            b_->grad(), adj_, a_->val(), b_->val());
-  }
-  
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label=" << label("÷")
-      << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl;
-    ss << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
-};
-
-// Cross-entropy node. It computes -b*log(softmax(a)), summing rowwise.
-struct CrossEntropyNodeOp : public BinaryNodeOp {
-  template <typename ...Args>
-    CrossEntropyNodeOp(ChainPtr a, ChainPtr b, Args ...args)
-    : BinaryNodeOp(a, b,
-                   keywords::shape=newShape(a, b),
-                   args...) { }
-
-  Shape newShape(ChainPtr a, ChainPtr b) {
-    Shape shape1 = a->shape();
-    Shape shape2 = b->shape();
-    UTIL_THROW_IF2(shape1[0] != shape2[0] || shape1[1] != shape2[1],
-                   "cross entropy requires dimensions to match");
-    shape1[1] = 1;
-    return shape1;
-  }
-
-  // We're caching the softmax probabilities here because we'll need them for
-  // the backward computation.
-  void forward() {
-    // C = -dot(B, log(softmax(A))).
-    if (probs_) {
-      probs_.set(0.0);
-    } else {
-      probs_.allocate(a_->val().shape(), 0.0);
-    }
-    thrust::copy(a_->val().begin(), a_->val().end(), probs_.begin());
-    Softmax(&probs_); // Safe version of softmax.
-    Tensor result(a_->val().shape());
-    Element(_1 = -_2 * Log(_3), result, b_->val(), probs_);
-    SumRowwise(result, val_);
-  }
-
-  // @TODO: In most cases it's wasteful to compute the derivative with respect
-  // to the second input which is typically an input node in the computation
-  // graph. In general the backward functions can skip the computation of
-  // gradients wrt input nodes.
-  void backward() {
-    // For each row, the first input derivative is given by adj * (p - y),
-    // where y is the gold label distribution (e.g. one hot vector) and
-    // p is the softmax output (probabilities).
-    // The second input derivative is -adj*log(p).
-    Tensor result(probs_.shape());
-
-    // Compute first input derivative.
-    Element(_1 = _2 -  _3, result, probs_, b_->val());
-    ScaleRowwise(result, adj_);
-    Element(_1 += _2, a_->grad(), result);
-
-    // Compute second input derivative.
-    Element(_1 = -Log(_2), result, probs_); // @TODO: use a cached log here.
-    ScaleRowwise(result, adj_);
-    Element(_1 += _2, b_->grad(), result);
-  }
-
-  virtual std::string graphviz() {
-    std::stringstream ss;
-    ss << "\"" << this << "\" [shape=\"box\", label=" << label("x-ent")
-      << ", style=\"filled\", fillcolor=\"orange\"]" << std::endl;
-    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    ss << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
-    return ss.str();
-  };
-
- protected:
-  Tensor probs_;
-
-};
-
 }
--- a/src/node_operators_binary.h
+++ b/src/node_operators_binary.h
@ -0,0 +1,270 @@
+#pragma once
+
+#include "node.h"
+#include "tensor_operators.h"
+
+namespace marian {
+
+// Common base for operator nodes with two inputs.
+// It stores both operands and forwards every remaining constructor
+// argument (the keyword arguments) to Node unchanged, so derived nodes
+// are responsible for supplying keywords::shape themselves.
+struct BinaryNodeOp : public Node {
+  ChainPtr a_;  // first (left-hand) operand
+  ChainPtr b_;  // second (right-hand) operand
+
+  template <typename ...Args>
+  BinaryNodeOp(ChainPtr a, ChainPtr b, Args ...args)
+   : Node(args...), a_(a), b_(b) {}
+};
+
+/*** Matrix Product ***/
+
+// Matrix-product node: val = a * b.
+struct DotNodeOp : public BinaryNodeOp {
+  template <typename ...Args>
+  DotNodeOp(ChainPtr a, ChainPtr b, Args ...args)
+  : BinaryNodeOp(a, b, keywords::shape=newShape(a, b), args...) { }
+
+  // Shape of the product: the shape of a with dimension 1 replaced by
+  // dimension 1 of b. A mismatch between a's dimension 1 and b's
+  // dimension 0 is rejected through UTIL_THROW_IF2.
+  Shape newShape(ChainPtr a, ChainPtr b) {
+    Shape lhs = a->shape();
+    Shape rhs = b->shape();
+    UTIL_THROW_IF2(lhs[1] != rhs[0],
+                   "matrix product requires dimensions to match");
+    lhs[1] = rhs[1];
+    return lhs;
+  }
+
+  // C = A*B, neither operand transposed.
+  void forward() {
+    Prod(val_, a_->val(), b_->val(), false, false);
+  }
+
+  // With D the adjoint (matrix of derivatives):
+  //   df/dA += D*B.T
+  //   df/dB += A.T*D
+  // The trailing 1.0 is the gemm beta (C = dot(A,B) + beta * C), so the
+  // products are added to gradients already collected from other parts
+  // of the graph instead of overwriting them.
+  void backward() {
+    Prod(a_->grad(), adj_, b_->val(), false, true, 1.0);
+    Prod(b_->grad(), a_->val(), adj_, true, false, 1.0);
+  }
+
+  // Emits this node and its two incoming edges in graphviz notation.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label=" << label("×")
+        << ", style=\"filled\", fillcolor=\"orange\"]" << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl;
+    dot << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
+    return dot.str();
+  }
+
+};
+
+// Element-wise addition node: val = a + b.
+// The result takes the shape of the first operand.
+struct PlusNodeOp : public BinaryNodeOp {
+  template <typename ...Args>
+  PlusNodeOp(ChainPtr a, ChainPtr b, Args ...args)
+    : BinaryNodeOp(a, b, keywords::shape=a->shape(), args...) { }
+
+  void forward() {
+    Element(_1 = _2 + _3, val_, a_->val(), b_->val());
+  }
+
+  // The adjoint is accumulated unchanged into both input gradients.
+  void backward() {
+    Element(_1 += _2, a_->grad(), adj_);
+    Element(_1 += _2, b_->grad(), adj_);
+  }
+
+  // Emits this node and its two incoming edges in graphviz notation.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label=" << label("+")
+        << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl;
+    dot << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
+    return dot.str();
+  }
+
+};
+
+// Fused addition and ReLU node: val = ReLU(a + b).
+// The result takes the shape of the first operand.
+struct ReLUPlusNodeOp : public BinaryNodeOp {
+  template <typename ...Args>
+  ReLUPlusNodeOp(ChainPtr a, ChainPtr b, Args ...args)
+    : BinaryNodeOp(a, b, keywords::shape=a->shape(), args...) { }
+
+  // v = f(g(a, b)) with g = a + b and f = ReLU.
+  void forward() {
+    Element(_1 = ReLU(_2 + _3), val_, a_->val(), b_->val());
+  }
+
+  // Both inputs receive adj * f'(g(a, b)); the sum is recomputed from
+  // the input values rather than cached.
+  void backward() {
+    Element(_1 += _2 * ReLUback(_3 + _4), a_->grad(), adj_, a_->val(), b_->val());
+    Element(_1 += _2 * ReLUback(_3 + _4), b_->grad(), adj_, a_->val(), b_->val());
+  }
+
+  // Emits this node and its two incoming edges in graphviz notation.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label=" << label("ReLU<br/>+")
+        << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl;
+    dot << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
+    return dot.str();
+  }
+
+};
+
+// Element-wise subtraction node: val = a - b.
+// The result takes the shape of the first operand.
+struct MinusNodeOp : public BinaryNodeOp {
+  template <typename ...Args>
+  MinusNodeOp(ChainPtr a, ChainPtr b, Args ...args)
+    : BinaryNodeOp(a, b, keywords::shape=a->shape(), args...) { }
+
+  void forward() {
+    Element(_1 = _2 - _3, val_, a_->val(), b_->val());
+  }
+
+  // The adjoint is added to a's gradient and subtracted from b's.
+  void backward() {
+    Element(_1 += _2, a_->grad(), adj_);
+    Element(_1 -= _2, b_->grad(), adj_);
+  }
+
+  // Emits this node and its two incoming edges in graphviz notation.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label=" << label("-")
+        << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl;
+    dot << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
+    return dot.str();
+  }
+
+};
+
+// Element-wise multiplication node: val = a * b.
+// The result takes the shape of the first operand.
+struct MultNodeOp : public BinaryNodeOp {
+  template <typename ...Args>
+  MultNodeOp(ChainPtr a, ChainPtr b, Args ...args)
+    : BinaryNodeOp(a, b, keywords::shape=a->shape(), args...) { }
+
+  void forward() {
+    Element(_1 = _2 * _3, val_, a_->val(), b_->val());
+  }
+
+  // Product rule: each input's gradient gets the adjoint scaled by the
+  // value of the other input.
+  void backward() {
+    Element(_1 += _2 * _3, a_->grad(), adj_, b_->val());
+    Element(_1 += _2 * _3, b_->grad(), adj_, a_->val());
+  }
+
+  // Emits this node and its two incoming edges in graphviz notation.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label=" << label("•")
+        << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl;
+    dot << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
+    return dot.str();
+  }
+
+};
+
+// Element-wise division node: val = a / b.
+// The result takes the shape of the first operand.
+struct DivNodeOp : public BinaryNodeOp {
+  template <typename ...Args>
+  DivNodeOp(ChainPtr a, ChainPtr b, Args ...args)
+    : BinaryNodeOp(a, b, keywords::shape=a->shape(), args...) { }
+
+  void forward() {
+    Element(_1 = _2 / _3, val_, a_->val(), b_->val());
+  }
+
+  // Quotient rule:
+  //   d(a/b)/da =  1 / b
+  //   d(a/b)/db = -a / b^2
+  void backward() {
+    Element(_1 += _2 * 1.0f / _3, a_->grad(), adj_, b_->val());
+    Element(_1 -= _2 * _3 / (_4 * _4), b_->grad(), adj_, a_->val(), b_->val());
+  }
+
+  // Emits this node and its two incoming edges in graphviz notation.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label=" << label("÷")
+        << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl;
+    dot << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
+    return dot.str();
+  }
+
+};
+
+// Cross-entropy node: val = rowwise sum of -b * log(softmax(a)).
+// The softmax output is kept in probs_ between forward() and backward().
+struct CrossEntropyNodeOp : public BinaryNodeOp {
+  template <typename ...Args>
+  CrossEntropyNodeOp(ChainPtr a, ChainPtr b, Args ...args)
+    : BinaryNodeOp(a, b, keywords::shape=newShape(a, b), args...) { }
+
+  // Both inputs must agree in dimensions 0 and 1 (checked through
+  // UTIL_THROW_IF2); the result keeps dimension 0 and has size 1 in
+  // dimension 1.
+  Shape newShape(ChainPtr a, ChainPtr b) {
+    Shape lhs = a->shape();
+    Shape rhs = b->shape();
+    UTIL_THROW_IF2(lhs[0] != rhs[0] || lhs[1] != rhs[1],
+                   "cross entropy requires dimensions to match");
+    lhs[1] = 1;
+    return lhs;
+  }
+
+  // C = -dot(B, log(softmax(A))).
+  // The probabilities are cached because backward() needs them again.
+  void forward() {
+    // Reuse the cache when it already exists, otherwise create it.
+    if (probs_) {
+      probs_.set(0.0);
+    } else {
+      probs_.allocate(a_->val().shape(), 0.0);
+    }
+    thrust::copy(a_->val().begin(), a_->val().end(), probs_.begin());
+    Softmax(&probs_); // Safe version of softmax.
+
+    Tensor scratch(a_->val().shape());
+    Element(_1 = -_2 * Log(_3), scratch, b_->val(), probs_);
+    SumRowwise(scratch, val_);
+  }
+
+  // Per row, with p the softmax output and y the gold label distribution
+  // (e.g. a one-hot vector):
+  //   derivative wrt the first input  = adj * (p - y)
+  //   derivative wrt the second input = -adj * log(p)
+  //
+  // @TODO: In most cases it's wasteful to compute the derivative with respect
+  // to the second input which is typically an input node in the computation
+  // graph. In general the backward functions can skip the computation of
+  // gradients wrt input nodes.
+  void backward() {
+    Tensor scratch(probs_.shape());
+
+    // First input.
+    Element(_1 = _2 -  _3, scratch, probs_, b_->val());
+    ScaleRowwise(scratch, adj_);
+    Element(_1 += _2, a_->grad(), scratch);
+
+    // Second input.
+    Element(_1 = -Log(_2), scratch, probs_); // @TODO: use a cached log here.
+    ScaleRowwise(scratch, adj_);
+    Element(_1 += _2, b_->grad(), scratch);
+  }
+
+  // Emits this node and its two incoming edges in graphviz notation.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label=" << label("x-ent")
+        << ", style=\"filled\", fillcolor=\"orange\"]" << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
+    dot << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
+    return dot.str();
+  }
+
+ protected:
+  Tensor probs_;  // softmax(a) computed by the most recent forward()
+
+};
+
+
+}
+
--- a/src/node_operators_unary.h
+++ b/src/node_operators_unary.h
@ -0,0 +1,264 @@
+#include "node.h"
+#include "tensor_operators.h"
+
+namespace marian {
+
+// Base class for operators with exactly one input. The node is constructed
+// with the input's shape as its shape keyword, followed by any additional
+// keyword arguments supplied by the derived operator.
+struct UnaryNodeOp : public Node {
+    template <typename ...Args>
+    UnaryNodeOp(ChainPtr a, Args ...args)
+      : Node(keywords::shape=a->shape(), //@TODO: Check keywords?
+             args...),
+        a_(a)
+    {}
+
+    // The single input of this operator.
+    ChainPtr a_;
+};
+
+// Elementwise Sigma node (presumably the logistic sigmoid, given the
+// s * (1 - s) form of the derivative used in backward()).
+struct LogitNodeOp : public UnaryNodeOp {
+  template <typename ...Args>
+  LogitNodeOp(Args ...args)
+    : UnaryNodeOp(args...) {}
+
+  // val_ = Sigma(input value), elementwise.
+  void forward() {
+    Element(_1 = Sigma(_2), val_, a_->val());
+  }
+
+  // Accumulates adj_ * val_ * (1 - val_) into the input gradient, reusing the
+  // output stored in val_.
+  void backward() {
+    Element(_1 += _2 * _3 * (1.0f - _3), a_->grad(), adj_, val_);
+  }
+
+  // Currently does nothing.
+  void check() {}
+
+  // Graphviz description: a yellow box labelled via label("logit") plus the
+  // edge from the input.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label=" << label("logit")
+        << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\""
+        << std::endl << std::endl;
+    return dot.str();
+  }
+};
+
+// Elementwise Tanh node.
+struct TanhNodeOp : public UnaryNodeOp {
+  template <typename ...Args>
+  TanhNodeOp(Args ...args)
+    : UnaryNodeOp(args...) {}
+
+  // val_ = Tanh(input value), elementwise.
+  void forward() {
+    Element(_1 = Tanh(_2), val_, a_->val());
+  }
+
+  // Accumulates adj_ * (1 - val_^2) into the input gradient, reusing the
+  // output stored in val_.
+  void backward() {
+    Element(_1 += _2 * (1.0f - (_3 * _3)), a_->grad(), adj_, val_);
+  }
+
+  // Graphviz description: a yellow box labelled via label("tanh") plus the
+  // edge from the input.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label=" << label("tanh")
+        << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\""
+        << std::endl << std::endl;
+    return dot.str();
+  }
+};
+
+// Elementwise ReLU node.
+struct ReLUNodeOp : public UnaryNodeOp {
+  template <typename ...Args>
+  ReLUNodeOp(Args ...args)
+    : UnaryNodeOp(args...) {}
+
+  // val_ = ReLU(input value), elementwise.
+  void forward() {
+    Element(_1 = ReLU(_2), val_, a_->val());
+  }
+
+  // Accumulates adj_ * ReLUback(input value) into the input gradient. Unlike
+  // the sigmoid and tanh nodes, this reads the input value, not val_.
+  void backward() {
+    Element(_1 += _2 * ReLUback(_3), a_->grad(), adj_, a_->val());
+  }
+
+  // Graphviz description: a yellow box labelled via label("ReLU") plus the
+  // edge from the input.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label=" << label("ReLU")
+        << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\""
+        << std::endl << std::endl;
+    return dot.str();
+  }
+};
+
+// @TODO: slow and probably buggy
+// Dropout node. The probability p_ is fixed at 0.5; the seed starts at
+// time(0) and is advanced by one on every forward pass.
+struct DropoutNodeOp : public UnaryNodeOp {
+  template <typename ...Args>
+  DropoutNodeOp(Args ...args)
+    : UnaryNodeOp(args...),
+      p_(0.5),
+      seed_(time(0)) {}
+
+  // Fills val_ via Dropout() from the input value, using the current seed,
+  // then increments the seed.
+  void forward() {
+    // Alternative formulation, kept for reference:
+    //Element(_1 = Bernoulli(p_, (size_t)this) * _2,
+    //        val_, a_->val())
+    Dropout(val_, a_->val(), p_, seed_++);
+  }
+
+  // Accumulates adj_ into the input gradient wherever the stored output is
+  // non-zero (the comparison turns non-zero outputs into 1, zeros into 0).
+  void backward() {
+    Element(_1 += _2 * (_3 != 0.0f), a_->grad(), adj_, val_);
+  }
+
+  // Graphviz description: a yellow box labelled via label("dropout") plus the
+  // edge from the input.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label=" << label("dropout")
+        << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\""
+        << std::endl << std::endl;
+    return dot.str();
+  }
+
+  private:
+    float p_;   // Probability handed to Dropout().
+    int seed_;  // Seed handed to Dropout(); incremented per forward pass.
+};
+
+
+// Softmax node: B = softmax(A).
+struct SoftmaxNodeOp : public UnaryNodeOp {
+  template <typename ...Args>
+  SoftmaxNodeOp(Args ...args)
+    : UnaryNodeOp(args...) {}
+
+  // Copies the input value into val_ and applies the (safe version of)
+  // softmax to it in place.
+  void forward() {
+    thrust::copy(a_->val().begin(), a_->val().end(), val_.begin());
+    Softmax(&val_);
+  }
+
+  // Delegates to SoftmaxGrad with the input gradient, adj_ and the stored
+  // softmax output.
+  //
+  // For each row, the Jacobian times vector is given by:
+  //   J * dy = p .* (dy - avg*1)
+  // where avg = p'*dy and p is the softmax output (probabilities).
+  //
+  // For more information, see sec. 2.5 of the following reference:
+  // André F. T. Martins and Ramon Astudillo.
+  // "From Softmax to Sparsemax: A Sparse Model of Attention and Multi-Label
+  // Classification." ICML 2016.
+  // http://jmlr.org/proceedings/papers/v48/martins16.pdf
+  void backward() {
+    SoftmaxGrad(a_->grad(), adj_, val_);
+  }
+
+  // Graphviz description: a yellow box labelled via label("softmax") plus the
+  // edge from the input.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label=" << label("softmax")
+        << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\""
+        << std::endl << std::endl;
+    return dot.str();
+  }
+};
+
+// Argmax node: B = argmax(A). The output shape is the input shape with
+// dimension 1 set to 1. backward() is empty, so no gradient reaches the input.
+struct ArgmaxNodeOp : public UnaryNodeOp {
+  // NOTE(review): UnaryNodeOp already passes keywords::shape=a->shape() to
+  // Node ahead of the shape given here, so Node receives two shape keywords;
+  // confirm that the reduced shape is the one that takes effect.
+  template <typename ...Args>
+  ArgmaxNodeOp(ChainPtr a, Args ...args)
+    : UnaryNodeOp(a, keywords::shape=newShape(a), args...) { }
+
+  // Writes the argmax of the input value into val_.
+  void forward() {
+    // B = argmax(A).
+    Argmax(&val_, &a_->val());
+  }
+
+  // Intentionally empty: nothing is accumulated into the input gradient.
+  void backward() {
+  }
+
+  // Returns the shape of `a` with dimension 1 set to 1.
+  //
+  // Static because it is called from the constructor's initializer list
+  // before the base class has been constructed; calling a non-static member
+  // function at that point is undefined behaviour.
+  // NOTE(review): assumes the input shape has at least two dimensions.
+  static Shape newShape(ChainPtr a) {
+    Shape shape = a->shape();
+    shape[1] = 1;
+    return shape;
+  }
+
+  // Graphviz description: a yellow box labelled via label("argmax") plus the
+  // edge from the input.
+  virtual std::string graphviz() {
+    std::stringstream ss;
+    ss << "\"" << this << "\" [shape=\"box\", label="
+      << label("argmax") << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
+    ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
+    return ss.str();
+  }
+};
+
+// Elementwise Log node.
+struct LogNodeOp : public UnaryNodeOp {
+  template <typename ...Args>
+  LogNodeOp(Args ...args)
+    : UnaryNodeOp(args...) {}
+
+  // val_ = Log(input value), elementwise.
+  void forward() {
+    Element(_1 = Log(_2), val_, a_->val());
+  }
+
+  // Accumulates adj_ * (1 / input value) into the input gradient.
+  void backward() {
+    Element(_1 += _2 * (1.f / _3), a_->grad(), adj_, a_->val());
+  }
+
+  // Graphviz description: a yellow box labelled via label("log") plus the
+  // edge from the input.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label="
+        << label("log") << ", style=\"filled\", fillcolor=\"yellow\"]"
+        << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\""
+        << std::endl << std::endl;
+    return dot.str();
+  }
+};
+
+// Elementwise Exp node.
+struct ExpNodeOp : public UnaryNodeOp {
+  template <typename ...Args>
+  ExpNodeOp(Args ...args)
+    : UnaryNodeOp(args...) {}
+
+  // val_ = Exp(input value), elementwise.
+  void forward() {
+    Element(_1 = Exp(_2), val_, a_->val());
+  }
+
+  // Accumulates adj_ * Exp(input value) into the input gradient; the
+  // exponential is recomputed from the input value here.
+  void backward() {
+    Element(_1 += _2 * Exp(_3), a_->grad(), adj_, a_->val());
+  }
+
+  // Graphviz description: a yellow box labelled via label("exp") plus the
+  // edge from the input.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label=" << label("exp")
+        << ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\""
+        << std::endl << std::endl;
+    return dot.str();
+  }
+};
+
+// Elementwise negation node.
+struct NegNodeOp : public UnaryNodeOp {
+  template <typename ...Args>
+  NegNodeOp(Args ...args)
+    : UnaryNodeOp(args...) {}
+
+  // val_ = -(input value), elementwise.
+  void forward() {
+    Element(_1 = -_2, val_, a_->val());
+  }
+
+  // Accumulates -adj_ into the input gradient.
+  void backward() {
+    Element(_1 += -_2, a_->grad(), adj_);
+  }
+
+  // Graphviz description: a yellow box labelled via label("-") plus the edge
+  // from the input.
+  virtual std::string graphviz() {
+    std::stringstream dot;
+    dot << "\"" << this << "\" [shape=\"box\", label="
+        << label("-") << ", style=\"filled\", fillcolor=\"yellow\"]"
+        << std::endl;
+    dot << "\"" << a_ << "\" -> \"" << this << "\""
+        << std::endl << std::endl;
+    return dot.str();
+  }
+};
+
+
+}
+
--- a/src/tensor.h
+++ b/src/tensor.h
@ -207,6 +207,12 @@ class TensorImpl {
 	  thrust::copy(begin, end, data_.begin());
    }

+    /**
+     * @brief Add a constant to every element of the tensor's data.
+     *
+     * The data is copied into a host buffer in one bulk copy, incremented
+     * there, and written back in a second bulk copy, rather than indexing
+     * data_ one element at a time.
+     * NOTE(review): this assumes data_ lives on the GPU (as the thrust::copy
+     * calls and the "from GPU" comments in this class suggest), where every
+     * data_[i] access from host code is its own transfer -- confirm.
+     *
+     * @param incr Value added to each element.
+     */
+    void incr(Float incr) {
+      std::vector<Float> host(data_.size());
+      thrust::copy(data_.begin(), data_.end(), host.begin());
+      for (Float& value : host) {
+        value += incr;
+      }
+      thrust::copy(host.begin(), host.end(), data_.begin());
+    }
+
    /**
     * @brief Copy Tensor's vector from GPU to vector variable on CPU.
     *
@ -405,17 +411,12 @@ class Tensor {
     */
    std::string Debug() const
    {
-    	return pimpl_->Debug();
-    }
-
-    /**
-     * @brief Print Tensor data on CPU (?) (const). 
-     */
-    void Print() const {
-      for (int i = 0; i < size(); ++i) {
-        std::cerr << (*this)[i] << " ";
-      }
-      std::cerr << std::endl;
+    	if (!pimpl_) {
+    		return "Not yet set";
+    	}
+    	else {
+    		return pimpl_->Debug();
+    	}
    }

    //void Load(const std::string &path);
@ -434,6 +435,10 @@ class Tensor {
     */
    void set(const std::vector<float>::const_iterator &begin, const std::vector<float>::const_iterator &end);

+    /**
+     * @brief Forward an increment to the implementation object.
+     *
+     * Calls incr() on pimpl_ with the given value. pimpl_ is dereferenced
+     * without a null check.
+     *
+     * @param incr Value handed to the implementation's incr().
+     */
+    void incr(Float incr) {
+      pimpl_->incr(incr);
+    }
+
    /**
     * @brief Copy Tensor's vector from GPU to vector variable on CPU (const).
     *
--- a/src/test_nodes.cu
+++ b/src/test_nodes.cu
@ -0,0 +1,71 @@
+#include <vector>
+#include <random>
+#include "marian.h"
+#include "expression_graph.h"
+#include "keywords.h"
+#include "definitions.h"
+
+
+// Returns a pseudo-random float between -10 and +20, derived from the C
+// library rand() generator (so it is reproducible for a given srand() seed).
+float Rand()
+{
+	const float lo = -10;
+	const float hi = +20;
+	// Divisor that maps the rand() range [0, RAND_MAX] onto a span of hi - lo.
+	const float scale = static_cast<float>(RAND_MAX / (hi - lo));
+	const float draw = static_cast<float>(rand());
+	return lo + draw / scale;
+}
+
+// Test driver for node operators. Builds a small graph (negated input ->
+// cross-entropy against a label input -> mean over axis 0), fills both inputs
+// with random values, runs the forward pass and the numeric backward pass, and
+// prints the graph in graphviz form to stdout and the input tensor, output
+// value and output gradient to stderr. Command-line arguments are unused.
+int main(int argc, char** argv)
+{
+  using namespace std;
+  using namespace marian;
+  using namespace keywords;
+
+  int input_size = 10;
+  int output_size = 10;
+  int batch_size = 25;
+
+  // define graph
+  ExpressionGraph g;
+  Expr inExpr = g.input(shape={batch_size, input_size});
+  Expr labelExpr = g.input(shape={batch_size, output_size});
+
+  // Operator under test; swap in one of the commented alternatives to test
+  // a different node.
+  //Expr outExpr = softmax(inExpr);
+  //Expr outExpr = tanh(inExpr);
+  Expr outExpr = - inExpr;
+  Expr ceExpr = cross_entropy(outExpr, labelExpr);
+  Expr cost = mean(ceExpr, axis=0);
+
+  // create data (fixed seed, so every run sees the same values)
+  srand(0);
+  std::vector<float> values(batch_size * input_size);
+  generate(begin(values), end(values), Rand);
+
+  // Labels are sized with output_size so that they match the shape declared
+  // for labelExpr above.
+  std::vector<float> labels(batch_size * output_size);
+  generate(begin(labels), end(labels), Rand);
+
+  Tensor inTensor({batch_size, input_size});
+  thrust::copy(values.begin(), values.end(), inTensor.begin());
+
+  Tensor labelTensor({batch_size, output_size});
+  thrust::copy(labels.begin(), labels.end(), labelTensor.begin());
+
+  inExpr = inTensor;
+  labelExpr = labelTensor;
+
+  // train
+  g.forward(batch_size);
+  //g.backward();
+  g.backward_numeric(0.01);
+
+  std::cout << g.graphviz() << std::endl;
+
+  std::cerr << "inTensor=" << inTensor.Debug() << std::endl;
+
+  Tensor outTensor = outExpr.val();
+  std::cerr << "outTensor=" << outTensor.Debug() << std::endl;
+
+  Tensor outGrad = outExpr.grad();
+  std::cerr << "outGrad=" << outGrad.Debug() << std::endl;
+
+
+}
--- a/src/validate_encoder_decoder.cu
+++ b/src/validate_encoder_decoder.cu
@ -133,9 +133,7 @@ int main(int argc, char** argv) {
  while (getline(source_file, source_line)) {
    getline(target_file, target_line);
    std::vector<size_t> source_ids = source_vocab.ProcessSentence(source_line);
-    source_ids.push_back(source_vocab.GetEOS()); // Append EOS token.
    std::vector<size_t> target_ids = target_vocab.ProcessSentence(target_line);
-    target_ids.push_back(target_vocab.GetEOS()); // Append EOS token.
    source_sentences.push_back(source_ids);
    target_sentences.push_back(target_ids);
    if (num_source_tokens < 0 || source_ids.size() > num_source_tokens) {
--- a/src/vocab.cpp
+++ b/src/vocab.cpp
@ -75,6 +75,7 @@ std::vector<size_t> Vocab::ProcessSentence(const std::string &sentence)
 		size_t id = GetOrCreate(toks[i]);
 		ret[i] = id;
 	}
+  ret.push_back(GetEOS()); // Append EOS token.

 	return ret;
 }