create backward_numeric() for binary nodes
This commit is contained in:
parent 8677f99597
commit f7e5e02e8c
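The new backward_numeric() methods sanity-check the analytic gradients: they run backward(), save the result, then re-estimate each gradient numerically by shifting every element of an input tensor by delta, re-running forward(), and forming adj_[i] * (newVal[i] - origVal[i]) / delta. The analytic gradient, the numeric estimate, and their difference scaled by delta are printed to stderr for manual comparison.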
@@ -10,6 +10,115 @@ struct BinaryNodeOp : public Node {
  template <typename ...Args>
  BinaryNodeOp(ChainPtr a, ChainPtr b, Args ...args)
   : Node(args...), a_(a), b_(b) {}

  std::vector<float> StoreTensorInVec(Tensor tensor)
  {
    size_t totSize = GetTotalSize(tensor.shape());
    std::vector<float> vec(totSize);
    thrust::copy(tensor.begin(), tensor.end(), vec.begin());
    return vec;
  }

  void calc_numeric_grad(
    Float delta,
    Tensor input,
    Tensor grad,
    const std::vector<float> &prevCalcGrad
  )
  {
    size_t totSize = GetTotalSize(input.shape());

    std::vector<float> diffGrad(totSize);
    thrust::copy(grad.begin(), grad.end(), diffGrad.begin());
    output("diffGrad", diffGrad);

    // reset grad
    thrust::copy(prevCalcGrad.begin(), prevCalcGrad.end(), grad.begin());
    //cerr << "reset a_->grad()=" << a_->grad().Debug() << endl;

    // START CALC of numerical gradient
    // new values
    input.incr(delta);

    forward();
    //cerr << "input=" << input.Debug() << endl;
    //cerr << "val_=" << val_.Debug() << endl;

    std::vector<float> newVal(totSize);
    thrust::copy(val_.begin(), val_.end(), newVal.begin());
    //output("newVal", newVal);

    // old values
    input.incr(-delta);

    forward();
    //cerr << "input=" << input.Debug() << endl;
    //cerr << "val_=" << val_.Debug() << endl;

    std::vector<float> origVal(totSize);
    thrust::copy(val_.begin(), val_.end(), origVal.begin());
    //output("origVal", origVal);

    // calc gradient
    //cerr << "adj_=" << adj_.Debug() << endl;
    std::vector<float> adjVec(totSize);
    thrust::copy(adj_.begin(), adj_.end(), adjVec.begin());

    std::vector<float> numericalGrad(totSize);
    for (size_t i = 0; i < totSize; ++i) {
      numericalGrad[i] = prevCalcGrad[i] + (adjVec[i] * (newVal[i] - origVal[i]) / delta);
    }
    output("numericalGrad", numericalGrad);
    //cerr << "numeric a_->grad()=" << a_->grad().Debug() << endl;

    // set grad results
    thrust::copy(numericalGrad.begin(), numericalGrad.end(), grad.begin());

    // print out diff between diffGrad and numericalGrad
    std::vector<float> origGrad(totSize);
    std::vector<float> diff(totSize);

    thrust::copy(grad.begin(), grad.end(), origGrad.begin());
    for (size_t i = 0; i < totSize; ++i) {
      diff[i] = (diffGrad[i] - numericalGrad[i]) / delta;
    }
    output("diff", diff);
  }

  void backward_numeric(Float delta) {
    using namespace std;

    cerr << "BinaryNodeOp::" << typeid(*this).name() << "::backward_numeric()" << endl;

    std::vector<float> preCalcGradA = StoreTensorInVec(a_->grad());
    //output("preCalcGradA", preCalcGradA);

    std::vector<float> preCalcGradB = StoreTensorInVec(b_->grad());
    //output("preCalcGradB", preCalcGradB);

    // use df/dx to calc grad
    backward();
    //cerr << "orig a_->grad()=" << a_->grad().Debug() << endl;

    cerr << "TENSOR A:" << endl;
    calc_numeric_grad(delta, a_->val(), a_->grad(), preCalcGradA);
    cerr << "TENSOR B:" << endl;
    calc_numeric_grad(delta, b_->val(), b_->grad(), preCalcGradB);

    // redo proper grad
    backward();
  }

  void output(const std::string &title, const std::vector<float> &vec)
  {
    std::cerr << title << " " << vec.size() << ":";
    for (size_t i = 0; i < vec.size(); ++i) {
      std::cerr << vec[i] << " ";
    }
    std::cerr << std::endl;
  }

};

/*** Matrix Product ***/
@@ -10,6 +10,89 @@ struct UnaryNodeOp : public Node {
  UnaryNodeOp(ChainPtr a, Args ...args)
   : Node(keywords::shape=a->shape(), //@TODO: Check keywords?
          args...), a_(a) {}

  void backward_numeric(Float delta) {
    using namespace std;

    cerr << "UnaryNodeOp::" << typeid(*this).name() << "::backward_numeric()" << endl;

    Tensor input = a_->val();
    size_t totSize = GetTotalSize(input.shape());

    std::vector<float> preCalcGrad(totSize);
    thrust::copy(a_->grad().begin(), a_->grad().end(), preCalcGrad.begin());
    output("preCalcGrad", preCalcGrad);

    // use df/dx to calc grad
    backward();
    //cerr << "orig a_->grad()=" << a_->grad().Debug() << endl;

    std::vector<float> diffGrad(totSize);
    thrust::copy(a_->grad().begin(), a_->grad().end(), diffGrad.begin());
    output("diffGrad", diffGrad);

    // reset grad
    thrust::copy(preCalcGrad.begin(), preCalcGrad.end(), a_->grad().begin());
    //cerr << "reset a_->grad()=" << a_->grad().Debug() << endl;

    // START CALC of numerical gradient
    // new values
    input.incr(delta);

    forward();
    //cerr << "input=" << input.Debug() << endl;
    //cerr << "val_=" << val_.Debug() << endl;

    std::vector<float> newVal(totSize);
    thrust::copy(val_.begin(), val_.end(), newVal.begin());
    //output("newVal", newVal);

    // old values
    input.incr(-delta);

    forward();
    //cerr << "input=" << input.Debug() << endl;
    //cerr << "val_=" << val_.Debug() << endl;

    std::vector<float> origVal(totSize);
    thrust::copy(val_.begin(), val_.end(), origVal.begin());
    //output("origVal", origVal);

    // calc gradient
    //cerr << "adj_=" << adj_.Debug() << endl;
    std::vector<float> adjVec(totSize);
    thrust::copy(adj_.begin(), adj_.end(), adjVec.begin());

    std::vector<float> numericalGrad(totSize);
    for (size_t i = 0; i < totSize; ++i) {
      numericalGrad[i] = preCalcGrad[i] + (adjVec[i] * (newVal[i] - origVal[i]) / delta);
    }
    output("numericalGrad", numericalGrad);
    //cerr << "numeric a_->grad()=" << a_->grad().Debug() << endl;

    // set grad results
    thrust::copy(numericalGrad.begin(), numericalGrad.end(), a_->grad().begin());

    // print out diff between diffGrad and numericalGrad
    std::vector<float> origGrad(totSize);
    std::vector<float> diff(totSize);

    thrust::copy(a_->grad().begin(), a_->grad().end(), origGrad.begin());
    for (size_t i = 0; i < totSize; ++i) {
      diff[i] = (diffGrad[i] - numericalGrad[i]) / delta;
    }
    output("diff", diff);
  }

  void output(const std::string &title, const std::vector<float> &vec)
  {
    std::cerr << title << " " << vec.size() << ":";
    for (size_t i = 0; i < vec.size(); ++i) {
      std::cerr << vec[i] << " ";
    }
    std::cerr << std::endl;
  }

};

struct LogitNodeOp : public UnaryNodeOp {