working but not particularly accurate

2024-09-17 09:47:34 +03:00 · 2016-09-21 17:23:13 +01:00 · 2016-09-21 17:23:13 +01:00 · 0cb93637aa
commit 0cb93637aa
parent e741c9a579
5 changed files with 42 additions and 14 deletions
--- a/src/node.cu
+++ b/src/node.cu
@@ -25,7 +25,7 @@ void Node::calc_numeric_grad(
 			<< endl;

 	  //cerr << "input=" << input.Debug() << endl;
-	  cerr << "adj_=" << adj_.Debug() << endl;
+	  //cerr << "adj_=" << adj_.Debug() << endl;

 	  std::vector<float> origGrad(inputSize);
 	  thrust::copy(grad.begin(), grad.end(), origGrad.begin());
@@ -74,8 +74,15 @@ void Node::calc_numeric_grad(

 	  std::vector<float> numericalGrad(inputSize);
 	  for (size_t i = 0; i < numericalGrad.size(); ++i) {
-		  numericalGrad[i] = (adjVec[0] * (newVal[i] - sumValOrig) / delta);
-		  // adjVec[0] should be a matrix multiplication
+		  numericalGrad[i] = (newVal[i] - sumValOrig) / delta;
+	  }
+
+	  broadcast(numericalGrad, adjVec);
+	  //std::cerr << "broadcast size=" << numericalGrad.size() << " " << adjVec.size() << std::endl;
+	  //output("adjVec=", adjVec.begin(), adjVec.end());
+
+	  for (size_t i = 0; i < numericalGrad.size(); ++i) {
+		  numericalGrad[i] *= adjVec[i];
 		  numericalGrad[i] += prevCalcGrad[i];
 	  }

@@ -88,26 +95,41 @@ void Node::calc_numeric_grad(
 	  //output("numericalGrad", numericalGrad);

 	  // print out diff between origGrad and numericalGrad
+	  /*
 	  std::vector<float> diff(inputSize);
-
 	  for (size_t i = 0; i < diff.size(); ++i) {
 		  diff[i] = (origGrad[i] - numericalGrad[i]) ;
 	  }
 	  output("diff", diff.begin(), diff.end());
+	  */

 	  // put back origGrad
 	  thrust::copy(origGrad.begin(), origGrad.end(), grad.begin());
-
 }

 std::vector<float> Node::StoreTensorInVec(Tensor tensor)
 {
-	  size_t totSize = GetTotalSize(tensor.shape());
-	  std::vector<float> vec(totSize);
-	  thrust::copy(tensor.begin(), tensor.end(), vec.begin());
-	  return vec;
+  size_t totSize = GetTotalSize(tensor.shape());
+  std::vector<float> vec(totSize);
+  thrust::copy(tensor.begin(), tensor.end(), vec.begin());
+  return vec;
 }

+void Node::broadcast(const std::vector<float> &largeVec, std::vector<float> &smallVec)
+{
+	size_t largeSize = largeVec.size();
+	size_t smallSize = smallVec.size();
+
+    UTIL_THROW_IF2(largeSize < smallSize,
+    		"largeSize < smallSize:" << largeSize << "<" << smallSize);
+    UTIL_THROW_IF2(largeSize % smallSize,
+    		"largeSize % smallSize != 0:" << largeSize << " " << smallSize);
+
+    smallVec.resize(largeSize);
+    for (size_t i = smallSize; i < largeSize; i += smallSize) {
+    	std::copy(smallVec.begin(), smallVec.begin() + smallSize, smallVec.begin() + i);
+    }
+}

 }

--- a/src/node.h
+++ b/src/node.h
@@ -129,7 +129,7 @@ class Node : public Chainable<Tensor>,
 			  Tensor grad,
 			  const std::vector<float> &prevCalcGrad
 			  );
-
+	void broadcast(const std::vector<float> &largeVec, std::vector<float> &smallVec);

 };

--- a/src/node_operators_binary.h
+++ b/src/node_operators_binary.h
@@ -15,7 +15,7 @@ struct BinaryNodeOp : public Node {
  void backward_debug(Float delta) {
 	  using namespace std;

-	  cerr << "BinaryNodeOp::" << typeid(*this).name() << "::backward_numeric()" << endl;
+	  cerr << "BinaryNodeOp::" << typeid(*this).name() << "::backward_debug()" << endl;

 	  std::vector<float> preCalcGradA = StoreTensorInVec(a_->grad());
 	  //output("preCalcGradA", preCalcGradA);
--- a/src/node_operators_unary.h
+++ b/src/node_operators_unary.h
@@ -15,7 +15,7 @@ struct UnaryNodeOp : public Node {
    void backward_debug(Float delta) {
      using namespace std;

-      cerr << "UnaryNodeOp::" << typeid(*this).name() << "::backward_numeric()" << endl;
+      cerr << "UnaryNodeOp::" << typeid(*this).name() << "::backward_debug()" << endl;

 	  std::vector<float> preCalcGradA = StoreTensorInVec(a_->grad());
 	  //output("preCalcGradA", preCalcGradA);
--- a/src/test_nodes.cu
+++ b/src/test_nodes.cu
@@ -31,7 +31,13 @@ int main(int argc, char** argv)

  //Expr outExpr = softmax(inExpr);
  Expr outExpr = tanh(inExpr);
-  //Expr outExpr = - inExpr;
+  outExpr = - outExpr;
+  outExpr = logit(outExpr);
+  outExpr = relu(outExpr);
+  outExpr = log(outExpr);
+  outExpr = exp(outExpr);
+  outExpr = softmax(outExpr);
+
  Expr ceExpr = cross_entropy(outExpr, labelExpr);
  Expr cost = mean(ceExpr, axis=0);

@@ -55,7 +61,7 @@ int main(int argc, char** argv)
  // train
  g.forward(batch_size);
  //g.backward();
-  g.backward_debug(0.001);
+  g.backward_debug(0.01);

  std::cout << g.graphviz() << std::endl;