backward_numeric(): thread a finite-difference step size (delta) through the numerical-gradient check

This commit is contained in:
Hieu Hoang 2016-09-20 10:26:57 +01:00
parent 8b242fbc97
commit 8ef7f57a8a
6 changed files with 100 additions and 6 deletions

View File

@ -34,7 +34,7 @@ struct Chainable {
virtual ~Chainable() { }
virtual void forward() { }
virtual void backward() { }
virtual void backward_numeric() { }
virtual void backward_numeric(Float delta) { }
virtual void check() { }
virtual void init_dependent() { }

View File

@ -127,7 +127,7 @@ class ExpressionGraph {
(*it)->backward();
}
void backward_numeric() {
void backward_numeric(Float delta) {
for(auto&& v : *stack_)
v->set_zero_adjoint();
@ -136,7 +136,7 @@ class ExpressionGraph {
for(It it = stack_->rbegin(); it != stack_->rend(); ++it) {
Chainable<Tensor> *chainable = *it;
//chainable->backward();
chainable->backward_numeric();
chainable->backward_numeric(delta);
}
}

View File

@ -12,13 +12,15 @@ struct BinaryNodeOp : public Node {
BinaryNodeOp(ChainPtr a, ChainPtr b, Args ...args)
: Node(args...), a_(a), b_(b) {}
void backward_numeric() {
void backward_numeric(Float delta) {
using namespace std;
backward();
/*
cerr << "BinaryNodeOp::" << typeid(*this).name() << "::backward_numeric" << endl;
cerr << "a_->grad()=" << a_->grad().Debug() << endl;
cerr << "b_->grad()=" << b_->grad().Debug() << endl;
cerr << "adj_=" << adj_.Debug() << endl;
*/
}
};

View File

@ -11,12 +11,14 @@ struct UnaryNodeOp : public Node {
: Node(keywords::shape=a->shape(), //@TODO: Check keywords?
args...), a_(a) {}
void backward_numeric() {
void backward_numeric(Float delta) {
using namespace std;
backward();
/*
cerr << "UnaryNodeOp::" << typeid(*this).name() << "::backward_numeric" << endl;
cerr << "a_->grad()=" << a_->grad().Debug() << endl;
cerr << "adj_=" << adj_.Debug() << endl;
*/
}
};
@ -194,6 +196,86 @@ struct NegNodeOp : public UnaryNodeOp {
Element(_1 += -_2, a_->grad(), adj_);
}
// Print a labelled float vector to stderr as "<title> <size>:v0 v1 ... ".
void output(const std::string &title, const std::vector<float> &vec)
{
  std::cerr << title << " " << vec.size() << ":";
  for (float v : vec) {
    std::cerr << v << " ";
  }
  std::cerr << std::endl;
}
// Numerical gradient check for this op.
// Compares the analytic gradient produced by backward() against a
// forward-difference estimate obtained by perturbing the input by +delta,
// then stores the numerical estimate in a_->grad() and prints the
// discrepancy between the two.
// @param delta finite-difference step size (e.g. 0.01)
void backward_numeric(Float delta) {
  using namespace std;
  Tensor input = a_->val();
  size_t totSize = GetTotalSize(input.shape());

  // Gradient already accumulated in a_ before this node's backward pass.
  std::vector<float> preCalcGrad(totSize);
  thrust::copy(a_->grad().begin(), a_->grad().end(), preCalcGrad.begin());
  output("preCalcGrad", preCalcGrad);

  // Analytic gradient: use df/dx via backward().
  backward();
  std::vector<float> diffGrad(totSize);
  thrust::copy(a_->grad().begin(), a_->grad().end(), diffGrad.begin());
  output("diffGrad", diffGrad);

  // Reset the gradient so the numerical estimate accumulates onto the same
  // starting point as the analytic one did.
  thrust::copy(preCalcGrad.begin(), preCalcGrad.end(), a_->grad().begin());

  // START CALC of numerical gradient
  // f(x + delta)
  input.incr(delta);
  forward();
  std::vector<float> newVal(totSize);
  thrust::copy(val_.begin(), val_.end(), newVal.begin());

  // f(x): undo the perturbation and recompute.
  input.incr(-delta);
  forward();
  std::vector<float> origVal(totSize);
  thrust::copy(val_.begin(), val_.end(), origVal.begin());

  // Forward-difference estimate chain-ruled through the adjoint:
  //   grad_i = preCalcGrad_i + adj_i * (f(x+delta)_i - f(x)_i) / delta
  std::vector<float> adjVec(totSize);
  thrust::copy(adj_.begin(), adj_.end(), adjVec.begin());
  std::vector<float> numericalGrad(totSize);
  for (size_t i = 0; i < totSize; ++i) {
    numericalGrad[i] = preCalcGrad[i] + (adjVec[i] * (newVal[i] - origVal[i]) / delta);
  }
  output("numericalGrad", numericalGrad);

  // Store the numerical gradient as this node's result.
  thrust::copy(numericalGrad.begin(), numericalGrad.end(), a_->grad().begin());

  // Report analytic-vs-numerical discrepancy per element.
  // Fix: the previous version also copied a_->grad() back into an
  // `origGrad` vector that was never read — that dead device-to-host
  // copy has been removed.
  std::vector<float> diff(totSize);
  for (size_t i = 0; i < totSize; ++i) {
    diff[i] = diffGrad[i] - numericalGrad[i];
  }
  output("diff", diff);
}
virtual std::string graphviz() {
std::stringstream ss;
ss << "\"" << this << "\" [shape=\"box\", label=\"-\", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;

View File

@ -207,6 +207,12 @@ class TensorImpl {
thrust::copy(begin, end, data_.begin());
}
// Add `amount` to every element of the underlying storage, one element
// at a time.
void incr(Float amount) {
  const size_t n = data_.size();
  for (size_t idx = 0; idx != n; ++idx) {
    data_[idx] += amount;
  }
}
/**
* @brief Copy Tensor's vector from GPU to vector variable on CPU.
*
@ -429,6 +435,10 @@ class Tensor {
*/
void set(const std::vector<float>::const_iterator &begin, const std::vector<float>::const_iterator &end);
/**
 * @brief Add `amount` to every element of this tensor.
 *
 * Forwards to the pimpl implementation.
 */
void incr(Float amount) {
  pimpl_->incr(amount);
}
/**
* @brief Copy Tensor's vector from GPU to vector variable on CPU (const).
*

View File

@ -55,7 +55,7 @@ int main(int argc, char** argv)
// train
g.forward(batch_size);
//g.backward();
g.backward_numeric();
g.backward_numeric(0.01);
std::cout << g.graphviz() << std::endl;