mirror of https://github.com/marian-nmt/marian.git (synced 2024-09-17 09:47:34 +03:00)
more modularize numerical gradient
This commit is contained in:
parent c8c7daf815
commit 78ec0ad7a5

src/node.cu (47 changed lines)
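For orientation, here is a minimal, self-contained sketch of the kind of finite-difference check that calc_numeric_grad performs. The scalar function f, the step delta, and the plain std::vector storage are illustrative assumptions; the real code perturbs Tensor contents on the GPU (via thrust) element by element and compares against the gradient produced by backward().

#include <functional>
#include <iostream>
#include <vector>

// Forward-difference estimate of df/dx_i:
//   grad_i ~= (f(x + delta*e_i) - f(x)) / delta
std::vector<float> numericGrad(const std::function<float(const std::vector<float>&)>& f,
                               std::vector<float> x, float delta) {
  std::vector<float> grad(x.size());
  const float fx = f(x);                 // unperturbed value, computed once
  for (size_t i = 0; i < x.size(); ++i) {
    const float orig = x[i];
    x[i] = orig + delta;                 // perturb one element
    grad[i] = (f(x) - fx) / delta;       // slope along dimension i
    x[i] = orig;                         // restore before the next element
  }
  return grad;
}

int main() {
  // f(x) = sum of squares; the analytic gradient is 2*x.
  auto f = [](const std::vector<float>& v) {
    float s = 0;
    for (float e : v) s += e * e;
    return s;
  };
  for (float g : numericGrad(f, {1.0f, -2.0f, 3.0f}, 1e-3f))
    std::cout << g << " ";               // prints roughly 2 -4 6
  std::cout << std::endl;
}

A forward difference is the simplest scheme; a central difference, (f(x + delta) - f(x - delta)) / (2 * delta), halves the truncation error at the cost of one extra forward evaluation per element.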
@@ -29,7 +29,7 @@ void Node::calc_numeric_grad(
   std::vector<float> origGrad(inputSize);
   thrust::copy(grad.begin(), grad.end(), origGrad.begin());
-  cerr << "origGrad=" << grad.Debug() << endl;
+  //cerr << "origGrad=" << grad.Debug() << endl;
   //output("diffGrad", diffGrad);
 
   //output("prevCalcGrad", prevCalcGrad.begin(), prevCalcGrad.end());
@@ -91,27 +91,7 @@ void Node::calc_numeric_grad(
 
   // set grad results
   thrust::copy(numericalGrad.begin(), numericalGrad.end(), grad.begin());
-  cerr << "numericalGrad=" << grad.Debug() << endl;
   //output("numericalGrad", numericalGrad);
-
-  // print out diff between origGrad and numericalGrad
-  std::vector<float> diff(inputSize);
-  for (size_t i = 0; i < origGrad.size(); ++i) {
-    diff[i] = origGrad[i] - numericalGrad[i];
-  }
-  cerr << "L2-norm of difference=" << L2Norm(diff) << endl << endl;
-
-  // put back origGrad
-  thrust::copy(origGrad.begin(), origGrad.end(), grad.begin());
 }
-
-float Node::L2Norm(const std::vector<float> &vec) const
-{
-  float ret = 0;
-  for (size_t i = 0; i < vec.size(); ++i) {
-    ret += vec[i] * vec[i];
-  }
-  return sqrt(ret);
-}
 
 void Node::broadcast(const std::vector<float> &largeVec, std::vector<float> &smallVec)
@@ -130,5 +110,30 @@ void Node::broadcast(const std::vector<float> &largeVec, std::vector<float> &smallVec)
   }
 }
 
+void Node::outputL2Norm(const std::vector<float> &x, const std::vector<float> &y) const
+{
+  using namespace std;
+  // print out diff between diffGradA and numericalGrad
+  if(x.size() != y.size()) {
+    cerr << "size error: " << x.size() << "!=" << y.size() << endl;
+    exit(1);
+  }
+
+  std::vector<float> diff(x.size());
+  for (size_t i = 0; i < x.size(); ++i) {
+    diff[i] = x[i] - y[i];
+  }
+  cerr << "L2-norm of difference=" << L2Norm(diff) << endl << endl;
+}
+
+float Node::L2Norm(const std::vector<float> &vec) const
+{
+  float ret = 0;
+  for (size_t i = 0; i < vec.size(); ++i) {
+    ret += vec[i] * vec[i];
+  }
+  return sqrt(ret);
+}
+
 }
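Read in isolation, the two new helpers amount to the following standalone sketch (plain std::vector in place of the Tensor-backed vectors of the diff; the sample values are illustrative):

#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>

float L2Norm(const std::vector<float>& vec) {
  float ret = 0;
  for (size_t i = 0; i < vec.size(); ++i)
    ret += vec[i] * vec[i];
  return std::sqrt(ret);                 // ||vec||_2
}

// Mirrors Node::outputL2Norm: print ||x - y||_2, the distance between
// the analytic gradient x and the numerical gradient y.
void outputL2Norm(const std::vector<float>& x, const std::vector<float>& y) {
  if (x.size() != y.size()) {
    std::cerr << "size error: " << x.size() << "!=" << y.size() << std::endl;
    std::exit(1);
  }
  std::vector<float> diff(x.size());
  for (size_t i = 0; i < x.size(); ++i)
    diff[i] = x[i] - y[i];
  std::cerr << "L2-norm of difference=" << L2Norm(diff) << std::endl;
}

int main() {
  outputL2Norm({2.0f, -4.0f, 6.0f},        // analytic gradient
               {2.001f, -3.998f, 6.002f}); // numerical estimate
}

A difference norm that is small relative to the norms of the gradients themselves indicates that the analytic backward pass agrees with the finite-difference estimate.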
@@ -126,6 +126,7 @@ class Node : public Chainable<Tensor>,
     }
     std::cerr << std::endl;
   }
+  void outputL2Norm(const std::vector<float> &x, const std::vector<float> &y) const;
 
   void calc_numeric_grad(
     Float delta,
@@ -17,23 +17,39 @@ struct BinaryNodeOp : public Node {
 
     cerr << "BinaryNodeOp::" << typeid(*this).name() << "::backward_debug()" << endl;
 
-    std::vector<float> preCalcGradA;
+    std::vector<float> preCalcGradA, diffGradA, numericalGradA;
     preCalcGradA << a_->grad();
     //output("preCalcGradA", preCalcGradA);
 
-    std::vector<float> preCalcGradB;
+    std::vector<float> preCalcGradB, diffGradB, numericalGradB;
     preCalcGradB << b_->grad();
     //output("preCalcGradB", preCalcGradB);
 
     // use df/dx to calc grad
     backward();
-    cerr << "orig a_->grad()=" << a_->grad().Debug() << endl;
-    cerr << "orig b_->grad()=" << b_->grad().Debug() << endl;
 
+    diffGradA << a_->grad();
+    diffGradB << b_->grad();
+
+    //cerr << "orig a_->grad()=" << a_->grad().Debug() << endl;
+    //cerr << "orig b_->grad()=" << b_->grad().Debug() << endl;
+
+    cerr << "TENSOR A:" << endl;
     calc_numeric_grad(delta, a_->val(), a_->grad(), preCalcGradA);
     cerr << "numerical a_->grad()=" << a_->grad().Debug() << endl;
 
+    numericalGradA << a_->grad();
+    outputL2Norm(diffGradA, numericalGradA);
+
+
+    cerr << "TENSOR B:" << endl;
     calc_numeric_grad(delta, b_->val(), b_->grad(), preCalcGradB);
     cerr << "numerical b_->grad()=" << b_->grad().Debug() << endl;
 
+    numericalGradB << b_->grad();
+    outputL2Norm(diffGradB, numericalGradB);
 
   }
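The hunk above runs the same protocol for each input tensor: record the analytic gradient from backward(), let calc_numeric_grad overwrite grad with the numerical estimate, and compare the two with outputL2Norm (the unary variant in the next hunk additionally restores the analytic gradient afterwards). A self-contained sketch of that flow on plain floats (the product function, its hand-written gradient, and delta are illustrative stand-ins for marian's tensors and backward pass):

#include <cmath>
#include <iostream>
#include <vector>

int main() {
  // f(x) = x[0]*x[1], with analytic gradient (x[1], x[0]).
  std::vector<float> x = {3.0f, -2.0f};
  auto f = [](const std::vector<float>& v) { return v[0] * v[1]; };

  // 1. "backward()": record the analytic gradient before it is overwritten.
  std::vector<float> diffGrad = {x[1], x[0]};

  // 2. "calc_numeric_grad": forward-difference estimate, element by element.
  const float delta = 1e-3f;
  std::vector<float> numericalGrad(x.size());
  const float fx = f(x);
  for (size_t i = 0; i < x.size(); ++i) {
    const float orig = x[i];
    x[i] = orig + delta;
    numericalGrad[i] = (f(x) - fx) / delta;
    x[i] = orig;
  }

  // 3. "outputL2Norm": L2 distance between the two gradients.
  float sq = 0;
  for (size_t i = 0; i < x.size(); ++i) {
    const float d = diffGrad[i] - numericalGrad[i];
    sq += d * d;
  }
  std::cerr << "L2-norm of difference=" << std::sqrt(sq) << std::endl;

  // 4. The real code then restores the analytic gradient
  //    (a_->grad().set(diffGradA)) so later steps see unmodified state.
}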
@@ -22,13 +22,19 @@ struct UnaryNodeOp : public Node {
 
     // use df/dx to calc grad
     backward();
-    cerr << "orig a_->grad()=" << a_->grad().Debug() << endl;
+    diffGradA << a_->grad();
+    //cerr << "orig a_->grad()=" << a_->grad().Debug() << endl;
 
-    //a_->grad().set(preCalcGradA);
+    a_->grad().set(preCalcGradA);
 
     calc_numeric_grad(delta, a_->val(), a_->grad(), preCalcGradA);
     cerr << "numerical a_->grad()=" << a_->grad().Debug() << endl;
 
+    numericalGradA << a_->grad();
+
+    outputL2Norm(diffGradA, numericalGradA);
+
+    // reset to diff grad
+    a_->grad().set(diffGradA);
   }