create backward_numeric() for binary nodes
This commit is contained in:
parent 8677f99597
commit f7e5e02e8c
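The new backward_numeric() methods sanity-check the analytic gradients: they run backward(), save the result, then re-estimate each gradient numerically by shifting every element of an input tensor by delta, re-running forward(), and forming adj_[i] * (newVal[i] - origVal[i]) / delta. The analytic gradient, the numeric estimate, and their difference scaled by delta are printed to stderr for manual comparison.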
@@ -10,6 +10,115 @@ struct BinaryNodeOp : public Node {
  template <typename ...Args>
  BinaryNodeOp(ChainPtr a, ChainPtr b, Args ...args)
   : Node(args...), a_(a), b_(b) {}

  std::vector<float> StoreTensorInVec(Tensor tensor)
  {
    size_t totSize = GetTotalSize(tensor.shape());
    std::vector<float> vec(totSize);
    thrust::copy(tensor.begin(), tensor.end(), vec.begin());
    return vec;
  }

  void calc_numeric_grad(
    Float delta,
    Tensor input,
    Tensor grad,
    const std::vector<float> &prevCalcGrad
  )
  {
    size_t totSize = GetTotalSize(input.shape());

    std::vector<float> diffGrad(totSize);
    thrust::copy(grad.begin(), grad.end(), diffGrad.begin());
    output("diffGrad", diffGrad);

    // reset grad
    thrust::copy(prevCalcGrad.begin(), prevCalcGrad.end(), grad.begin());
    //cerr << "reset a_->grad()=" << a_->grad().Debug() << endl;

    // START CALC of numerical gradient
    // new values
    input.incr(delta);

    forward();
    //cerr << "input=" << input.Debug() << endl;
    //cerr << "val_=" << val_.Debug() << endl;

    std::vector<float> newVal(totSize);
    thrust::copy(val_.begin(), val_.end(), newVal.begin());
    //output("newVal", newVal);

    // old values
    input.incr(-delta);

    forward();
    //cerr << "input=" << input.Debug() << endl;
    //cerr << "val_=" << val_.Debug() << endl;

    std::vector<float> origVal(totSize);
    thrust::copy(val_.begin(), val_.end(), origVal.begin());
    //output("origVal", origVal);

    // calc gradient
    //cerr << "adj_=" << adj_.Debug() << endl;
    std::vector<float> adjVec(totSize);
    thrust::copy(adj_.begin(), adj_.end(), adjVec.begin());

    std::vector<float> numericalGrad(totSize);
    for (size_t i = 0; i < totSize; ++i) {
      numericalGrad[i] = prevCalcGrad[i] + (adjVec[i] * (newVal[i] - origVal[i]) / delta);
    }
    output("numericalGrad", numericalGrad);
    //cerr << "numeric a_->grad()=" << a_->grad().Debug() << endl;

    // set grad results
    thrust::copy(numericalGrad.begin(), numericalGrad.end(), grad.begin());

    // print out diff between diffGrad and numericalGrad
    std::vector<float> origGrad(totSize);
    std::vector<float> diff(totSize);

    thrust::copy(grad.begin(), grad.end(), origGrad.begin());
    for (size_t i = 0; i < totSize; ++i) {
      diff[i] = (diffGrad[i] - numericalGrad[i]) / delta;
    }
    output("diff", diff);
  }

  void backward_numeric(Float delta) {
    using namespace std;

    cerr << "BinaryNodeOp::" << typeid(*this).name() << "::backward_numeric()" << endl;

    std::vector<float> preCalcGradA = StoreTensorInVec(a_->grad());
    //output("preCalcGradA", preCalcGradA);

    std::vector<float> preCalcGradB = StoreTensorInVec(b_->grad());
    //output("preCalcGradB", preCalcGradB);

    // use df/dx to calc grad
    backward();
    //cerr << "orig a_->grad()=" << a_->grad().Debug() << endl;

    cerr << "TENSOR A:" << endl;
    calc_numeric_grad(delta, a_->val(), a_->grad(), preCalcGradA);
    cerr << "TENSOR B:" << endl;
    calc_numeric_grad(delta, b_->val(), b_->grad(), preCalcGradB);

    // redo proper grad
    backward();
  }

  void output(const std::string &title, const std::vector<float> &vec)
  {
    std::cerr << title << " " << vec.size() << ":";
    for (size_t i = 0; i < vec.size(); ++i) {
      std::cerr << vec[i] << " ";
    }
    std::cerr << std::endl;
  }

};

/*** Matrix Product ***/
@@ -10,6 +10,89 @@ struct UnaryNodeOp : public Node {
  UnaryNodeOp(ChainPtr a, Args ...args)
   : Node(keywords::shape=a->shape(), //@TODO: Check keywords?
          args...), a_(a) {}

  void backward_numeric(Float delta) {
    using namespace std;

    cerr << "UnaryNodeOp::" << typeid(*this).name() << "::backward_numeric()" << endl;

    Tensor input = a_->val();
    size_t totSize = GetTotalSize(input.shape());

    std::vector<float> preCalcGrad(totSize);
    thrust::copy(a_->grad().begin(), a_->grad().end(), preCalcGrad.begin());
    output("preCalcGrad", preCalcGrad);

    // use df/dx to calc grad
    backward();
    //cerr << "orig a_->grad()=" << a_->grad().Debug() << endl;

    std::vector<float> diffGrad(totSize);
    thrust::copy(a_->grad().begin(), a_->grad().end(), diffGrad.begin());
    output("diffGrad", diffGrad);

    // reset grad
    thrust::copy(preCalcGrad.begin(), preCalcGrad.end(), a_->grad().begin());
    //cerr << "reset a_->grad()=" << a_->grad().Debug() << endl;

    // START CALC of numerical gradient
    // new values
    input.incr(delta);

    forward();
    //cerr << "input=" << input.Debug() << endl;
    //cerr << "val_=" << val_.Debug() << endl;

    std::vector<float> newVal(totSize);
    thrust::copy(val_.begin(), val_.end(), newVal.begin());
    //output("newVal", newVal);

    // old values
    input.incr(-delta);

    forward();
    //cerr << "input=" << input.Debug() << endl;
    //cerr << "val_=" << val_.Debug() << endl;

    std::vector<float> origVal(totSize);
    thrust::copy(val_.begin(), val_.end(), origVal.begin());
    //output("origVal", origVal);

    // calc gradient
    //cerr << "adj_=" << adj_.Debug() << endl;
    std::vector<float> adjVec(totSize);
    thrust::copy(adj_.begin(), adj_.end(), adjVec.begin());

    std::vector<float> numericalGrad(totSize);
    for (size_t i = 0; i < totSize; ++i) {
      numericalGrad[i] = preCalcGrad[i] + (adjVec[i] * (newVal[i] - origVal[i]) / delta);
    }
    output("numericalGrad", numericalGrad);
    //cerr << "numeric a_->grad()=" << a_->grad().Debug() << endl;

    // set grad results
    thrust::copy(numericalGrad.begin(), numericalGrad.end(), a_->grad().begin());

    // print out diff between diffGrad and numericalGrad
    std::vector<float> origGrad(totSize);
    std::vector<float> diff(totSize);

    thrust::copy(a_->grad().begin(), a_->grad().end(), origGrad.begin());
    for (size_t i = 0; i < totSize; ++i) {
      diff[i] = (diffGrad[i] - numericalGrad[i]) / delta;
    }
    output("diff", diff);
  }

  void output(const std::string &title, const std::vector<float> &vec)
  {
    std::cerr << title << " " << vec.size() << ":";
    for (size_t i = 0; i < vec.size(); ++i) {
      std::cerr << vec[i] << " ";
    }
    std::cerr << std::endl;
  }

};

struct LogitNodeOp : public UnaryNodeOp {