Merge branch 'master' of github.com:emjotde/marian

This commit is contained in:
Marcin Junczys-Dowmunt 2016-09-20 14:14:22 +02:00
commit 7412e68dcd
4 changed files with 132 additions and 3 deletions


@@ -111,6 +111,92 @@ class Node : public Chainable<Tensor>,
    Tensor val_;
    Tensor adj_;

    // Helper for backward_numeric(): approximates the gradient w.r.t. `input`
    // by finite differences and compares it against the analytic gradient
    // that backward() has already written into `grad`.
    void calc_numeric_grad(
        Float delta,
        Tensor input,
        Tensor grad,
        const std::vector<float> &prevCalcGrad)
    {
      size_t totSize = GetTotalSize(input.shape());

      // save the analytic gradient computed by backward()
      std::vector<float> diffGrad(totSize);
      thrust::copy(grad.begin(), grad.end(), diffGrad.begin());
      output("diffGrad", diffGrad);

      // rewind grad to its state before backward() ran
      thrust::copy(prevCalcGrad.begin(), prevCalcGrad.end(), grad.begin());
      //cerr << "reset a_->grad()=" << a_->grad().Debug() << endl;

      // START CALC of numerical gradient
      // perturbed values: f(x + delta)
      input.incr(delta);
      forward();
      //cerr << "input=" << input.Debug() << endl;
      //cerr << "val_=" << val_.Debug() << endl;

      std::vector<float> newVal(totSize);
      thrust::copy(val_.begin(), val_.end(), newVal.begin());
      //output("newVal", newVal);

      // restored values: f(x)
      input.incr(-delta);
      forward();
      //cerr << "input=" << input.Debug() << endl;
      //cerr << "val_=" << val_.Debug() << endl;

      std::vector<float> origVal(totSize);
      thrust::copy(val_.begin(), val_.end(), origVal.begin());
      //output("origVal", origVal);

      // chain rule: grad = prevGrad + adj * (f(x+delta) - f(x)) / delta
      //cerr << "adj_=" << adj_.Debug() << endl;
      std::vector<float> adjVec(totSize);
      thrust::copy(adj_.begin(), adj_.end(), adjVec.begin());

      std::vector<float> numericalGrad(totSize);
      for (size_t i = 0; i < totSize; ++i) {
        numericalGrad[i] = prevCalcGrad[i]
                           + adjVec[i] * (newVal[i] - origVal[i]) / delta;
      }
      output("numericalGrad", numericalGrad);
      //cerr << "numeric a_->grad()=" << a_->grad().Debug() << endl;

      // leave the numerical gradient in grad
      thrust::copy(numericalGrad.begin(), numericalGrad.end(), grad.begin());

      // print the element-wise difference between the analytic and
      // numerical gradients
      std::vector<float> diff(totSize);
      for (size_t i = 0; i < totSize; ++i) {
        diff[i] = diffGrad[i] - numericalGrad[i];
      }
      output("diff", diff);
    }

    // Copies a (device) tensor into a host-side vector.
    std::vector<float> StoreTensorInVec(Tensor tensor)
    {
      size_t totSize = GetTotalSize(tensor.shape());
      std::vector<float> vec(totSize);
      thrust::copy(tensor.begin(), tensor.end(), vec.begin());
      return vec;
    }

    // Prints a labelled vector to stderr for debugging.
    void output(const std::string &title, const std::vector<float> &vec)
    {
      std::cerr << title << "(" << vec.size() << "): ";
      for (size_t i = 0; i < vec.size(); ++i) {
        std::cerr << vec[i] << " ";
      }
      std::cerr << std::endl;
    }
};
}
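
For orientation: stripped of the Tensor and thrust plumbing, the check above reduces to the forward-difference rule grad = prevGrad + adj * (f(x+delta) - f(x)) / delta. A minimal standalone sketch of that rule (plain C++, not Marian code; the adjoint is taken as 1, and tanh stands in for the node's forward function, matching the test program further down):

// Standalone sketch, not Marian code: the forward-difference rule behind
// calc_numeric_grad(), with the adjoint taken as 1 and f(x) = tanh(x).
#include <cmath>
#include <cstdio>

int main() {
  const float delta = 1e-4f;
  const float xs[] = {-1.0f, 0.0f, 0.5f, 2.0f};
  for (float x : xs) {
    float analytic = 1.0f - std::tanh(x) * std::tanh(x);           // d/dx tanh(x)
    float numeric  = (std::tanh(x + delta) - std::tanh(x)) / delta;
    std::printf("x=%5.2f analytic=%.6f numeric=%.6f diff=%+.2e\n",
                x, analytic, numeric, analytic - numeric);
  }
  return 0;
}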


@@ -10,6 +10,33 @@ struct BinaryNodeOp : public Node {
  template <typename ...Args>
  BinaryNodeOp(ChainPtr a, ChainPtr b, Args ...args)
   : Node(args...), a_(a), b_(b) {}

  // Checks the analytic gradients of both parents against finite differences.
  void backward_numeric(Float delta) {
    using namespace std;
    cerr << "BinaryNodeOp::" << typeid(*this).name() << "::backward_numeric()" << endl;

    // snapshot both gradients before backward() accumulates into them
    std::vector<float> preCalcGradA = StoreTensorInVec(a_->grad());
    //output("preCalcGradA", preCalcGradA);
    std::vector<float> preCalcGradB = StoreTensorInVec(b_->grad());
    //output("preCalcGradB", preCalcGradB);

    // use df/dx to calc grad
    backward();
    //cerr << "orig a_->grad()=" << a_->grad().Debug() << endl;

    cerr << "TENSOR A:" << endl;
    calc_numeric_grad(delta, a_->val(), a_->grad(), preCalcGradA);
    cerr << "TENSOR B:" << endl;
    calc_numeric_grad(delta, b_->val(), b_->grad(), preCalcGradB);

    // redo proper grad
    backward();
  }
};
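
The order of operations here matters because gradients accumulate: backward() adds into a_->grad() and b_->grad() rather than overwriting them, so each numeric check rewinds to its snapshot before writing an estimate, and the final backward() restores the analytic gradients for whatever runs next. A toy sketch of that snapshot/rewind/restore protocol (plain C++, not Marian code):

// Standalone sketch, not Marian code: checking an accumulating gradient
// requires a snapshot taken before backward() and a rewind afterwards.
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> grad = {0.25f};       // contribution accumulated by another node
  std::vector<float> preCalc = grad;       // snapshot, as StoreTensorInVec() does
  grad[0] += 1.5f;                         // backward(): analytic share accumulates
  std::printf("analytic total: %f\n", grad[0]);   // 1.75
  grad = preCalc;                          // rewind before the numeric estimate
  grad[0] += 1.4998f;                      // numeric share accumulates instead
  std::printf("numeric total:  %f\n", grad[0]);   // ~1.7498, close to the analytic value
  return 0;
}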
/*** Matrix Product ***/


@@ -10,6 +10,22 @@ struct UnaryNodeOp : public Node {
  UnaryNodeOp(ChainPtr a, Args ...args)
   : Node(keywords::shape=a->shape(), //@TODO: Check keywords?
          args...), a_(a) {}

  // Checks the analytic gradient of the single parent against finite differences.
  void backward_numeric(Float delta) {
    using namespace std;
    cerr << "UnaryNodeOp::" << typeid(*this).name() << "::backward_numeric()" << endl;

    // snapshot the gradient before backward() accumulates into it
    std::vector<float> preCalcGradA = StoreTensorInVec(a_->grad());
    //output("preCalcGradA", preCalcGradA);

    // use df/dx to calc grad
    backward();
    //cerr << "orig a_->grad()=" << a_->grad().Debug() << endl;

    calc_numeric_grad(delta, a_->val(), a_->grad(), preCalcGradA);
  }
};
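
LogitNodeOp below is one concrete unary op this machinery exercises. Assuming its forward function is the element-wise logistic sigmoid, a passing check for it looks like the following standalone sketch (plain C++, not Marian code), comparing the analytic derivative s*(1-s) against the forward difference:

// Standalone sketch, not Marian code: gradient check for a logit (sigmoid)
// node: analytic s*(1-s) vs. forward difference.
#include <cmath>
#include <cstdio>

static float sigmoid(float x) { return 1.0f / (1.0f + std::exp(-x)); }

int main() {
  const float delta = 1e-3f;
  const float xs[] = {-2.0f, 0.0f, 2.0f};
  for (float x : xs) {
    float s = sigmoid(x);
    float analytic = s * (1.0f - s);
    float numeric  = (sigmoid(x + delta) - sigmoid(x)) / delta;
    std::printf("x=%4.1f analytic=%.6f numeric=%.6f\n", x, analytic, numeric);
  }
  return 0;
}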
struct LogitNodeOp : public UnaryNodeOp {


@@ -30,8 +30,8 @@ int main(int argc, char** argv)
   Expr labelExpr = g.input(shape={batch_size, output_size});
   //Expr outExpr = softmax(inExpr);
-  //Expr outExpr = tanh(inExpr);
-  Expr outExpr = - inExpr;
+  Expr outExpr = tanh(inExpr);
+  //Expr outExpr = - inExpr;
   Expr ceExpr = cross_entropy(outExpr, labelExpr);
   Expr cost = mean(ceExpr, axis=0);
@@ -55,7 +55,7 @@ int main(int argc, char** argv)
   // train
   g.forward(batch_size);
   //g.backward();
-  g.backward_numeric(0.01);
+  g.backward_numeric(0.00001);
   std::cout << g.graphviz() << std::endl;
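
The second hunk also shrinks delta from 0.01 to 0.00001. That choice is a real trade-off: a large delta inflates the truncation error of the forward difference, while a very small one hits float32 cancellation (f(x+delta) and f(x) agree in most of their bits), and 1e-5 is already near that edge. A standalone sketch that makes the effect visible (plain C++, not Marian code):

// Standalone sketch, not Marian code: forward-difference error vs. delta in
// float32. Large delta -> truncation error; tiny delta -> cancellation noise.
#include <cmath>
#include <cstdio>

int main() {
  const float x = 0.5f;
  const float analytic = 1.0f - std::tanh(x) * std::tanh(x);
  const float deltas[] = {1e-1f, 1e-2f, 1e-3f, 1e-5f, 1e-7f};
  for (float delta : deltas) {
    float numeric = (std::tanh(x + delta) - std::tanh(x)) / delta;
    std::printf("delta=%.0e  abs error=%.3e\n", delta, std::fabs(numeric - analytic));
  }
  return 0;
}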