Hieu Hoang 2016-09-22 10:24:01 +01:00
parent d24ee0a924
commit ec72ee2ae4
3 changed files with 75 additions and 79 deletions


@@ -12,97 +12,97 @@ void Node::calc_numeric_grad(
{
using namespace std;
  size_t inputSize = GetTotalSize(input.shape());
  size_t valSize = GetTotalSize(val_.shape());

  UTIL_THROW_IF2(inputSize != GetTotalSize(grad.shape()),
                 "inputSize != gradSize:" << inputSize << "!=" << GetTotalSize(grad.shape()));
  UTIL_THROW_IF2(valSize != GetTotalSize(adj_.shape()),
                 "valSize != adjSize :" << valSize << "!=" << GetTotalSize(adj_.shape()));

  cerr << "inputSize=grad=" << Debug(input.shape()) << "=" << inputSize << " "
       << "valSize=adj_=" << Debug(val_.shape()) << "=" << valSize
       << endl;

  //cerr << "input=" << input.Debug() << endl;
  //cerr << "adj_=" << adj_.Debug() << endl;

  std::vector<float> origGrad(inputSize);
  thrust::copy(grad.begin(), grad.end(), origGrad.begin());
  cerr << "origGrad=" << grad.Debug() << endl;
  //output("diffGrad", diffGrad);
  //output("prevCalcGrad", prevCalcGrad.begin(), prevCalcGrad.end());

  std::vector<float> inputVec(inputSize);
  thrust::copy(input.begin(), input.end(), inputVec.begin());
  //output("inputVec", inputVec);

  std::vector<float> newVal(inputSize, 0);
  // loop thru each element in input & add delta
  for (size_t inputInd = 0; inputInd < inputSize; ++inputInd) {
    inputVec[inputInd] += delta;
    thrust::copy(inputVec.begin(), inputVec.end(), input.begin());
    //output("input", input.begin(), input.end());

    forward();

    for (size_t i = 0; i < valSize; ++i) {
      newVal[inputInd] += val_[i];
    }
    //output("val_", val_.begin(), val_.end());
    //output("newVal", newVal.begin(), newVal.end());

    inputVec[inputInd] -= delta;
  }

  // orig value: forward pass with the unperturbed input
  thrust::copy(inputVec.begin(), inputVec.end(), input.begin());
  forward();

  float sumValOrig = 0;
  for (size_t i = 0; i < valSize; ++i) {
    sumValOrig += val_[i];
  }

  // calc gradient: forward-difference estimate for each input element
  //cerr << "adj_=" << adj_.Debug() << endl;
  std::vector<float> adjVec(valSize);
  thrust::copy(adj_.begin(), adj_.end(), adjVec.begin());

  std::vector<float> numericalGrad(inputSize);
  for (size_t i = 0; i < numericalGrad.size(); ++i) {
    numericalGrad[i] = (newVal[i] - sumValOrig) / delta;
  }

  broadcast(numericalGrad, adjVec);
  //std::cerr << "broadcast size=" << numericalGrad.size() << " " << adjVec.size() << std::endl;
  //output("adjVec=", adjVec.begin(), adjVec.end());

  // chain rule: scale by the incoming adjoint, then accumulate onto the
  // previously calculated gradient
  for (size_t i = 0; i < numericalGrad.size(); ++i) {
    numericalGrad[i] *= adjVec[i];
    numericalGrad[i] += prevCalcGrad[i];
  }
  //output("prevCalcGrad=", prevCalcGrad.begin(), prevCalcGrad.end());
  //output("adjVec=", adjVec.begin(), adjVec.end());

  // set grad results
  thrust::copy(numericalGrad.begin(), numericalGrad.end(), grad.begin());
  cerr << "numericalGrad=" << grad.Debug() << endl;
  //output("numericalGrad", numericalGrad);

  // print out diff between origGrad and numericalGrad
  std::vector<float> diff(inputSize);
  for (size_t i = 0; i < origGrad.size(); ++i) {
    diff[i] = origGrad[i] - numericalGrad[i];
  }
  cerr << "L2-norm of difference=" << L2Norm(diff) << endl << endl;

  // put back origGrad
  thrust::copy(origGrad.begin(), origGrad.end(), grad.begin());
}
float Node::L2Norm(const std::vector<float> &vec) const
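For orientation, calc_numeric_grad validates the gradient produced by the backward pass against a forward-difference estimate: perturb one input element by delta, re-run forward(), and take (sum(val_new) - sum(val_orig)) / delta, scaled by the adjoint via the chain rule. A minimal CPU-only sketch of the same check outside the graph machinery; the function f, the test point x, and the analytic gradient 2*x[i] are illustrative assumptions, not repo code:

#include <cmath>
#include <cstdio>
#include <vector>

// f(x) = sum_i x_i^2, so the analytic gradient is df/dx_i = 2 * x_i
static float f(const std::vector<float> &x) {
  float sum = 0;
  for (float v : x) sum += v * v;
  return sum;
}

int main() {
  const float delta = 0.001f;
  std::vector<float> x = {0.5f, -1.0f, 2.0f};

  const float base = f(x);  // value at the unperturbed point
  float l2 = 0;
  for (size_t i = 0; i < x.size(); ++i) {
    x[i] += delta;
    float numericalGrad = (f(x) - base) / delta;  // forward difference
    x[i] -= delta;

    float diff = numericalGrad - 2 * x[i];  // compare against the analytic gradient
    l2 += diff * diff;
  }
  std::printf("L2-norm of difference=%f\n", std::sqrt(l2));
  return 0;
}

The forward difference carries O(delta) truncation error, so even a correct backward implementation prints a small non-zero norm here.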


@@ -73,7 +73,7 @@ struct DotNodeOp : public BinaryNodeOp {
virtual std::string graphviz() {
std::stringstream ss;
ss << "\"" << this << "\" [shape=\"box\", label=" << label("×")
ss << "\"" << this << "\" [shape=\"box\", label=" << label("")
<< ", style=\"filled\", fillcolor=\"orange\"]" << std::endl;
ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl;
ss << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;
@@ -185,7 +185,7 @@ struct MultNodeOp : public BinaryNodeOp {
virtual std::string graphviz() {
std::stringstream ss;
ss << "\"" << this << "\" [shape=\"box\", label=" << label("")
ss << "\"" << this << "\" [shape=\"box\", label=" << label("x")
<< ", style=\"filled\", fillcolor=\"yellow\"]" << std::endl;
ss << "\"" << a_ << "\" -> \"" << this << "\"" << std::endl;
ss << "\"" << b_ << "\" -> \"" << this << "\"" << std::endl << std::endl;


@@ -30,7 +30,6 @@ int main(int argc, char** argv)
Expr labelExpr = g.input(shape={batch_size, output_size});
Expr inExpr2 = g.input(shape={batch_size, input_size});
Expr inExpr3 = g.input(shape={input_size, batch_size});
vector<Expr> expr;
@@ -48,11 +47,15 @@ int main(int argc, char** argv)
expr.emplace_back(relu(expr.back()));
expr.emplace_back(log(expr.back()));
expr.emplace_back(exp(expr.back()));
expr.emplace_back(dropout(expr.back()));
//expr.emplace_back(softmax_slow(expr.back()));
expr.emplace_back(softmax(expr.back()));
Expr ceExpr = cross_entropy(expr.back(), labelExpr);
Expr cost = mean(ceExpr, axis=0);
std::cout << g.graphviz() << std::endl;
// create data
//srand(0);
srand(time(NULL));
@@ -79,18 +82,11 @@ int main(int argc, char** argv)
inExpr2 = inTensor2;
Tensor inTensor3({input_size, batch_size});
thrust::copy(values2.begin(), values2.end(), inTensor3.begin());
inExpr3 = inTensor3;
// train
g.forward(batch_size);
//g.backward();
g.backward_debug(0.001);
/*
std::cerr << "inTensor=" << inTensor.Debug() << std::endl;